KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > poi > hssf > record > SSTDeserializer


1
/* ====================================================================
   Copyright 2002-2004 Apache Software Foundation

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

17         
18
19 package org.apache.poi.hssf.record;
20
21 import org.apache.poi.util.BinaryTree;
22 import org.apache.poi.util.LittleEndian;
23 import org.apache.poi.util.LittleEndianConsts;
24
25 /**
26  * Handles the task of deserializing a SST string. The two main entry points are
27  *
28  * @author Glen Stampoultzis (glens at apache.org)
29  * @author Jason Height (jheight at apache.org)
30  */

31 class SSTDeserializer
32 {
33
34     private BinaryTree strings;
35     /** this is the number of characters that have been read prior to the continuation */
36     private int continuationReadChars;
37     /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
38     private String JavaDoc unfinishedString;
39     /** this is true if the string uses wide characters */
40     private boolean wideChar;
41     /** this is true if the string is a rich text string */
42     private boolean richText;
43     /** this is true if the string is a far east string or some other wierd string */
44     private boolean extendedText;
45     /** Number of formatting runs in this rich text field */
46     private short runCount;
47     /** Number of characters in current string */
48     private int charCount;
49     private int extensionLength;
50     private int continueSkipBytes = 0;
51
52
53     public SSTDeserializer( BinaryTree strings )
54     {
55         this.strings = strings;
56         initVars();
57     }
58
59     private void initVars()
60     {
61         runCount = 0;
62         continuationReadChars = 0;
63         unfinishedString = "";
64 // bytesInCurrentSegment = 0;
65
// stringDataOffset = 0;
66
wideChar = false;
67         richText = false;
68         extendedText = false;
69         continueSkipBytes = 0;
70     }
71
72     /**
73      * This is the starting point where strings are constructed. Note that
74      * strings may span across multiple continuations. Read the SST record
75      * carefully before beginning to hack.
76      */

77     public void manufactureStrings( final byte[] data, final int initialOffset)
78     {
79         initVars();
80
81         int offset = initialOffset;
82         final int dataSize = data.length;
83         while ( offset < dataSize )
84         {
85             int remaining = dataSize - offset;
86
87             if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
88             {
89                 throw new RecordFormatException( "Cannot get length of the last string in SSTRecord" );
90             }
91             if ( remaining == LittleEndianConsts.SHORT_SIZE )
92             {
93               //JMH Dont know about this
94
setContinuationCharsRead( 0 );//LittleEndian.getUShort( data, offset ) );
95
unfinishedString = "";
96                 break;
97             }
98             charCount = LittleEndian.getUShort( data, offset );
99             int charsRead = charCount;
100             readStringHeader( data, offset );
101             boolean stringContinuesOverContinuation = remaining < totalStringSize();
102             if ( stringContinuesOverContinuation )
103             {
104                 int remainingBytes = dataSize - offset - stringHeaderOverhead();
105                 //Only read the size of the string or whatever is left before the
106
//continuation
107
charsRead = Math.min(charsRead, calculateCharCount( remainingBytes ));
108                 setContinuationCharsRead( charsRead );
109                 if (charsRead == charCount) {
110                   //Since all of the characters will have been read, but the entire string (including formatting runs etc)
111
//hasnt, Compute the number of bytes to skip when the continue record starts
112
continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes - calculateByteCount(charsRead));
113                 }
114             }
115             processString( data, offset, charsRead );
116             offset += totalStringSize();
117             if ( stringContinuesOverContinuation )
118             {
119                 break;
120             }
121         }
122     }
123
124 // private void dump( final byte[] data, int offset, int length )
125
// {
126
// try
127
// {
128
// System.out.println( "------------------- SST DUMP -------------------------" );
129
// HexDump.dump( (byte[]) data, offset, System.out, offset, length );
130
// }
131
// catch ( IOException e )
132
// {
133
// }
134
// catch ( ArrayIndexOutOfBoundsException e )
135
// {
136
// }
137
// catch ( IllegalArgumentException e )
138
// {
139
// }
140
// }
141

142     /**
143      * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
144      * plain string etc) and calculates the length and offset for the string.
145      *
146      */

147     private void readStringHeader( final byte[] data, final int index )
148     {
149
150         byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
151
152         wideChar = ( optionFlag & 1 ) == 1;
153         extendedText = ( optionFlag & 4 ) == 4;
154         richText = ( optionFlag & 8 ) == 8;
155         runCount = 0;
156         if ( richText )
157         {
158             runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
159         }
160         extensionLength = 0;
161         if ( extendedText )
162         {
163             extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD
164                     + (richText ? LittleEndianConsts.SHORT_SIZE : 0) );
165         }
166
167     }
168
169
170     /**
171      * Reads a string or the first part of a string.
172      *
173      * @param characters the number of characters to write.
174      *
175      * @return the number of bytes written.
176      */

177     private int processString( final byte[] data, final int dataIndex, final int characters )
178     {
179
180         // length is the length we store it as. not the length that is read.
181
int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters );
182         byte[] unicodeStringBuffer = new byte[length];
183
184         int offset = 0;
185
186         // Set the length in characters
187
LittleEndian.putUShort( unicodeStringBuffer, offset, characters );
188         offset += LittleEndianConsts.SHORT_SIZE;
189         // Set the option flags
190
unicodeStringBuffer[offset] = data[dataIndex + offset];
191         // Copy in the string data
192
int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD;
193         arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead );
194         // Create the unicode string
195
UnicodeString string = new UnicodeString( UnicodeString.sid,
196                 (short) unicodeStringBuffer.length,
197                 unicodeStringBuffer );
198         setContinuationCharsRead( calculateCharCount(bytesRead));
199
200         if ( isStringFinished() )
201         {
202             Integer JavaDoc integer = new Integer JavaDoc( strings.size() );
203             addToStringTable( strings, integer, string );
204         }
205         else
206         {
207             unfinishedString = string.getString();
208         }
209
210         return bytesRead;
211     }
212
213     private boolean isStringFinished()
214     {
215         return getContinuationCharsRead() == charCount;
216     }
217
218     /**
219      * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
220      * we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich
221      * text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
222      * but it gets the job done until we can handle this a little better.
223      */

224     static public void addToStringTable( BinaryTree strings, Integer JavaDoc integer, UnicodeString string )
225     {
226
227         if ( string.isRichText() )
228             string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
229         if ( string.isExtendedText() )
230             string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );
231
232         boolean added = false;
233         while ( added == false )
234         {
235             try
236             {
237                 strings.put( integer, string );
238                 added = true;
239             }
240             catch ( Exception JavaDoc ignore )
241             {
242                 string.setString( string.getString() + " " );
243             }
244         }
245
246     }
247
248
249     private int calculateCharCount( final int byte_count )
250     {
251         return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
252     }
253
254     /**
255      * Process a Continue record. A Continue record for an SST record
256      * contains the same kind of data that the SST record contains,
257      * with the following exceptions:
258      * <P>
259      * <OL>
260      * <LI>The string counts at the beginning of the SST record are
261      * not in the Continue record
262      * <LI>The first string in the Continue record might NOT begin
263      * with a size. If the last string in the previous record is
264      * continued in this record, the size is determined by that
265      * last string in the previous record; the first string will
266      * begin with a flag byte, followed by the remaining bytes (or
267      * words) of the last string from the previous
268      * record. Otherwise, the first string in the record will
269      * begin with a string length
270      * </OL>
271      *
272      * @param record the Continue record's byte data
273      */

274     public void processContinueRecord( final byte[] record )
275     {
276         if ( isStringFinished() )
277         {
278             final int offset = continueSkipBytes;
279             initVars();
280             manufactureStrings( record, offset);
281         }
282         else
283         {
284             // reset the wide bit because that can change across a continuation. the fact that it's
285
// actually rich text doesn't change across continuations even though the rich text
286
// may on longer be set in the "new" option flag. confusing huh?
287
wideChar = ( record[0] & 1 ) == 1;
288
289             if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) )
290             {
291                 processEntireContinuation( record );
292             }
293             else
294             {
295                 readStringRemainder( record );
296             }
297         }
298
299     }
300
301     /**
302      * Reads the remainder string and any subsequent strings from the continuation record.
303      *
304      * @param record The entire continuation record data.
305      */

306     private void readStringRemainder( final byte[] record )
307     {
308         int stringRemainderSizeInBytes = calculateByteCount( charCount-getContinuationCharsRead() );
309         byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
310                 + stringRemainderSizeInBytes];
311
312         // write the string length
313
LittleEndian.putShort( unicodeStringData, 0, (short) (charCount-getContinuationCharsRead()) );
314
315         // write the options flag
316
unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
317
318         // copy the bytes/words making up the string; skipping
319
// past all the overhead of the str_data array
320
arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
321                 SSTRecord.STRING_MINIMAL_OVERHEAD,
322                 stringRemainderSizeInBytes );
323
324         // use special constructor to create the final string
325
UnicodeString string = new UnicodeString( UnicodeString.sid,
326                 (short) unicodeStringData.length, unicodeStringData,
327                 unfinishedString );
328         Integer JavaDoc integer = new Integer JavaDoc( strings.size() );
329
330         addToStringTable( strings, integer, string );
331
332         int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
333         manufactureStrings( record, newOffset);
334     }
335
336     /**
337      * Calculates the size of the string in bytes based on the character width
338      */

339     private int stringSizeInBytes()
340     {
341         return calculateByteCount( charCount );
342     }
343
344     /**
345      * Calculates the size of the string in byes. This figure includes all the over
346      * heads for the string.
347      */

348     private int totalStringSize()
349     {
350         return stringSizeInBytes()
351                 + stringHeaderOverhead()
352                 + LittleEndianConsts.INT_SIZE * runCount
353                 + extensionLength;
354     }
355
356     private int stringHeaderOverhead()
357     {
358         return SSTRecord.STRING_MINIMAL_OVERHEAD
359                 + ( richText ? LittleEndianConsts.SHORT_SIZE : 0 )
360                 + ( extendedText ? LittleEndianConsts.INT_SIZE : 0 );
361     }
362
363     private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
364     {
365         int offset = stringRemainderSizeInBytes + runCount * LittleEndianConsts.INT_SIZE + extensionLength;
366         if (stringRemainderSizeInBytes != 0)
367           //If a portion of the string remains then the wideChar options byte is repeated,
368
//so need to skip this.
369
offset += + LittleEndianConsts.BYTE_SIZE;
370         return offset;
371     }
372
373     private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
374     {
375         return (byte) ( ( wideChar ? 1 : 0 ) + ( farEast ? 4 : 0 ) + ( richText ? 8 : 0 ) );
376     }
377
378     /**
379      * If the continued record is so long is spans into the next continue then
380      * simply suck the remaining string data into the existing <code>unfinishedString</code>.
381      *
382      * @param record The data from the continuation record.
383      */

384     private void processEntireContinuation( final byte[] record )
385     {
386         // create artificial data to create a UnicodeString
387
int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
388         byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE];
389
390         int charsRead = calculateCharCount( dataLengthInBytes );
391         LittleEndian.putShort( unicodeStringData, (byte) 0, (short) charsRead );
392         arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length );
393         UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData, unfinishedString);
394
395         unfinishedString = ucs.getString();
396         setContinuationCharsRead( getContinuationCharsRead() + charsRead );
397         if (getContinuationCharsRead() == charCount) {
398           Integer JavaDoc integer = new Integer JavaDoc( strings.size() );
399           addToStringTable( strings, integer, ucs );
400         }
401     }
402
403     private boolean stringSpansContinuation( int continuationSizeInBytes )
404     {
405         return calculateByteCount( charCount - getContinuationCharsRead() ) > continuationSizeInBytes;
406     }
407
408     /**
409      * @return the number of characters we expect in the first
410      * sub-record in a subsequent continuation record
411      */

412
413     int getContinuationCharsRead()
414     {
415         return continuationReadChars;
416     }
417
418     private void setContinuationCharsRead( final int count )
419     {
420         continuationReadChars = count;
421     }
422
423     private int calculateByteCount( final int character_count )
424     {
425         return character_count * ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
426     }
427
428
429     /**
430      * Copies an array from the specified source array, beginning at the
431      * specified position, to the specified position of the destination array.
432      * A subsequence of array components are copied from the source
433      * array referenced by <code>src</code> to the destination array
434      * referenced by <code>dst</code>. The number of components copied is
435      * equal to the <code>length</code> argument. The components at
436      * positions <code>srcOffset</code> through
437      * <code>srcOffset+length-1</code> in the source array are copied into
438      * positions <code>dstOffset</code> through
439      * <code>dstOffset+length-1</code>, respectively, of the destination
440      * array.
441      * <p>
442      * If the <code>src</code> and <code>dst</code> arguments refer to the
443      * same array object, then the copying is performed as if the
444      * components at positions <code>srcOffset</code> through
445      * <code>srcOffset+length-1</code> were first copied to a temporary
446      * array with <code>length</code> components and then the contents of
447      * the temporary array were copied into positions
448      * <code>dstOffset</code> through <code>dstOffset+length-1</code> of the
449      * destination array.
450      * <p>
451      * If <code>dst</code> is <code>null</code>, then a
452      * <code>NullPointerException</code> is thrown.
453      * <p>
454      * If <code>src</code> is <code>null</code>, then a
455      * <code>NullPointerException</code> is thrown and the destination
456      * array is not modified.
457      * <p>
458      * Otherwise, if any of the following is true, an
459      * <code>ArrayStoreException</code> is thrown and the destination is
460      * not modified:
461      * <ul>
462      * <li>The <code>src</code> argument refers to an object that is not an
463      * array.
464      * <li>The <code>dst</code> argument refers to an object that is not an
465      * array.
466      * <li>The <code>src</code> argument and <code>dst</code> argument refer to
467      * arrays whose component types are different primitive types.
468      * <li>The <code>src</code> argument refers to an array with a primitive
469      * component type and the <code>dst</code> argument refers to an array
470      * with a reference component type.
471      * <li>The <code>src</code> argument refers to an array with a reference
472      * component type and the <code>dst</code> argument refers to an array
473      * with a primitive component type.
474      * </ul>
475      * <p>
476      * Otherwise, if any of the following is true, an
477      * <code>IndexOutOfBoundsException</code> is
478      * thrown and the destination is not modified:
479      * <ul>
480      * <li>The <code>srcOffset</code> argument is negative.
481      * <li>The <code>dstOffset</code> argument is negative.
482      * <li>The <code>length</code> argument is negative.
483      * <li><code>srcOffset+length</code> is greater than
484      * <code>src.length</code>, the length of the source array.
485      * <li><code>dstOffset+length</code> is greater than
486      * <code>dst.length</code>, the length of the destination array.
487      * </ul>
488      * <p>
489      * Otherwise, if any actual component of the source array from
490      * position <code>srcOffset</code> through
491      * <code>srcOffset+length-1</code> cannot be converted to the component
492      * type of the destination array by assignment conversion, an
493      * <code>ArrayStoreException</code> is thrown. In this case, let
494      * <b><i>k</i></b> be the smallest nonnegative integer less than
495      * length such that <code>src[srcOffset+</code><i>k</i><code>]</code>
496      * cannot be converted to the component type of the destination
497      * array; when the exception is thrown, source array components from
498      * positions <code>srcOffset</code> through
499      * <code>srcOffset+</code><i>k</i><code>-1</code>
500      * will already have been copied to destination array positions
501      * <code>dstOffset</code> through
502      * <code>dstOffset+</code><i>k</I><code>-1</code> and no other
503      * positions of the destination array will have been modified.
504      * (Because of the restrictions already itemized, this
505      * paragraph effectively applies only to the situation where both
506      * arrays have component types that are reference types.)
507      *
508      * @param src the source array.
509      * @param src_position start position in the source array.
510      * @param dst the destination array.
511      * @param dst_position pos start position in the destination data.
512      * @param length the number of array elements to be copied.
513      * @exception IndexOutOfBoundsException if copying would cause
514      * access of data outside array bounds.
515      * @exception ArrayStoreException if an element in the <code>src</code>
516      * array could not be stored into the <code>dest</code> array
517      * because of a type mismatch.
518      * @exception NullPointerException if either <code>src</code> or
519      * <code>dst</code> is <code>null</code>.
520      */

521     private void arraycopy( byte[] src, int src_position,
522                             byte[] dst, int dst_position,
523                             int length )
524     {
525         System.arraycopy( src, src_position, dst, dst_position, length );
526     }
527
528     /**
529      * @return the unfinished string
530      */

531     String JavaDoc getUnfinishedString()
532     {
533         return unfinishedString;
534     }
535
536     /**
537      * @return true if current string uses wide characters
538      */

539     boolean isWideChar()
540     {
541         return wideChar;
542     }
543
544
545 }
546
Popular Tags