KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > poi > hssf > record > SSTRecord


1
2 /* ====================================================================
3    Copyright 2002-2004 Apache Software Foundation
4
5    Licensed under the Apache License, Version 2.0 (the "License");
6    you may not use this file except in compliance with the License.
7    You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16 ==================================================================== */

17         
18
19 package org.apache.poi.hssf.record;
20
21 import org.apache.poi.util.BinaryTree;
22 import org.apache.poi.util.LittleEndian;
23 import org.apache.poi.util.LittleEndianConsts;
24
25 import java.util.Iterator JavaDoc;
26 import java.util.List JavaDoc;
27
28 /**
29  * Title: Static String Table Record
30  * <P>
31  * Description: This holds all the strings for LabelSSTRecords.
32  * <P>
33  * REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
34  * 1-57231-498-2)
35  * <P>
36  * @author Andrew C. Oliver (acoliver at apache dot org)
37  * @author Marc Johnson (mjohnson at apache dot org)
38  * @author Glen Stampoultzis (glens at apache.org)
39  *
40  * @see org.apache.poi.hssf.record.LabelSSTRecord
41  * @see org.apache.poi.hssf.record.ContinueRecord
42  */

43
44 public class SSTRecord
45         extends Record
46 {
47
48     /** how big can an SST record be? As big as any record can be: 8228 bytes */
49     static final int MAX_RECORD_SIZE = 8228;
50
51     /** standard record overhead: two shorts (record id plus data space size)*/
52     static final int STD_RECORD_OVERHEAD =
53             2 * LittleEndianConsts.SHORT_SIZE;
54
55     /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
56     static final int SST_RECORD_OVERHEAD =
57             ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
58
59     /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
60     static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
61
62     /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
63     static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
64
65     public static final short sid = 0xfc;
66
67     /** union of strings in the SST and EXTSST */
68     private int field_1_num_strings;
69
70     /** according to docs ONLY SST */
71     private int field_2_num_unique_strings;
72     private BinaryTree field_3_strings;
73
74     /** Record lengths for initial SST record and all continue records */
75     private List JavaDoc _record_lengths = null;
76     private SSTDeserializer deserializer;
77
78     /** Offsets from the beginning of the SST record (even across continuations) */
79     int[] bucketAbsoluteOffsets;
80     /** Offsets relative the start of the current SST or continue record */
81     int[] bucketRelativeOffsets;
82
83     /**
84      * default constructor
85      */

86     public SSTRecord()
87     {
88         field_1_num_strings = 0;
89         field_2_num_unique_strings = 0;
90         field_3_strings = new BinaryTree();
91         deserializer = new SSTDeserializer(field_3_strings);
92     }
93
94     /**
95      * Constructs an SST record and sets its fields appropriately.
96      *
97      * @param id must be 0xfc or an exception will be throw upon
98      * validation
99      * @param size the size of the data area of the record
100      * @param data of the record (should not contain sid/len)
101      */

102
103     public SSTRecord( final short id, final short size, final byte[] data )
104     {
105         super( id, size, data );
106     }
107
108     /**
109      * Constructs an SST record and sets its fields appropriately.
110      *
111      * @param id must be 0xfc or an exception will be throw upon
112      * validation
113      * @param size the size of the data area of the record
114      * @param data of the record (should not contain sid/len)
115      * @param offset of the record
116      */

117
118     public SSTRecord( final short id, final short size, final byte[] data,
119                       int offset )
120     {
121         super( id, size, data, offset );
122     }
123
124     /**
125      * Add a string. Determines whether 8-bit encoding can be used, or
126      * whether 16-bit encoding must be used.
127      * <p>
128      * THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE
129      * OTHER ,code>addString</code> METHOD AND FORCE 8-BIT ENCODING ON
130      * A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE
131      * STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU
132      * ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT.
133      *
134      * @param string string to be added
135      *
136      * @return the index of that string in the table
137      */

138
139     public int addString( final String JavaDoc string )
140     {
141         int rval;
142
143         if ( string == null )
144         {
145             rval = addString( "", false );
146         }
147         else
148         {
149
150             // scan for characters greater than 255 ... if any are
151
// present, we have to use 16-bit encoding. Otherwise, we
152
// can use 8-bit encoding
153
boolean useUTF16 = false;
154             int strlen = string.length();
155
156             for ( int j = 0; j < strlen; j++ )
157             {
158                 if ( string.charAt( j ) > 255 )
159                 {
160                     useUTF16 = true;
161                     break;
162                 }
163             }
164             rval = addString( string, useUTF16 );
165         }
166         return rval;
167     }
168
169     /**
170      * Add a string and assert the encoding (8-bit or 16-bit) to be
171      * used.
172      * <P>
173      * USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
174      * YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
175      * IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
176      * WRITTEN OUT.
177      *
178      * @param string string to be added
179      * @param useUTF16 if true, forces 16-bit encoding. If false,
180      * forces 8-bit encoding
181      *
182      * @return the index of that string in the table
183      */

184
185     public int addString( final String JavaDoc string, final boolean useUTF16 )
186     {
187         field_1_num_strings++;
188         String JavaDoc str = ( string == null ) ? ""
189                 : string;
190         int rval;
191         UnicodeString ucs = new UnicodeString();
192
193         ucs.setString( str );
194         ucs.setCharCount( (short) str.length() );
195         ucs.setOptionFlags( (byte) ( useUTF16 ? 1
196                 : 0 ) );
197         Integer JavaDoc integer = (Integer JavaDoc) field_3_strings.getKeyForValue( ucs );
198
199         if ( integer != null )
200         {
201             rval = integer.intValue();
202         }
203         else
204         {
205
206             // This is a new string -- we didn't see it among the
207
// strings we've already collected
208
rval = field_3_strings.size();
209             field_2_num_unique_strings++;
210             integer = new Integer JavaDoc( rval );
211             SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
212 // field_3_strings.put( integer, ucs );
213
}
214         return rval;
215     }
216
217     /**
218      * @return number of strings
219      */

220
221     public int getNumStrings()
222     {
223         return field_1_num_strings;
224     }
225
226     /**
227      * @return number of unique strings
228      */

229
230     public int getNumUniqueStrings()
231     {
232         return field_2_num_unique_strings;
233     }
234
235     /**
236      * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
237      * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
238      * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
239      * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
240      * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
241      *
242      * @param count number of strings
243      *
244      */

245
246     public void setNumStrings( final int count )
247     {
248         field_1_num_strings = count;
249     }
250
251     /**
252      * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
253      * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
254      * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
255      * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
256      * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
257      * THE RECORD
258      *
259      * @param count number of strings
260      */

261
262     public void setNumUniqueStrings( final int count )
263     {
264         field_2_num_unique_strings = count;
265     }
266
267     /**
268      * Get a particular string by its index
269      *
270      * @param id index into the array of strings
271      *
272      * @return the desired string
273      */

274
275     public String JavaDoc getString( final int id )
276     {
277         return ( (UnicodeString) field_3_strings.get( new Integer JavaDoc( id ) ) ).getString();
278     }
279
280     public boolean isString16bit( final int id )
281     {
282         UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer JavaDoc( id ) ) );
283         return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
284     }
285
286     /**
287      * Return a debugging string representation
288      *
289      * @return string representation
290      */

291
292     public String JavaDoc toString()
293     {
294         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
295
296         buffer.append( "[SST]\n" );
297         buffer.append( " .numstrings = " )
298                 .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
299         buffer.append( " .uniquestrings = " )
300                 .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
301         for ( int k = 0; k < field_3_strings.size(); k++ )
302         {
303             buffer.append( " .string_" + k + " = " )
304                     .append( ( field_3_strings
305                     .get( new Integer JavaDoc( k ) ) ).toString() ).append( "\n" );
306         }
307         buffer.append( "[/SST]\n" );
308         return buffer.toString();
309     }
310
311     /**
312      * @return sid
313      */

314     public short getSid()
315     {
316         return sid;
317     }
318
319     /**
320      * @return hashcode
321      */

322     public int hashCode()
323     {
324         return field_2_num_unique_strings;
325     }
326
327     public boolean equals( Object JavaDoc o )
328     {
329         if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
330         {
331             return false;
332         }
333         SSTRecord other = (SSTRecord) o;
334
335         return ( ( field_1_num_strings == other
336                 .field_1_num_strings ) && ( field_2_num_unique_strings == other
337                 .field_2_num_unique_strings ) && field_3_strings
338                 .equals( other.field_3_strings ) );
339     }
340
341     /**
342      * validate SID
343      *
344      * @param id the alleged SID
345      *
346      * @exception RecordFormatException if validation fails
347      */

348
349     protected void validateSid( final short id )
350             throws RecordFormatException
351     {
352         if ( id != sid )
353         {
354             throw new RecordFormatException( "NOT An SST RECORD" );
355         }
356     }
357
358     /**
359      * Fill the fields from the data
360      * <P>
361      * The data consists of sets of string data. This string data is
362      * arranged as follows:
363      * <P>
364      * <CODE><pre>
365      * short string_length; // length of string data
366      * byte string_flag; // flag specifying special string
367      * // handling
368      * short run_count; // optional count of formatting runs
369      * int extend_length; // optional extension length
370      * char[] string_data; // string data, can be byte[] or
371      * // short[] (length of array is
372      * // string_length)
373      * int[] formatting_runs; // optional formatting runs (length of
374      * // array is run_count)
375      * byte[] extension; // optional extension (length of array
376      * // is extend_length)
377      * </pre></CODE>
378      * <P>
379      * The string_flag is bit mapped as follows:
380      * <P>
381      * <TABLE>
382      * <TR>
383      * <TH>Bit number</TH>
384      * <TH>Meaning if 0</TH>
385      * <TH>Meaning if 1</TH>
386      * <TR>
387      * <TR>
388      * <TD>0</TD>
389      * <TD>string_data is byte[]</TD>
390      * <TD>string_data is short[]</TH>
391      * <TR>
392      * <TR>
393      * <TD>1</TD>
394      * <TD>Should always be 0</TD>
395      * <TD>string_flag is defective</TH>
396      * <TR>
397      * <TR>
398      * <TD>2</TD>
399      * <TD>extension is not included</TD>
400      * <TD>extension is included</TH>
401      * <TR>
402      * <TR>
403      * <TD>3</TD>
404      * <TD>formatting run data is not included</TD>
405      * <TD>formatting run data is included</TH>
406      * <TR>
407      * <TR>
408      * <TD>4</TD>
409      * <TD>Should always be 0</TD>
410      * <TD>string_flag is defective</TH>
411      * <TR>
412      * <TR>
413      * <TD>5</TD>
414      * <TD>Should always be 0</TD>
415      * <TD>string_flag is defective</TH>
416      * <TR>
417      * <TR>
418      * <TD>6</TD>
419      * <TD>Should always be 0</TD>
420      * <TD>string_flag is defective</TH>
421      * <TR>
422      * <TR>
423      * <TD>7</TD>
424      * <TD>Should always be 0</TD>
425      * <TD>string_flag is defective</TH>
426      * <TR>
427      * </TABLE>
428      * <P>
429      * We can handle eating the overhead associated with bits 2 or 3
430      * (or both) being set, but we have no idea what to do with the
431      * associated data. The UnicodeString class can handle the byte[]
432      * vs short[] nature of the actual string data
433      *
434      * @param data raw data
435      * @param size size of the raw data
436      */

437
438     protected void fillFields( final byte[] data, final short size,
439                                int offset )
440     {
441
442         // this method is ALWAYS called after construction -- using
443
// the nontrivial constructor, of course -- so this is where
444
// we initialize our fields
445
field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
446         field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
447         field_3_strings = new BinaryTree();
448         deserializer = new SSTDeserializer(field_3_strings);
449         deserializer.manufactureStrings( data, 8 + offset);
450     }
451
452
453     /**
454      * @return an iterator of the strings we hold. All instances are
455      * UnicodeStrings
456      */

457
458     Iterator JavaDoc getStrings()
459     {
460         return field_3_strings.values().iterator();
461     }
462
463     /**
464      * @return count of the strings we hold.
465      */

466
467     int countStrings()
468     {
469         return field_3_strings.size();
470     }
471
472     /**
473      * called by the class that is responsible for writing this sucker.
474      * Subclasses should implement this so that their data is passed back in a
475      * byte array.
476      *
477      * @return size
478      */

479
480     public int serialize( int offset, byte[] data )
481     {
482         SSTSerializer serializer = new SSTSerializer(
483                 _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
484         int bytes = serializer.serialize( getRecordSize(), offset, data );
485         bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
486         bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
487 // for ( int i = 0; i < bucketAbsoluteOffsets.length; i++ )
488
// {
489
// System.out.println( "bucketAbsoluteOffset = " + bucketAbsoluteOffsets[i] );
490
// System.out.println( "bucketRelativeOffset = " + bucketRelativeOffsets[i] );
491
// }
492
return bytes;
493     }
494
495
496     public int getRecordSize()
497     {
498         SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
499         int recordSize = calculator.getRecordSize();
500         _record_lengths = calculator.getRecordLengths();
501         return recordSize;
502     }
503
504     SSTDeserializer getDeserializer()
505     {
506         return deserializer;
507     }
508
509     /**
510      * Strange to handle continue records this way. Is it a smell?
511      */

512     public void processContinueRecord( byte[] record )
513     {
514         deserializer.processContinueRecord( record );
515     }
516
517     /**
518      * Creates an extended string record based on the current contents of
519      * the current SST record. The offset within the stream to the SST record
520      * is required because the extended string record points directly to the
521      * strings in the SST record.
522      * <p>
523      * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
524      * SERIALIZED.
525      *
526      * @param sstOffset The offset in the stream to the start of the
527      * SST record.
528      * @return The new SST record.
529      */

530     public ExtSSTRecord createExtSSTRecord(int sstOffset)
531     {
532         if (bucketAbsoluteOffsets == null || bucketAbsoluteOffsets == null)
533             throw new IllegalStateException JavaDoc("SST record has not yet been serialized.");
534
535         ExtSSTRecord extSST = new ExtSSTRecord();
536         extSST.setNumStringsPerBucket((short)8);
537         int[] absoluteOffsets = (int[]) bucketAbsoluteOffsets.clone();
538         int[] relativeOffsets = (int[]) bucketRelativeOffsets.clone();
539         for ( int i = 0; i < absoluteOffsets.length; i++ )
540             absoluteOffsets[i] += sstOffset;
541         extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
542         return extSST;
543     }
544
545     /**
546      * Calculates the size in bytes of the EXTSST record as it would be if the
547      * record was serialized.
548      *
549      * @return The size of the ExtSST record in bytes.
550      */

551     public int calcExtSSTRecordSize()
552     {
553       return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
554     }
555 }
556
557
558
Popular Tags