KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > utils > FastStringBuffer


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 /*
17  * $Id: FastStringBuffer.java,v 1.25 2004/02/17 04:21:14 minchau Exp $
18  */

19 package org.apache.xml.utils;
20
21 /**
22  * Bare-bones, unsafe, fast string buffer. No thread-safety, no
23  * parameter range checking, exposed fields. Note that in typical
24  * applications, thread-safety of a StringBuffer is a somewhat
25  * dubious concept in any case.
26  * <p>
27  * Note that Stree and DTM used a single FastStringBuffer as a string pool,
28  * by recording start and length indices within this single buffer. This
29  * minimizes heap overhead, but of course requires more work when retrieving
30  * the data.
31  * <p>
32  * FastStringBuffer operates as a "chunked buffer". Doing so
33  * reduces the need to recopy existing information when an append
34  * exceeds the space available; we just allocate another chunk and
35  * flow across to it. (The array of chunks may need to grow,
36  * admittedly, but that's a much smaller object.) Some excess
37  * recopying may arise when we extract Strings which cross chunk
38  * boundaries; larger chunks make that less frequent.
39  * <p>
40  * The size values are parameterized, to allow tuning this code. In
41  * theory, Result Tree Fragments might want to be tuned differently
42  * from the main document's text.
43  * <p>
44  * %REVIEW% An experiment in self-tuning is
45  * included in the code (using nested FastStringBuffers to achieve
46  * variation in chunk sizes), but this implementation has proven to
47  * be problematic when data may be being copied from the FSB into itself.
48  * We should either re-architect that to make this safe (if possible)
49  * or remove that code and clean up for performance/maintainability reasons.
50  * <p>
51  */

52 public class FastStringBuffer
53 {
54   // If nonzero, forces the inial chunk size.
55
/**/static final int DEBUG_FORCE_INIT_BITS=0;
56   
57     // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
58
// back into the same FSB (variable set from previous variable, for example)
59
// and blocksize changes in mid-copy... there's risk of severe malfunction in
60
// the read process, due to how the resizing code re-jiggers storage. Arggh.
61
// If we want to retain the variable-size-block feature, we need to reconsider
62
// that issue. For now, I have forced us into fixed-size mode.
63
static boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
64
65     /** Manifest constant: Suppress leading whitespace.
66      * This should be used when normalize-to-SAX is called for the first chunk of a
67      * multi-chunk output, or one following unsuppressed whitespace in a previous
68      * chunk.
69      * @see sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
70      */

71     public static final int SUPPRESS_LEADING_WS=0x01;
72     
73     /** Manifest constant: Suppress trailing whitespace.
74      * This should be used when normalize-to-SAX is called for the last chunk of a
75      * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
76      */

77     public static final int SUPPRESS_TRAILING_WS=0x02;
78     
79     /** Manifest constant: Suppress both leading and trailing whitespace.
80      * This should be used when normalize-to-SAX is called for a complete string.
81      * (I'm not wild about the name of this one. Ideas welcome.)
82      * @see sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
83      */

84     public static final int SUPPRESS_BOTH
85         = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
86
87     /** Manifest constant: Carry trailing whitespace of one chunk as leading
88      * whitespace of the next chunk. Used internally; I don't see any reason
89      * to make it public right now.
90      */

91     private static final int CARRY_WS=0x04;
92
93     /**
94    * Field m_chunkBits sets our chunking strategy, by saying how many
95    * bits of index can be used within a single chunk before flowing over
96    * to the next chunk. For example, if m_chunkbits is set to 15, each
97    * chunk can contain up to 2^15 (32K) characters
98    */

99   int m_chunkBits = 15;
100
101   /**
102    * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
103    * the largest permissible chunk size is in this particular FastStringBuffer
104    * hierarchy.
105    */

106   int m_maxChunkBits = 15;
107
108   /**
109    * Field m_rechunkBits affects our chunk-growth strategy, by saying how
110    * many chunks should be allocated at one size before we encapsulate them
111    * into the first chunk of the next size up. For example, if m_rechunkBits
112    * is set to 3, then after 8 chunks at a given size we will rebundle
113    * them as the first element of a FastStringBuffer using a chunk size
114    * 8 times larger (chunkBits shifted left three bits).
115    */

116   int m_rebundleBits = 2;
117
118   /**
119    * Field m_chunkSize establishes the maximum size of one chunk of the array
120    * as 2**chunkbits characters.
121    * (Which may also be the minimum size if we aren't tuning for storage)
122    */

123   int m_chunkSize; // =1<<(m_chunkBits-1);
124

125   /**
126    * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
127    * worth of low-order '1' bits, useful for shift-and-mask addressing
128    * within the chunks.
129    */

130   int m_chunkMask; // =m_chunkSize-1;
131

132   /**
133    * Field m_array holds the string buffer's text contents, using an
134    * array-of-arrays. Note that this array, and the arrays it contains, may be
135    * reallocated when necessary in order to allow the buffer to grow;
136    * references to them should be considered to be invalidated after any
137    * append. However, the only time these arrays are directly exposed
138    * is in the sendSAXcharacters call.
139    */

140   char[][] m_array;
141
142   /**
143    * Field m_lastChunk is an index into m_array[], pointing to the last
144    * chunk of the Chunked Array currently in use. Note that additional
145    * chunks may actually be allocated, eg if the FastStringBuffer had
146    * previously been truncated or if someone issued an ensureSpace request.
147    * <p>
148    * The insertion point for append operations is addressed by the combination
149    * of m_lastChunk and m_firstFree.
150    */

151   int m_lastChunk = 0;
152
153   /**
154    * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
155    * the first character in the Chunked Array which is not part of the
156    * FastStringBuffer's current content. Since m_array[][] is zero-based,
157    * the length of that content can be calculated as
158    * (m_lastChunk<<m_chunkBits) + m_firstFree
159    */

160   int m_firstFree = 0;
161
162   /**
163    * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
164    * length equals m_chunkSize, and which replaces m_array[0]. This allows
165    * building a hierarchy of FastStringBuffers, where early appends use
166    * a smaller chunkSize (for less wasted memory overhead) but later
167    * ones use a larger chunkSize (for less heap activity overhead).
168    */

169   FastStringBuffer m_innerFSB = null;
170
171   /**
172    * Construct a FastStringBuffer, with allocation policy as per parameters.
173    * <p>
174    * For coding convenience, I've expressed both allocation sizes in terms of
175    * a number of bits. That's needed for the final size of a chunk,
176    * to permit fast and efficient shift-and-mask addressing. It's less critical
177    * for the inital size, and may be reconsidered.
178    * <p>
179    * An alternative would be to accept integer sizes and round to powers of two;
180    * that really doesn't seem to buy us much, if anything.
181    *
182    * @param initChunkBits Length in characters of the initial allocation
183    * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
184    * characters.) Later chunks will use larger allocation units, to trade off
185    * allocation speed of large document against storage efficiency of small
186    * ones.
187    * @param maxChunkBits Number of character-offset bits that should be used for
188    * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
189    * characters.
190    * @param rebundleBits Number of character-offset bits that addressing should
191    * advance before we attempt to take a step from initChunkBits to maxChunkBits
192    */

193   public FastStringBuffer(int initChunkBits, int maxChunkBits,
194                           int rebundleBits)
195   {
196     if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
197     
198     // %REVIEW%
199
// Should this force to larger value, or smaller? Smaller less efficient, but if
200
// someone requested variable mode it's because they care about storage space.
201
// On the other hand, given the other changes I'm making, odds are that we should
202
// adopt the larger size. Dither, dither, dither... This is just stopgap workaround
203
// anyway; we need a permanant solution.
204
//
205
if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
206     //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
207

208     m_array = new char[16][];
209
210     // Don't bite off more than we're prepared to swallow!
211
if (initChunkBits > maxChunkBits)
212       initChunkBits = maxChunkBits;
213
214     m_chunkBits = initChunkBits;
215     m_maxChunkBits = maxChunkBits;
216     m_rebundleBits = rebundleBits;
217     m_chunkSize = 1 << (initChunkBits);
218     m_chunkMask = m_chunkSize - 1;
219     m_array[0] = new char[m_chunkSize];
220   }
221
222   /**
223    * Construct a FastStringBuffer, using a default rebundleBits value.
224    *
225    * NEEDSDOC @param initChunkBits
226    * NEEDSDOC @param maxChunkBits
227    */

228   public FastStringBuffer(int initChunkBits, int maxChunkBits)
229   {
230     this(initChunkBits, maxChunkBits, 2);
231   }
232
233   /**
234    * Construct a FastStringBuffer, using default maxChunkBits and
235    * rebundleBits values.
236    * <p>
237    * ISSUE: Should this call assert initial size, or fixed size?
238    * Now configured as initial, with a default for fixed.
239    *
240    * @param
241    *
242    * NEEDSDOC @param initChunkBits
243    */

244   public FastStringBuffer(int initChunkBits)
245   {
246     this(initChunkBits, 15, 2);
247   }
248
249   /**
250    * Construct a FastStringBuffer, using a default allocation policy.
251    */

252   public FastStringBuffer()
253   {
254
255     // 10 bits is 1K. 15 bits is 32K. Remember that these are character
256
// counts, so actual memory allocation unit is doubled for UTF-16 chars.
257
//
258
// For reference: In the original FastStringBuffer, we simply
259
// overallocated by blocksize (default 1KB) on each buffer-growth.
260
this(10, 15, 2);
261   }
262
263   /**
264    * Get the length of the list. Synonym for length().
265    *
266    * @return the number of characters in the FastStringBuffer's content.
267    */

268   public final int size()
269   {
270     return (m_lastChunk << m_chunkBits) + m_firstFree;
271   }
272
273   /**
274    * Get the length of the list. Synonym for size().
275    *
276    * @return the number of characters in the FastStringBuffer's content.
277    */

278   public final int length()
279   {
280     return (m_lastChunk << m_chunkBits) + m_firstFree;
281   }
282
283   /**
284    * Discard the content of the FastStringBuffer, and most of the memory
285    * that was allocated by it, restoring the initial state. Note that this
286    * may eventually be different from setLength(0), which see.
287    */

288   public final void reset()
289   {
290
291     m_lastChunk = 0;
292     m_firstFree = 0;
293
294     // Recover the original chunk size
295
FastStringBuffer innermost = this;
296
297     while (innermost.m_innerFSB != null)
298     {
299       innermost = innermost.m_innerFSB;
300     }
301
302     m_chunkBits = innermost.m_chunkBits;
303     m_chunkSize = innermost.m_chunkSize;
304     m_chunkMask = innermost.m_chunkMask;
305
306     // Discard the hierarchy
307
m_innerFSB = null;
308     m_array = new char[16][0];
309     m_array[0] = new char[m_chunkSize];
310   }
311
312   /**
313    * Directly set how much of the FastStringBuffer's storage is to be
314    * considered part of its content. This is a fast but hazardous
315    * operation. It is not protected against negative values, or values
316    * greater than the amount of storage currently available... and even
317    * if additional storage does exist, its contents are unpredictable.
318    * The only safe use for our setLength() is to truncate the FastStringBuffer
319    * to a shorter string.
320    *
321    * @param l New length. If l<0 or l>=getLength(), this operation will
322    * not report an error but future operations will almost certainly fail.
323    */

324   public final void setLength(int l)
325   {
326     m_lastChunk = l >>> m_chunkBits;
327
328     if (m_lastChunk == 0 && m_innerFSB != null)
329     {
330       // Replace this FSB with the appropriate inner FSB, truncated
331
m_innerFSB.setLength(l, this);
332     }
333     else
334     {
335       m_firstFree = l & m_chunkMask;
336       
337       // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
338
// us pointing at the start of a chunk which has not yet been allocated. Rather than
339
// pay the cost of dealing with that in the append loops (more scattered and more
340
// inner-loop), we correct it here by moving to the safe side of that
341
// line -- as we would have left the indexes had we appended up to that point.
342
if(m_firstFree==0 && m_lastChunk>0)
343       {
344         --m_lastChunk;
345         m_firstFree=m_chunkSize;
346       }
347     }
348   }
349
350   /**
351    * Subroutine for the public setLength() method. Deals with the fact
352    * that truncation may require restoring one of the innerFSBs
353    *
354    * NEEDSDOC @param l
355    * NEEDSDOC @param rootFSB
356    */

357   private final void setLength(int l, FastStringBuffer rootFSB)
358   {
359
360     m_lastChunk = l >>> m_chunkBits;
361
362     if (m_lastChunk == 0 && m_innerFSB != null)
363     {
364       m_innerFSB.setLength(l, rootFSB);
365     }
366     else
367     {
368
369       // Undo encapsulation -- pop the innerFSB data back up to root.
370
// Inefficient, but attempts to keep the code simple.
371
rootFSB.m_chunkBits = m_chunkBits;
372       rootFSB.m_maxChunkBits = m_maxChunkBits;
373       rootFSB.m_rebundleBits = m_rebundleBits;
374       rootFSB.m_chunkSize = m_chunkSize;
375       rootFSB.m_chunkMask = m_chunkMask;
376       rootFSB.m_array = m_array;
377       rootFSB.m_innerFSB = m_innerFSB;
378       rootFSB.m_lastChunk = m_lastChunk;
379
380       // Finally, truncate this sucker.
381
rootFSB.m_firstFree = l & m_chunkMask;
382     }
383   }
384
385   /**
386    * Note that this operation has been somewhat deoptimized by the shift to a
387    * chunked array, as there is no factory method to produce a String object
388    * directly from an array of arrays and hence a double copy is needed.
389    * By using ensureCapacity we hope to minimize the heap overhead of building
390    * the intermediate StringBuffer.
391    * <p>
392    * (It really is a pity that Java didn't design String as a final subclass
393    * of MutableString, rather than having StringBuffer be a separate hierarchy.
394    * We'd avoid a <strong>lot</strong> of double-buffering.)
395    *
396    * @return the contents of the FastStringBuffer as a standard Java string.
397    */

398   public final String JavaDoc toString()
399   {
400
401     int length = (m_lastChunk << m_chunkBits) + m_firstFree;
402
403     return getString(new StringBuffer JavaDoc(length), 0, 0, length).toString();
404   }
405
406   /**
407    * Append a single character onto the FastStringBuffer, growing the
408    * storage if necessary.
409    * <p>
410    * NOTE THAT after calling append(), previously obtained
411    * references to m_array[][] may no longer be valid....
412    * though in fact they should be in this instance.
413    *
414    * @param value character to be appended.
415    */

416   public final void append(char value)
417   {
418     
419     char[] chunk;
420
421     // We may have preallocated chunks. If so, all but last should
422
// be at full size.
423
boolean lastchunk = (m_lastChunk + 1 == m_array.length);
424
425     if (m_firstFree < m_chunkSize) // Simplified test single-character-fits
426
chunk = m_array[m_lastChunk];
427     else
428     {
429
430       // Extend array?
431
int i = m_array.length;
432
433       if (m_lastChunk + 1 == i)
434       {
435         char[][] newarray = new char[i + 16][];
436
437         System.arraycopy(m_array, 0, newarray, 0, i);
438
439         m_array = newarray;
440       }
441
442       // Advance one chunk
443
chunk = m_array[++m_lastChunk];
444
445       if (chunk == null)
446       {
447
448         // Hierarchical encapsulation
449
if (m_lastChunk == 1 << m_rebundleBits
450                 && m_chunkBits < m_maxChunkBits)
451         {
452
453           // Should do all the work of both encapsulating
454
// existing data and establishing new sizes/offsets
455
m_innerFSB = new FastStringBuffer(this);
456         }
457
458         // Add a chunk.
459
chunk = m_array[m_lastChunk] = new char[m_chunkSize];
460       }
461
462       m_firstFree = 0;
463     }
464
465     // Space exists in the chunk. Append the character.
466
chunk[m_firstFree++] = value;
467   }
468
469   /**
470    * Append the contents of a String onto the FastStringBuffer,
471    * growing the storage if necessary.
472    * <p>
473    * NOTE THAT after calling append(), previously obtained
474    * references to m_array[] may no longer be valid.
475    *
476    * @param value String whose contents are to be appended.
477    */

478   public final void append(String JavaDoc value)
479   {
480
481     if (value == null)
482       return;
483     int strlen = value.length();
484
485     if (0 == strlen)
486       return;
487
488     int copyfrom = 0;
489     char[] chunk = m_array[m_lastChunk];
490     int available = m_chunkSize - m_firstFree;
491
492     // Repeat while data remains to be copied
493
while (strlen > 0)
494     {
495
496       // Copy what fits
497
if (available > strlen)
498         available = strlen;
499
500       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
501                      m_firstFree);
502
503       strlen -= available;
504       copyfrom += available;
505
506       // If there's more left, allocate another chunk and continue
507
if (strlen > 0)
508       {
509
510         // Extend array?
511
int i = m_array.length;
512
513         if (m_lastChunk + 1 == i)
514         {
515           char[][] newarray = new char[i + 16][];
516
517           System.arraycopy(m_array, 0, newarray, 0, i);
518
519           m_array = newarray;
520         }
521
522         // Advance one chunk
523
chunk = m_array[++m_lastChunk];
524
525         if (chunk == null)
526         {
527
528           // Hierarchical encapsulation
529
if (m_lastChunk == 1 << m_rebundleBits
530                   && m_chunkBits < m_maxChunkBits)
531           {
532
533             // Should do all the work of both encapsulating
534
// existing data and establishing new sizes/offsets
535
m_innerFSB = new FastStringBuffer(this);
536           }
537
538           // Add a chunk.
539
chunk = m_array[m_lastChunk] = new char[m_chunkSize];
540         }
541
542         available = m_chunkSize;
543         m_firstFree = 0;
544       }
545     }
546
547     // Adjust the insert point in the last chunk, when we've reached it.
548
m_firstFree += available;
549   }
550
551   /**
552    * Append the contents of a StringBuffer onto the FastStringBuffer,
553    * growing the storage if necessary.
554    * <p>
555    * NOTE THAT after calling append(), previously obtained
556    * references to m_array[] may no longer be valid.
557    *
558    * @param value StringBuffer whose contents are to be appended.
559    */

560   public final void append(StringBuffer JavaDoc value)
561   {
562
563     if (value == null)
564       return;
565     int strlen = value.length();
566
567     if (0 == strlen)
568       return;
569
570     int copyfrom = 0;
571     char[] chunk = m_array[m_lastChunk];
572     int available = m_chunkSize - m_firstFree;
573
574     // Repeat while data remains to be copied
575
while (strlen > 0)
576     {
577
578       // Copy what fits
579
if (available > strlen)
580         available = strlen;
581
582       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
583                      m_firstFree);
584
585       strlen -= available;
586       copyfrom += available;
587
588       // If there's more left, allocate another chunk and continue
589
if (strlen > 0)
590       {
591
592         // Extend array?
593
int i = m_array.length;
594
595         if (m_lastChunk + 1 == i)
596         {
597           char[][] newarray = new char[i + 16][];
598
599           System.arraycopy(m_array, 0, newarray, 0, i);
600
601           m_array = newarray;
602         }
603
604         // Advance one chunk
605
chunk = m_array[++m_lastChunk];
606
607         if (chunk == null)
608         {
609
610           // Hierarchical encapsulation
611
if (m_lastChunk == 1 << m_rebundleBits
612                   && m_chunkBits < m_maxChunkBits)
613           {
614
615             // Should do all the work of both encapsulating
616
// existing data and establishing new sizes/offsets
617
m_innerFSB = new FastStringBuffer(this);
618           }
619
620           // Add a chunk.
621
chunk = m_array[m_lastChunk] = new char[m_chunkSize];
622         }
623
624         available = m_chunkSize;
625         m_firstFree = 0;
626       }
627     }
628
629     // Adjust the insert point in the last chunk, when we've reached it.
630
m_firstFree += available;
631   }
632
633   /**
634    * Append part of the contents of a Character Array onto the
635    * FastStringBuffer, growing the storage if necessary.
636    * <p>
637    * NOTE THAT after calling append(), previously obtained
638    * references to m_array[] may no longer be valid.
639    *
640    * @param chars character array from which data is to be copied
641    * @param start offset in chars of first character to be copied,
642    * zero-based.
643    * @param length number of characters to be copied
644    */

645   public final void append(char[] chars, int start, int length)
646   {
647
648     int strlen = length;
649
650     if (0 == strlen)
651       return;
652
653     int copyfrom = start;
654     char[] chunk = m_array[m_lastChunk];
655     int available = m_chunkSize - m_firstFree;
656
657     // Repeat while data remains to be copied
658
while (strlen > 0)
659     {
660
661       // Copy what fits
662
if (available > strlen)
663         available = strlen;
664
665       System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
666                        available);
667
668       strlen -= available;
669       copyfrom += available;
670
671       // If there's more left, allocate another chunk and continue
672
if (strlen > 0)
673       {
674
675         // Extend array?
676
int i = m_array.length;
677
678         if (m_lastChunk + 1 == i)
679         {
680           char[][] newarray = new char[i + 16][];
681
682           System.arraycopy(m_array, 0, newarray, 0, i);
683
684           m_array = newarray;
685         }
686
687         // Advance one chunk
688
chunk = m_array[++m_lastChunk];
689
690         if (chunk == null)
691         {
692
693           // Hierarchical encapsulation
694
if (m_lastChunk == 1 << m_rebundleBits
695                   && m_chunkBits < m_maxChunkBits)
696           {
697
698             // Should do all the work of both encapsulating
699
// existing data and establishing new sizes/offsets
700
m_innerFSB = new FastStringBuffer(this);
701           }
702
703           // Add a chunk.
704
chunk = m_array[m_lastChunk] = new char[m_chunkSize];
705         }
706
707         available = m_chunkSize;
708         m_firstFree = 0;
709       }
710     }
711
712     // Adjust the insert point in the last chunk, when we've reached it.
713
m_firstFree += available;
714   }
715
716   /**
717    * Append the contents of another FastStringBuffer onto
718    * this FastStringBuffer, growing the storage if necessary.
719    * <p>
720    * NOTE THAT after calling append(), previously obtained
721    * references to m_array[] may no longer be valid.
722    *
723    * @param value FastStringBuffer whose contents are
724    * to be appended.
725    */

726   public final void append(FastStringBuffer value)
727   {
728
729     // Complicating factor here is that the two buffers may use
730
// different chunk sizes, and even if they're the same we're
731
// probably on a different alignment due to previously appended
732
// data. We have to work through the source in bite-sized chunks.
733
if (value == null)
734       return;
735     int strlen = value.length();
736
737     if (0 == strlen)
738       return;
739
740     int copyfrom = 0;
741     char[] chunk = m_array[m_lastChunk];
742     int available = m_chunkSize - m_firstFree;
743
744     // Repeat while data remains to be copied
745
while (strlen > 0)
746     {
747
748       // Copy what fits
749
if (available > strlen)
750         available = strlen;
751
752       int sourcechunk = (copyfrom + value.m_chunkSize - 1)
753                         >>> value.m_chunkBits;
754       int sourcecolumn = copyfrom & value.m_chunkMask;
755       int runlength = value.m_chunkSize - sourcecolumn;
756
757       if (runlength > available)
758         runlength = available;
759
760       System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
761                        m_array[m_lastChunk], m_firstFree, runlength);
762
763       if (runlength != available)
764         System.arraycopy(value.m_array[sourcechunk + 1], 0,
765                          m_array[m_lastChunk], m_firstFree + runlength,
766                          available - runlength);
767
768       strlen -= available;
769       copyfrom += available;
770
771       // If there's more left, allocate another chunk and continue
772
if (strlen > 0)
773       {
774
775         // Extend array?
776
int i = m_array.length;
777
778         if (m_lastChunk + 1 == i)
779         {
780           char[][] newarray = new char[i + 16][];
781
782           System.arraycopy(m_array, 0, newarray, 0, i);
783
784           m_array = newarray;
785         }
786
787         // Advance one chunk
788
chunk = m_array[++m_lastChunk];
789
790         if (chunk == null)
791         {
792
793           // Hierarchical encapsulation
794
if (m_lastChunk == 1 << m_rebundleBits
795                   && m_chunkBits < m_maxChunkBits)
796           {
797
798             // Should do all the work of both encapsulating
799
// existing data and establishing new sizes/offsets
800
m_innerFSB = new FastStringBuffer(this);
801           }
802
803           // Add a chunk.
804
chunk = m_array[m_lastChunk] = new char[m_chunkSize];
805         }
806
807         available = m_chunkSize;
808         m_firstFree = 0;
809       }
810     }
811
812     // Adjust the insert point in the last chunk, when we've reached it.
813
m_firstFree += available;
814   }
815
816   /**
817    * @return true if the specified range of characters are all whitespace,
818    * as defined by XMLCharacterRecognizer.
819    * <p>
820    * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
821    *
822    * @param start Offset of first character in the range.
823    * @param length Number of characters to send.
824    */

825   public boolean isWhitespace(int start, int length)
826   {
827
828     int sourcechunk = start >>> m_chunkBits;
829     int sourcecolumn = start & m_chunkMask;
830     int available = m_chunkSize - sourcecolumn;
831     boolean chunkOK;
832
833     while (length > 0)
834     {
835       int runlength = (length <= available) ? length : available;
836
837       if (sourcechunk == 0 && m_innerFSB != null)
838         chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
839       else
840         chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
841           m_array[sourcechunk], sourcecolumn, runlength);
842
843       if (!chunkOK)
844         return false;
845
846       length -= runlength;
847
848       ++sourcechunk;
849
850       sourcecolumn = 0;
851       available = m_chunkSize;
852     }
853
854     return true;
855   }
856
857   /**
858    * @param start Offset of first character in the range.
859    * @param length Number of characters to send.
860    * @return a new String object initialized from the specified range of
861    * characters.
862    */

863   public String JavaDoc getString(int start, int length)
864   {
865     int startColumn = start & m_chunkMask;
866     int startChunk = start >>> m_chunkBits;
867     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
868       return getOneChunkString(startChunk, startColumn, length);
869     }
870     return getString(new StringBuffer JavaDoc(length), startChunk, startColumn,
871                      length).toString();
872   }
873
874   protected String JavaDoc getOneChunkString(int startChunk, int startColumn,
875                                      int length) {
876     return new String JavaDoc(m_array[startChunk], startColumn, length);
877   }
878
879   /**
880    * @param sb StringBuffer to be appended to
881    * @param start Offset of first character in the range.
882    * @param length Number of characters to send.
883    * @return sb with the requested text appended to it
884    */

885   StringBuffer JavaDoc getString(StringBuffer JavaDoc sb, int start, int length)
886   {
887     return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
888   }
889
890   /**
891    * Internal support for toString() and getString().
892    * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
893    * and returns a StringBuffer supplied by the caller. This simplifies
894    * m_innerFSB support.
895    * <p>
896    * Note that this operation has been somewhat deoptimized by the shift to a
897    * chunked array, as there is no factory method to produce a String object
898    * directly from an array of arrays and hence a double copy is needed.
899    * By presetting length we hope to minimize the heap overhead of building
900    * the intermediate StringBuffer.
901    * <p>
902    * (It really is a pity that Java didn't design String as a final subclass
903    * of MutableString, rather than having StringBuffer be a separate hierarchy.
904    * We'd avoid a <strong>lot</strong> of double-buffering.)
905    *
906    *
907    * @param sb
908    * @param startChunk
909    * @param startColumn
910    * @param length
911    *
912    * @return the contents of the FastStringBuffer as a standard Java string.
913    */

914   StringBuffer JavaDoc getString(StringBuffer JavaDoc sb, int startChunk, int startColumn,
915                          int length)
916   {
917
918     int stop = (startChunk << m_chunkBits) + startColumn + length;
919     int stopChunk = stop >>> m_chunkBits;
920     int stopColumn = stop & m_chunkMask;
921
922     // Factored out
923
//StringBuffer sb=new StringBuffer(length);
924
for (int i = startChunk; i < stopChunk; ++i)
925     {
926       if (i == 0 && m_innerFSB != null)
927         m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
928       else
929         sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
930
931       startColumn = 0; // after first chunk
932
}
933
934     if (stopChunk == 0 && m_innerFSB != null)
935       m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
936     else if (stopColumn > startColumn)
937       sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
938
939     return sb;
940   }
941
942   /**
943    * Get a single character from the string buffer.
944    *
945    *
946    * @param pos character position requested.
947    * @return A character from the requested position.
948    */

949   public char charAt(int pos)
950   {
951     int startChunk = pos >>> m_chunkBits;
952
953     if (startChunk == 0 && m_innerFSB != null)
954       return m_innerFSB.charAt(pos & m_chunkMask);
955     else
956       return m_array[startChunk][pos & m_chunkMask];
957   }
958
959   /**
960    * Sends the specified range of characters as one or more SAX characters()
961    * events.
962    * Note that the buffer reference passed to the ContentHandler may be
963    * invalidated if the FastStringBuffer is edited; it's the user's
964    * responsibility to manage access to the FastStringBuffer to prevent this
965    * problem from arising.
966    * <p>
967    * Note too that there is no promise that the output will be sent as a
968    * single call. As is always true in SAX, one logical string may be split
969    * across multiple blocks of memory and hence delivered as several
970    * successive events.
971    *
972    * @param ch SAX ContentHandler object to receive the event.
973    * @param start Offset of first character in the range.
974    * @param length Number of characters to send.
975    * @exception org.xml.sax.SAXException may be thrown by handler's
976    * characters() method.
977    */

978   public void sendSAXcharacters(
979           org.xml.sax.ContentHandler JavaDoc ch, int start, int length)
980             throws org.xml.sax.SAXException JavaDoc
981   {
982
983     int startChunk = start >>> m_chunkBits;
984     int startColumn = start & m_chunkMask;
985     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
986         ch.characters(m_array[startChunk], startColumn, length);
987         return;
988     }
989     
990     int stop = start + length;
991     int stopChunk = stop >>> m_chunkBits;
992     int stopColumn = stop & m_chunkMask;
993
994     for (int i = startChunk; i < stopChunk; ++i)
995     {
996       if (i == 0 && m_innerFSB != null)
997         m_innerFSB.sendSAXcharacters(ch, startColumn,
998                                      m_chunkSize - startColumn);
999       else
1000        ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1001
1002      startColumn = 0; // after first chunk
1003
}
1004
1005    // Last, or only, chunk
1006
if (stopChunk == 0 && m_innerFSB != null)
1007      m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1008    else if (stopColumn > startColumn)
1009    {
1010      ch.characters(m_array[stopChunk], startColumn,
1011                    stopColumn - startColumn);
1012    }
1013  }
1014  
1015  /**
1016   * Sends the specified range of characters as one or more SAX characters()
1017   * events, normalizing the characters according to XSLT rules.
1018   *
1019   * @param ch SAX ContentHandler object to receive the event.
1020   * @param start Offset of first character in the range.
1021   * @param length Number of characters to send.
1022   * @return normalization status to apply to next chunk (because we may
1023   * have been called recursively to process an inner FSB):
1024   * <dl>
1025   * <dt>0</dt>
1026   * <dd>if this output did not end in retained whitespace, and thus whitespace
1027   * at the start of the following chunk (if any) should be converted to a
1028   * single space.
1029   * <dt>SUPPRESS_LEADING_WS</dt>
1030   * <dd>if this output ended in retained whitespace, and thus whitespace
1031   * at the start of the following chunk (if any) should be completely
1032   * suppressed.</dd>
1033   * </dd>
1034   * </dl>
1035   * @exception org.xml.sax.SAXException may be thrown by handler's
1036   * characters() method.
1037   */

1038  public int sendNormalizedSAXcharacters(
1039          org.xml.sax.ContentHandler JavaDoc ch, int start, int length)
1040            throws org.xml.sax.SAXException JavaDoc
1041  {
1042    // This call always starts at the beginning of the
1043
// string being written out, either because it was called directly or
1044
// because it was an m_innerFSB recursion. This is important since
1045
// it gives us a well-known initial state for this flag:
1046
int stateForNextChunk=SUPPRESS_LEADING_WS;
1047
1048    int stop = start + length;
1049    int startChunk = start >>> m_chunkBits;
1050    int startColumn = start & m_chunkMask;
1051    int stopChunk = stop >>> m_chunkBits;
1052    int stopColumn = stop & m_chunkMask;
1053
1054    for (int i = startChunk; i < stopChunk; ++i)
1055    {
1056      if (i == 0 && m_innerFSB != null)
1057                stateForNextChunk=
1058        m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1059                                     m_chunkSize - startColumn);
1060      else
1061                stateForNextChunk=
1062        sendNormalizedSAXcharacters(m_array[i], startColumn,
1063                                    m_chunkSize - startColumn,
1064                                                                        ch,stateForNextChunk);
1065
1066      startColumn = 0; // after first chunk
1067
}
1068
1069    // Last, or only, chunk
1070
if (stopChunk == 0 && m_innerFSB != null)
1071            stateForNextChunk= // %REVIEW% Is this update really needed?
1072
m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1073    else if (stopColumn > startColumn)
1074    {
1075            stateForNextChunk= // %REVIEW% Is this update really needed?
1076
sendNormalizedSAXcharacters(m_array[stopChunk],
1077                                                                    startColumn, stopColumn - startColumn,
1078                                                                    ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1079    }
1080        return stateForNextChunk;
1081  }
1082  
1083  static final char[] SINGLE_SPACE = {' '};
1084      
1085  /**
1086   * Internal method to directly normalize and dispatch the character array.
1087   * This version is aware of the fact that it may be called several times
1088   * in succession if the data is made up of multiple "chunks", and thus
1089   * must actively manage the handling of leading and trailing whitespace.
1090   *
1091   * Note: The recursion is due to the possible recursion of inner FSBs.
1092   *
1093   * @param ch The characters from the XML document.
1094   * @param start The start position in the array.
1095   * @param length The number of characters to read from the array.
1096   * @param handler SAX ContentHandler object to receive the event.
1097   * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1098   * This is a bitfield contining two flags, bitwise-ORed together:
1099   * <dl>
1100   * <dt>SUPPRESS_LEADING_WS</dt>
1101   * <dd>When false, causes leading whitespace to be converted to a single
1102   * space; when true, causes it to be discarded entirely.
1103   * Should be set TRUE for the first chunk, and (in multi-chunk output)
1104   * whenever the previous chunk ended in retained whitespace.</dd>
1105   * <dt>SUPPRESS_TRAILING_WS</dt>
1106   * <dd>When false, causes trailing whitespace to be converted to a single
1107   * space; when true, causes it to be discarded entirely.
1108   * Should be set TRUE for the last or only chunk.
1109   * </dd>
1110   * </dl>
1111   * @return normalization status, as in the edgeTreatmentFlags parameter:
1112   * <dl>
1113   * <dt>0</dt>
1114   * <dd>if this output did not end in retained whitespace, and thus whitespace
1115   * at the start of the following chunk (if any) should be converted to a
1116   * single space.
1117   * <dt>SUPPRESS_LEADING_WS</dt>
1118   * <dd>if this output ended in retained whitespace, and thus whitespace
1119   * at the start of the following chunk (if any) should be completely
1120   * suppressed.</dd>
1121   * </dd>
1122   * </dl>
1123   *
1124   *
1125   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1126   * wrapping another exception.
1127   */

1128  static int sendNormalizedSAXcharacters(char ch[],
1129             int start, int length,
1130             org.xml.sax.ContentHandler JavaDoc handler,
1131                         int edgeTreatmentFlags)
1132          throws org.xml.sax.SAXException JavaDoc
1133  {
1134     boolean processingLeadingWhitespace =
1135                       ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1136     boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1137     boolean suppressTrailingWhitespace =
1138                       ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1139     int currPos = start;
1140     int limit = start+length;
1141
1142     // Strip any leading spaces first, if required
1143
if (processingLeadingWhitespace) {
1144         for (; currPos < limit
1145                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1146              currPos++) { }
1147
1148         // If we've only encountered leading spaces, the
1149
// current state remains unchanged
1150
if (currPos == limit) {
1151             return edgeTreatmentFlags;
1152         }
1153     }
1154
1155     // If we get here, there are no more leading spaces to strip
1156
while (currPos < limit) {
1157         int startNonWhitespace = currPos;
1158
1159         // Grab a chunk of non-whitespace characters
1160
for (; currPos < limit
1161                && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1162              currPos++) { }
1163
1164         // Non-whitespace seen - emit them, along with a single
1165
// space for any preceding whitespace characters
1166
if (startNonWhitespace != currPos) {
1167             if (seenWhitespace) {
1168                 handler.characters(SINGLE_SPACE, 0, 1);
1169                 seenWhitespace = false;
1170             }
1171             handler.characters(ch, startNonWhitespace,
1172                                currPos - startNonWhitespace);
1173         }
1174
1175         int startWhitespace = currPos;
1176
1177         // Consume any whitespace characters
1178
for (; currPos < limit
1179                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1180              currPos++) { }
1181
1182         if (startWhitespace != currPos) {
1183             seenWhitespace = true;
1184         }
1185     }
1186
1187     return (seenWhitespace ? CARRY_WS : 0)
1188            | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1189  }
1190
1191  /**
1192   * Directly normalize and dispatch the character array.
1193   *
1194   * @param ch The characters from the XML document.
1195   * @param start The start position in the array.
1196   * @param length The number of characters to read from the array.
1197   * @param handler SAX ContentHandler object to receive the event.
1198   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1199   * wrapping another exception.
1200   */

1201  public static void sendNormalizedSAXcharacters(char ch[],
1202             int start, int length,
1203             org.xml.sax.ContentHandler JavaDoc handler)
1204          throws org.xml.sax.SAXException JavaDoc
1205  {
1206        sendNormalizedSAXcharacters(ch, start, length,
1207             handler, SUPPRESS_BOTH);
1208    }
1209        
1210    /**
1211   * Sends the specified range of characters as sax Comment.
1212   * <p>
1213   * Note that, unlike sendSAXcharacters, this has to be done as a single
1214   * call to LexicalHandler#comment.
1215   *
1216   * @param ch SAX LexicalHandler object to receive the event.
1217   * @param start Offset of first character in the range.
1218   * @param length Number of characters to send.
1219   * @exception org.xml.sax.SAXException may be thrown by handler's
1220   * characters() method.
1221   */

1222  public void sendSAXComment(
1223          org.xml.sax.ext.LexicalHandler JavaDoc ch, int start, int length)
1224            throws org.xml.sax.SAXException JavaDoc
1225  {
1226
1227    // %OPT% Do it this way for now...
1228
String JavaDoc comment = getString(start, length);
1229    ch.comment(comment.toCharArray(), 0, length);
1230  }
1231
1232  /**
1233   * Copies characters from this string into the destination character
1234   * array.
1235   *
1236   * @param srcBegin index of the first character in the string
1237   * to copy.
1238   * @param srcEnd index after the last character in the string
1239   * to copy.
1240   * @param dst the destination array.
1241   * @param dstBegin the start offset in the destination array.
1242   * @exception IndexOutOfBoundsException If any of the following
1243   * is true:
1244   * <ul><li><code>srcBegin</code> is negative.
1245   * <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1246   * <li><code>srcEnd</code> is greater than the length of this
1247   * string
1248   * <li><code>dstBegin</code> is negative
1249   * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1250   * <code>dst.length</code></ul>
1251   * @exception NullPointerException if <code>dst</code> is <code>null</code>
1252   */

1253  private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1254  {
1255    // %TBD% Joe needs to write this function. Make public when implemented.
1256
}
1257
1258  /**
1259   * Encapsulation c'tor. After this is called, the source FastStringBuffer
1260   * will be reset to use the new object as its m_innerFSB, and will have
1261   * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1262   * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1263   *
1264   * NEEDSDOC @param source
1265   */

1266  private FastStringBuffer(FastStringBuffer source)
1267  {
1268
1269    // Copy existing information into new encapsulation
1270
m_chunkBits = source.m_chunkBits;
1271    m_maxChunkBits = source.m_maxChunkBits;
1272    m_rebundleBits = source.m_rebundleBits;
1273    m_chunkSize = source.m_chunkSize;
1274    m_chunkMask = source.m_chunkMask;
1275    m_array = source.m_array;
1276    m_innerFSB = source.m_innerFSB;
1277
1278    // These have to be adjusted because we're calling just at the time
1279
// when we would be about to allocate another chunk
1280
m_lastChunk = source.m_lastChunk - 1;
1281    m_firstFree = source.m_chunkSize;
1282
1283    // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1284
source.m_array = new char[16][];
1285    source.m_innerFSB = this;
1286
1287    // Since we encapsulated just as we were about to append another
1288
// chunk, return ready to create the chunk after the innerFSB
1289
// -- 1, not 0.
1290
source.m_lastChunk = 1;
1291    source.m_firstFree = 0;
1292    source.m_chunkBits += m_rebundleBits;
1293    source.m_chunkSize = 1 << (source.m_chunkBits);
1294    source.m_chunkMask = source.m_chunkSize - 1;
1295  }
1296}
1297
Popular Tags