KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serializer > WriterToUTF8Buffered


/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: WriterToUTF8Buffered.java,v 1.5 2004/02/17 04:18:18 minchau Exp $
 */

19 package com.sun.org.apache.xml.internal.serializer;
20
21 import java.io.IOException JavaDoc;
22 import java.io.OutputStream JavaDoc;
23 import java.io.UnsupportedEncodingException JavaDoc;
24 import java.io.Writer JavaDoc;
25
26
/**
 * This class writes unicode characters to a byte stream (java.io.OutputStream)
 * as quickly as possible, encoding them as UTF-8. It buffers the output in an
 * internal buffer which must be flushed to the OutputStream when done. This
 * flushing is done via the close(), flush() or flushBuffer() method.
 *
 * <p>NOTE: every char at or above 0x800 is written as a three byte sequence.
 * The two halves of a surrogate pair are therefore encoded separately (three
 * bytes each) rather than combined into one four byte sequence.
 */
public final class WriterToUTF8Buffered extends Writer
{

  /**
   * Fill level at which the byte buffer is flushed.
   * A fixed constant is used rather than m_outputBytes.length for performance.
   */
  private static final int BYTES_MAX = 16 * 1024;

  /**
   * Number of characters that always fit into an empty byte buffer.
   * This is 1/3 of the number of bytes because the encoding used here
   * expands one char to at most 3 bytes.
   */
  private static final int CHARS_MAX = (BYTES_MAX / 3);

  /** The byte stream to write to. */
  private final OutputStream m_os;

  /**
   * The internal buffer where encoded bytes are stored until they are flushed.
   * It is allocated with 3 bytes more than BYTES_MAX, see the constructor.
   */
  private final byte m_outputBytes[];

  /**
   * Scratch buffer that String arguments are copied into before encoding.
   * It holds CHARS_MAX + 1 chars, so it must never be asked to hold more.
   */
  private final char m_inputChars[];

  /**
   * The number of valid bytes in the buffer. This value is always
   * in the range <tt>0</tt> through <tt>m_outputBytes.length</tt>; elements
   * <tt>m_outputBytes[0]</tt> through <tt>m_outputBytes[count-1]</tt> contain
   * valid byte data.
   */
  private int count;

  /**
   * Create a buffered UTF-8 writer.
   *
   * @param out the underlying output stream.
   *
   * @throws UnsupportedEncodingException never thrown by this implementation;
   * the clause is kept so that existing callers still compile.
   */
  public WriterToUTF8Buffered(OutputStream out)
          throws UnsupportedEncodingException
  {
    m_os = out;

    // Get 3 extra bytes to make buffer overflow checking simpler and faster:
    // write(int) only has to check once per character, not once per byte.
    m_outputBytes = new byte[BYTES_MAX + 3];

    // Big enough to hold the input chars that will be transformed
    // into output bytes in m_outputBytes.
    m_inputChars = new char[CHARS_MAX + 1];
    count = 0;
  }

  /**
   * Write a single character. The character to be written is contained in
   * the 16 low-order bits of the given integer value; the 16 high-order bits
   * are ignored.
   *
   * @param c int specifying a character to be written.
   * @exception IOException If an I/O error occurs
   */
  public void write(final int c) throws IOException
  {

    /* If we are close to the end of the buffer then flush it.
     * Remember the buffer can hold a few more bytes than BYTES_MAX,
     * so a three byte sequence always fits after this check.
     */
    if (count >= BYTES_MAX)
      flushBuffer();

    // Honour the documented contract: only the low 16 bits are a character.
    // Without the mask a value above 0xFFFF produces a malformed sequence.
    final int ch = c & 0xFFFF;

    if (ch < 0x80)
    {
      m_outputBytes[count++] = (byte) (ch);
    }
    else if (ch < 0x800)
    {
      m_outputBytes[count++] = (byte) (0xc0 + (ch >> 6));
      m_outputBytes[count++] = (byte) (0x80 + (ch & 0x3f));
    }
    else
    {
      m_outputBytes[count++] = (byte) (0xe0 + (ch >> 12));
      m_outputBytes[count++] = (byte) (0x80 + ((ch >> 6) & 0x3f));
      m_outputBytes[count++] = (byte) (0x80 + (ch & 0x3f));
    }
  }

  /**
   * Write a portion of an array of characters.
   *
   * @param chars Array of characters
   * @param start Offset from which to start writing characters
   * @param length Number of characters to write
   *
   * @exception IOException If an I/O error occurs
   */
  public void write(final char chars[], final int start, final int length)
          throws java.io.IOException
  {

    // We multiply the length by three since this is the maximum number of
    // bytes the characters can expand to. The product is computed as a long
    // so that a very large length cannot wrap around to a small value.
    final long lengthx3 = 3L * length;

    if (lengthx3 >= BYTES_MAX - count)
    {
      // The requested length is greater than the unused part of the buffer
      flushBuffer();

      if (lengthx3 >= BYTES_MAX)
      {
        /*
         * The requested length exceeds the size of the buffer.
         * Cut the input up into chunks, each of which will
         * not cause an overflow to the output buffer m_outputBytes,
         * and make multiple recursive calls.
         */
        final int chunks = 1 + length / CHARS_MAX;
        int end_chunk = start;
        for (int chunk = 1; chunk <= chunks; chunk++)
        {
          final int start_chunk = end_chunk;
          end_chunk = start + chunkOffset(length, chunk, chunks);
          this.write(chars, start_chunk, end_chunk - start_chunk);
        }
        return;
      }
    }

    encodeIntoBuffer(chars, start, start + length);
  }

  /**
   * Write a string.
   *
   * @param s String to be written
   *
   * @exception IOException If an I/O error occurs
   */
  public void write(final String s) throws IOException
  {

    final int length = s.length();

    // Worst case number of bytes, as a long to avoid integer overflow.
    final long lengthx3 = 3L * length;

    if (lengthx3 >= BYTES_MAX - count)
    {
      // The requested length is greater than the unused part of the buffer
      flushBuffer();

      if (lengthx3 >= BYTES_MAX)
      {
        /*
         * The requested length exceeds the size of the buffer,
         * so break it up in chunks that don't exceed the buffer size.
         * Each chunk holds at most CHARS_MAX chars and so fits m_inputChars.
         */
        final int chunks = 1 + length / CHARS_MAX;
        int end_chunk = 0;
        for (int chunk = 1; chunk <= chunks; chunk++)
        {
          final int start_chunk = end_chunk;
          end_chunk = chunkOffset(length, chunk, chunks);
          s.getChars(start_chunk, end_chunk, m_inputChars, 0);
          this.write(m_inputChars, 0, end_chunk - start_chunk);
        }
        return;
      }
    }

    // Here 3*length < BYTES_MAX, so length <= CHARS_MAX and the
    // scratch buffer is large enough.
    s.getChars(0, length, m_inputChars, 0);
    encodeIntoBuffer(m_inputChars, 0, length);
  }

  /**
   * Flush the internal buffer to the underlying stream.
   * The underlying stream itself is not flushed.
   *
   * @throws IOException If the underlying stream fails to accept the bytes
   */
  public void flushBuffer() throws IOException
  {

    if (count > 0)
    {
      m_os.write(m_outputBytes, 0, count);

      count = 0;
    }
  }

  /**
   * Flush the stream: write any buffered bytes to the underlying
   * stream and then flush that stream as well.
   *
   * @exception IOException If an I/O error occurs
   */
  public void flush() throws java.io.IOException
  {
    flushBuffer();
    m_os.flush();
  }

  /**
   * Close the stream, flushing the internal buffer first.
   * The underlying stream is closed even when that final flush fails.
   *
   * @exception IOException If an I/O error occurs
   */
  public void close() throws java.io.IOException
  {
    try
    {
      flushBuffer();
    }
    finally
    {
      // Do not leak the underlying stream when the flush throws.
      m_os.close();
    }
  }

  /**
   * Get the output stream that this writer writes to.
   *
   * @return the stream that was passed to the constructor.
   */
  public OutputStream getOutputStream()
  {
    return m_os;
  }

  /**
   * Write a string without UTF-8 encoding it: the low-order byte of every
   * char is copied to the output as is.
   *
   * @param s A string with only ASCII characters
   * @throws IOException If an I/O error occurs
   */
  public void directWrite(final String s) throws IOException
  {

    final int length = s.length();

    if (length >= BYTES_MAX - count)
    {
      // The requested length is greater than the unused part of the buffer
      flushBuffer();

      if (length >= BYTES_MAX)
      {
        /*
         * The requested length exceeds the size of the buffer,
         * so break it up in chunks. Each chunk holds at most
         * CHARS_MAX chars and so fits into m_inputChars.
         */
        final int chunks = 1 + length / CHARS_MAX;
        int end_chunk = 0;
        for (int chunk = 1; chunk <= chunks; chunk++)
        {
          final int start_chunk = end_chunk;
          end_chunk = chunkOffset(length, chunk, chunks);
          s.getChars(start_chunk, end_chunk, m_inputChars, 0);
          this.directWrite(m_inputChars, 0, end_chunk - start_chunk);
        }
        return;
      }
    }

    // The string fits into the free part of the byte buffer, but it may be
    // longer than m_inputChars (up to BYTES_MAX - 1 chars against a capacity
    // of CHARS_MAX + 1), so copy straight from the string instead of going
    // through the scratch buffer.
    final byte[] buf_loc = m_outputBytes; // local reference for faster access
    int count_loc = count; // local integer for faster access
    for (int i = 0; i < length; i++)
      buf_loc[count_loc++] = (byte) s.charAt(i);

    // Store the local integer back into the instance variable
    count = count_loc;
  }

  /**
   * Writes out the character array without UTF-8 encoding it: the
   * low-order byte of every char is copied to the output as is.
   *
   * @param chars a character array with only ASCII characters, so
   * the UTF-8 encoding is optimized.
   * @param start the first character in the input array
   * @param length the number of characters in the input array
   * @throws IOException If an I/O error occurs
   */
  private void directWrite(final char chars[], final int start, final int length)
          throws java.io.IOException
  {

    if (length >= BYTES_MAX - count)
    {
      // The requested length is greater than the unused part of the buffer
      flushBuffer();

      if (length >= BYTES_MAX)
      {
        /*
         * The requested length exceeds the size of the buffer.
         * Cut the input up into chunks, each of which will
         * not cause an overflow to the output buffer m_outputBytes,
         * and make multiple recursive calls.
         */
        final int chunks = 1 + length / CHARS_MAX;
        int end_chunk = start;
        for (int chunk = 1; chunk <= chunks; chunk++)
        {
          final int start_chunk = end_chunk;
          end_chunk = start + chunkOffset(length, chunk, chunks);
          this.directWrite(chars, start_chunk, end_chunk - start_chunk);
        }
        return;
      }
    }

    final int end = start + length;
    final byte[] buf_loc = m_outputBytes; // local reference for faster access
    int count_loc = count; // local integer for faster access

    // Copy from the input array (not from the output buffer itself).
    for (int i = start; i < end; i++)
      buf_loc[count_loc++] = (byte) chars[i];

    // Store the local integer back into the instance variable
    count = count_loc;
  }

  /**
   * Encodes chars[start] up to but excluding chars[end] into the byte
   * buffer. The caller must have made sure that the buffer has room for
   * three bytes per character; no flushing is done here.
   *
   * @param chars the characters to encode
   * @param start index of the first character to encode
   * @param end index one past the last character to encode
   */
  private void encodeIntoBuffer(final char chars[], final int start, final int end)
  {

    final byte[] buf_loc = m_outputBytes; // local reference for faster access
    int count_loc = count; // local integer for faster access
    int i = start;
    {
      /* This block could be omitted and the code would produce
       * the same result. But this block exists to give the JIT
       * a better chance of optimizing a tight and common loop which
       * occurs when writing out ASCII characters.
       */
      char c;
      for (; i < end && (c = chars[i]) < 0x80; i++)
        buf_loc[count_loc++] = (byte) c;
    }
    for (; i < end; i++)
    {
      final char c = chars[i];

      if (c < 0x80)
      {
        buf_loc[count_loc++] = (byte) (c);
      }
      else if (c < 0x800)
      {
        buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
      }
      else
      {
        buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
        buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
      }
    }

    // Store the local integer back into the instance variable
    count = count_loc;
  }

  /**
   * Returns the offset, relative to the start of the input, at which the
   * given chunk ends when length characters are split evenly into chunks
   * pieces. The multiplication is done in long arithmetic so that it
   * cannot overflow.
   *
   * @param length total number of characters
   * @param chunk one-based number of the chunk whose end is wanted
   * @param chunks total number of chunks
   * @return length * chunk / chunks
   */
  private static int chunkOffset(final int length, final int chunk, final int chunks)
  {
    return (int) ((((long) length) * chunk) / chunks);
  }
}
483
Popular Tags