KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > serializer > WriterToUTF8Buffered


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 /*
17  * $Id: WriterToUTF8Buffered.java,v 1.5 2004/02/17 04:18:18 minchau Exp $
18  */

19 package org.apache.xml.serializer;
20
21 import java.io.IOException JavaDoc;
22 import java.io.OutputStream JavaDoc;
23 import java.io.UnsupportedEncodingException JavaDoc;
24 import java.io.Writer JavaDoc;
25
26
/**
 * Writes Unicode characters to a byte stream (java.io.OutputStream) encoded
 * as UTF-8, as quickly as possible. Output is collected in an internal byte
 * buffer which must be pushed to the OutputStream when done; this is done by
 * close(), flush() or flushBuffer().
 *
 * <p>A high surrogate immediately followed by a low surrogate within a single
 * write call is encoded as one four-byte UTF-8 sequence. No state is kept
 * between calls, so an unpaired surrogate (including a pair that the caller
 * splits across two separate calls) is encoded on its own as three bytes.
 *
 * <p>Instances are not synchronized.
 */
public final class WriterToUTF8Buffered extends Writer
{

  /**
   * Number of bytes the byte buffer is filled up to before it is flushed.
   * A fixed constant is used rather than m_outputBytes.length for performance.
   */
  private static final int BYTES_MAX = 16 * 1024;

  /**
   * Number of characters that are encoded in one go. This is 1/3 of the
   * number of bytes because UTF-8 encoding can expand one UTF-16 character
   * to as many as 3 bytes (a surrogate pair, 2 characters, becomes 4 bytes).
   */
  private static final int CHARS_MAX = (BYTES_MAX / 3);

  /** The byte stream to write to. */
  private final OutputStream m_os;

  /** The internal buffer where encoded bytes are stored until flushed. */
  private final byte m_outputBytes[];

  /** Scratch buffer used to pull characters out of a String in chunks. */
  private final char m_inputChars[];

  /**
   * The number of valid bytes in the buffer; elements m_outputBytes[0]
   * through m_outputBytes[count-1] contain valid byte data.
   */
  private int count;

  /**
   * Create a buffered UTF-8 writer. The buffer size is a fixed constant.
   *
   * @param out the underlying output stream.
   *
   * @throws UnsupportedEncodingException declared for compatibility with
   *         existing callers; this constructor does not throw it itself.
   */
  public WriterToUTF8Buffered(OutputStream out)
          throws UnsupportedEncodingException
  {
    m_os = out;

    // 3 extra bytes so that write(int) may start a (up to 3 byte) character
    // at position BYTES_MAX-1 without an extra bounds check.
    m_outputBytes = new byte[BYTES_MAX + 3];

    // Big enough to hold one chunk of input characters.
    m_inputChars = new char[CHARS_MAX + 1];
    count = 0;
  }

  /**
   * Write a single character. The character to be written is contained in
   * the 16 low-order bits of the given integer value; the 16 high-order bits
   * are ignored.
   *
   * @param c int specifying a character to be written.
   * @exception IOException If an I/O error occurs
   */
  public void write(final int c) throws IOException
  {

    /* If we are close to the end of the buffer then flush it.
     * Remember the buffer can hold a few more bytes than BYTES_MAX.
     */
    if (count >= BYTES_MAX)
      flushBuffer();

    // Honour the Writer contract: only the low 16 bits are significant.
    final int ch = c & 0xFFFF;

    if (ch < 0x80)
    {
      m_outputBytes[count++] = (byte) (ch);
    }
    else if (ch < 0x800)
    {
      m_outputBytes[count++] = (byte) (0xc0 + (ch >> 6));
      m_outputBytes[count++] = (byte) (0x80 + (ch & 0x3f));
    }
    else
    {
      m_outputBytes[count++] = (byte) (0xe0 + (ch >> 12));
      m_outputBytes[count++] = (byte) (0x80 + ((ch >> 6) & 0x3f));
      m_outputBytes[count++] = (byte) (0x80 + (ch & 0x3f));
    }
  }

  /**
   * Write a portion of an array of characters.
   *
   * @param chars Array of characters
   * @param start Offset from which to start writing characters
   * @param length Number of characters to write; nothing is written if
   *        this is not positive
   *
   * @exception IOException If an I/O error occurs
   */
  public void write(final char chars[], final int start, final int length)
          throws java.io.IOException
  {

    int offset = start;
    int remaining = length;

    // Work through the input in chunks of at most CHARS_MAX characters so
    // that 3*chunk can neither overflow an int nor the byte buffer.
    while (remaining > 0)
    {
      int n = Math.min(remaining, CHARS_MAX);

      // Do not separate a high surrogate from the character that follows it.
      if (n < remaining && isHighSurrogate(chars[offset + n - 1]))
        n--;

      // Each character expands to at most 3 bytes.
      if (3 * n >= BYTES_MAX - count)
        flushBuffer();

      encode(chars, offset, offset + n);
      offset += n;
      remaining -= n;
    }
  }

  /**
   * Write a string.
   *
   * @param s String to be written
   *
   * @exception IOException If an I/O error occurs
   */
  public void write(final String s) throws IOException
  {

    final int length = s.length();
    int offset = 0;

    while (offset < length)
    {
      int n = Math.min(length - offset, CHARS_MAX);

      // Do not separate a high surrogate from the character that follows it.
      if (n < length - offset && isHighSurrogate(s.charAt(offset + n - 1)))
        n--;

      s.getChars(offset, offset + n, m_inputChars, 0);

      // Each character expands to at most 3 bytes.
      if (3 * n >= BYTES_MAX - count)
        flushBuffer();

      encode(m_inputChars, 0, n);
      offset += n;
    }
  }

  /**
   * Writes a string that contains only ASCII characters, skipping the
   * UTF-8 encoding step: every character is narrowed to a single byte.
   *
   * @param s A string with only ASCII characters
   * @throws IOException If an I/O error occurs
   */
  public void directWrite(final String s) throws IOException
  {

    final int length = s.length();
    int offset = 0;

    // Never pull more characters out of the string than m_inputChars holds.
    while (offset < length)
    {
      final int n = Math.min(length - offset, CHARS_MAX);

      s.getChars(offset, offset + n, m_inputChars, 0);
      directWrite(m_inputChars, 0, n);
      offset += n;
    }
  }

  /**
   * Writes out a character array that contains only ASCII characters, so
   * each character is narrowed to a single byte.
   *
   * @param chars a character array with only ASCII characters
   * @param start the first character in the input array
   * @param length the number of characters to write
   * @throws java.io.IOException If an I/O error occurs
   */
  private void directWrite(final char chars[], final int start, final int length)
          throws java.io.IOException
  {

    int offset = start;
    int remaining = length;

    while (remaining > 0)
    {
      final int n = Math.min(remaining, CHARS_MAX);

      if (n >= BYTES_MAX - count)
        flushBuffer();

      final byte[] buf_loc = m_outputBytes; // local reference for faster access
      int count_loc = count;                // local integer for faster access
      final int end = offset + n;

      for (int i = offset; i < end; i++)
        buf_loc[count_loc++] = (byte) chars[i];

      // Store the local integer back into the instance variable
      count = count_loc;
      offset = end;
      remaining -= n;
    }
  }

  /**
   * Flush the internal buffer to the underlying stream, without flushing
   * the underlying stream itself.
   *
   * @throws IOException If an I/O error occurs
   */
  public void flushBuffer() throws IOException
  {

    if (count > 0)
    {
      m_os.write(m_outputBytes, 0, count);

      count = 0;
    }
  }

  /**
   * Flush the stream: write any buffered bytes to the underlying output
   * stream and then flush that stream as well.
   *
   * @exception IOException If an I/O error occurs
   */
  public void flush() throws java.io.IOException
  {
    flushBuffer();
    m_os.flush();
  }

  /**
   * Close the stream, flushing the internal buffer first. The underlying
   * stream is closed even if writing the buffered bytes fails.
   *
   * @exception IOException If an I/O error occurs
   */
  public void close() throws java.io.IOException
  {
    try
    {
      flushBuffer();
    }
    finally
    {
      m_os.close();
    }
  }

  /**
   * Get the output stream that this writer sends its bytes to.
   *
   * @return the stream that was passed to the constructor.
   */
  public OutputStream getOutputStream()
  {
    return m_os;
  }

  /**
   * Tells whether the given UTF-16 code unit is a high (leading) surrogate.
   */
  private static boolean isHighSurrogate(final char c)
  {
    return c >= 0xD800 && c <= 0xDBFF;
  }

  /**
   * Tells whether the given UTF-16 code unit is a low (trailing) surrogate.
   */
  private static boolean isLowSurrogate(final char c)
  {
    return c >= 0xDC00 && c <= 0xDFFF;
  }

  /**
   * Encodes chars[start] through chars[end-1] as UTF-8 into the byte buffer,
   * starting at position count, and advances count. The caller must have
   * made sure that the buffer has room for 3*(end-start) more bytes.
   */
  private void encode(final char[] chars, final int start, final int end)
  {

    final byte[] buf_loc = m_outputBytes; // local reference for faster access
    int count_loc = count;                // local integer for faster access
    int i = start;
    char c;

    /* This first loop could be omitted and the code would produce
     * the same result. It exists to give the JIT a better chance of
     * optimizing the tight and common case of plain ASCII output.
     */
    for (; i < end && (c = chars[i]) < 0x80; i++)
      buf_loc[count_loc++] = (byte) c;

    for (; i < end; i++)
    {
      c = chars[i];

      if (c < 0x80)
      {
        buf_loc[count_loc++] = (byte) (c);
      }
      else if (c < 0x800)
      {
        buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
      }
      else if (isHighSurrogate(c) && i + 1 < end && isLowSurrogate(chars[i + 1]))
      {
        // A surrogate pair is one supplementary code point: 4 bytes.
        final int cp = 0x10000 + ((c - 0xD800) << 10) + (chars[i + 1] - 0xDC00);

        i++; // the low surrogate has been consumed as well
        buf_loc[count_loc++] = (byte) (0xf0 + (cp >> 18));
        buf_loc[count_loc++] = (byte) (0x80 + ((cp >> 12) & 0x3f));
        buf_loc[count_loc++] = (byte) (0x80 + ((cp >> 6) & 0x3f));
        buf_loc[count_loc++] = (byte) (0x80 + (cp & 0x3f));
      }
      else
      {
        buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
        buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
      }
    }

    // Store the local integer back into the instance variable
    count = count_loc;
  }
}
481
Popular Tags