KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jibx > runtime > impl > InputStreamWrapper


/*
Copyright (c) 2004, Dennis M. Sosnoski.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of JiBX nor the names of its contributors may be used
   to endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

28
29 package org.jibx.runtime.impl;
30
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
35
/**
 * Wrapper for input stream that supports multiple character encodings. This is
 * needed because the XPP3 pull parser does not support detecting the character
 * encoding for a document based on the content of the document. If used with a
 * common encoding (UTF-8, ISO-8859-1 or ASCII) this performs the conversion to
 * characters using an inner reader class; otherwise, this creates a standard
 * {@link InputStreamReader} for the encoding, reading through the buffer held
 * by this class.
 *
 * @author Dennis M. Sosnoski
 * @version 1.0
 */
public class InputStreamWrapper
{
    /** Default input buffer size. */
    private static final int BUFFER_SIZE = 2048;
    
    /** Message used for every malformed or truncated UTF-8 sequence. */
    private static final String UTF8_ERROR = "UTF-8 conversion error";
    
    /** Name of encoding to be used for stream. */
    private String m_encodingName;
    
    /** Stream for byte input. */
    private InputStream m_stream;
    
    /** Flag for end of stream reached. */
    private boolean m_isEnd;
    
    /** Buffer for input bytes. */
    private byte[] m_buffer;
    
    /** Offset past end of bytes in buffer. */
    private int m_endOffset;
    
    /** Current offset for generating character from buffer. */
    private int m_emptyOffset;
    
    /** Scan position offset used for lookahead in buffer. */
    private int m_scanOffset;
    
    /**
     * Constructor. Allocates the input buffer; no stream is attached until one
     * of the <code>setInput</code> methods is called.
     */
    public InputStreamWrapper() {
        m_buffer = new byte[BUFFER_SIZE];
    }
    
    /**
     * Set input stream with encoding to be defined later. If an input stream is
     * currently open when this is called the existing stream is closed, with
     * any errors ignored.
     *
     * @param ins stream for document data input
     */
    public void setInput(InputStream ins) {
        try {
            close();
        } catch (IOException e) { /* deliberately empty */ }
        m_stream = ins;
        reset();
    }
    
    /**
     * Set input stream with specified encoding. If an input stream is currently
     * open when this is called the existing stream is closed, with any errors
     * ignored.
     *
     * @param ins stream for document data input
     * @param enc character encoding used for input from stream
     * (<code>null</code> if to be determined from XML input)
     * @throws IOException if the encoding cannot be set
     */
    public void setInput(InputStream ins, String enc) throws IOException {
        setInput(ins);
        setEncoding(enc);
    }
    
    /**
     * Set encoding for stream. This call is only valid if the encoding has not
     * been set previously (either by an earlier call or by detection inside
     * {@link #getReader}). The name itself is not validated here; an
     * unsupported name is reported when {@link #getReader} creates the reader.
     *
     * @param enc character encoding used for input from stream
     * (<code>null</code> if to be determined from XML input)
     * @throws IOException if encoding already set
     */
    public void setEncoding(String enc) throws IOException {
        if (m_encodingName != null) {
            throw new IOException("Encoding has already been set for stream");
        }
        m_encodingName = enc;
    }
    
    /**
     * Reads data into the buffer. Any retained data is first copied down to the
     * start of the buffer array. Next, data is read from the wrapped stream
     * into the available space in the buffer.
     *
     * @return <code>true</code> if data has been read into buffer,
     * <code>false</code> if the end of the stream has been reached or the
     * buffer is already completely full of unconsumed data
     * @throws IOException on error reading from wrapped stream, or if no
     * stream is currently set
     */
    private boolean fillBuffer() throws IOException {
        if (m_isEnd) {
            return false;
        }
        if (m_stream == null) {
            throw new IOException("Input stream is not set or has been closed");
        }
        
        // move remaining data in buffer down to start, keeping both offsets
        //  consistent even if the read below throws
        int rem = m_endOffset - m_emptyOffset;
        if (rem > 0 && m_emptyOffset > 0) {
            System.arraycopy(m_buffer, m_emptyOffset, m_buffer, 0, rem);
        }
        m_emptyOffset = 0;
        m_endOffset = rem;
        
        // a zero-length read always returns 0, which would look like progress
        //  to callers looping until enough data is present; report it instead
        int max = m_buffer.length - rem;
        if (max <= 0) {
            return false;
        }
        
        // read to maximum capacity of buffer
        int actual = m_stream.read(m_buffer, rem, max);
        if (actual < 0) {
            m_isEnd = true;
            return false;
        }
        m_endOffset = rem + actual;
        return true;
    }
    
    /**
     * Reads data into the buffer to at least a minimum number of bytes. Data
     * is read from the wrapped stream until the end of the input stream is
     * reached, the buffer is full, or at least the requested number of bytes
     * are present in the buffer.
     *
     * @param min number of bytes required
     * @return <code>true</code> if buffer contains at least the required byte
     * count on return, <code>false</code> if not
     * @throws IOException on error reading from wrapped stream
     */
    private boolean require(int min) throws IOException {
        while (m_endOffset - m_emptyOffset < min) {
            if (!fillBuffer()) {
                return false;
            }
        }
        return true;
    }
    
    /**
     * Check if a character is XML whitespace.
     *
     * @param chr character to be checked
     * @return <code>true</code> if whitespace, <code>false</code> if not
     */
    private boolean isWhite(int chr) {
        return chr == ' ' || chr == 0x09 || chr == 0x0A || chr == 0x0D;
    }
    
    /**
     * Reads a space or equals ('=') delimited token from the scan position in
     * the buffer. This treats bytes in the buffer as equivalent to characters.
     * Leading whitespace is skipped, and an equals sign is returned as a token
     * of its own.
     *
     * @return token read from buffer, or <code>null</code> if the available
     * data ran out before a delimiter was found
     * @throws IOException on error reading from wrapped stream
     */
    private String scanToken() throws IOException {
        boolean skipping = true;
        StringBuffer buff = new StringBuffer();
        while (require(m_scanOffset+1)) {
            char chr = (char)m_buffer[m_scanOffset++];
            if (skipping) {
                if (!isWhite(chr)) {
                    skipping = false;
                    buff.append(chr);
                    if (chr == '=') {
                        return buff.toString();
                    }
                }
            } else if (isWhite(chr) || chr == '=') {
                
                // leave the delimiter to be seen by the next scan
                m_scanOffset--;
                return buff.toString();
            } else {
                buff.append(chr);
            }
        }
        return null;
    }
    
    /**
     * Reads a quote delimited token from the scan position in the buffer. This
     * treats bytes in the buffer as equivalent to characters, and skips past
     * any leading whitespace.
     *
     * @return token read from buffer (without the quotes), or
     * <code>null</code> if the next non-whitespace character is not a quote or
     * no closing quote is found
     * @throws IOException on error reading from wrapped stream
     */
    private String scanQuoted() throws IOException {
        boolean skipping = true;
        int quot = 0;
        StringBuffer buff = new StringBuffer();
        while (require(m_scanOffset+1)) {
            char chr = (char)m_buffer[m_scanOffset++];
            if (skipping) {
                if (!isWhite(chr)) {
                    if (chr == '"' || chr == '\'') {
                        skipping = false;
                        quot = chr;
                    } else {
                        break;
                    }
                }
            } else if (chr == quot) {
                return buff.toString();
            } else {
                buff.append(chr);
            }
        }
        return null;
    }
    
    /**
     * Scans the XML declaration at the start of the buffer for an encoding
     * pseudo-attribute, treating each byte as one character. Scanning stops at
     * the end of the declaration so that content of the document body (such as
     * an element attribute which happens to be named "encoding") is never
     * mistaken for the declared encoding.
     *
     * @return declared encoding name, or <code>null</code> if none found
     * @throws IOException on error reading from wrapped stream
     */
    private String scanDeclaredEncoding() throws IOException {
        
        // skip the leading "<?" already matched by the caller
        m_scanOffset = 2;
        if (!"xml".equals(scanToken())) {
            return null;
        }
        String token;
        while ((token = scanToken()) != null) {
            
            // attribute values are consumed by scanQuoted, so the terminator
            //  always starts a token, possibly with document text attached
            if (token.startsWith("?>")) {
                break;
            }
            if ("encoding".equals(token)) {
                if ("=".equals(scanToken())) {
                    String value = scanQuoted();
                    if (value != null) {
                        return value;
                    }
                }
            } else if ("=".equals(token)) {
                
                // skip the value of some other pseudo-attribute
                scanQuoted();
            }
        }
        return null;
    }
    
    /**
     * Determines the encoding from the first four bytes of the stream and, if
     * those are the start of an XML declaration in a single-byte compatible
     * encoding, from the declaration itself. The result is stored as the
     * encoding name; UTF-8 is used when nothing else is recognized or fewer
     * than four bytes are available. No input is consumed by this method.
     *
     * @throws IOException on error reading from wrapped stream
     */
    private void detectEncoding() throws IOException {
        m_encodingName = "UTF-8";
        if (!require(4)) {
            return;
        }
        
        // get first four bytes for initial determination
        int bom = ((m_buffer[0] & 0xFF) << 24) | ((m_buffer[1] & 0xFF) << 16) |
            ((m_buffer[2] & 0xFF) << 8) | (m_buffer[3] & 0xFF);
        if (bom == 0x3C3F786D) {
            
            // "<?xm": read encoding declaration with single byte characters
            String declared = scanDeclaredEncoding();
            if (declared != null) {
                m_encodingName = declared;
            }
            
        } else if (bom == 0x0000FEFF || bom == 0xFFFE0000 ||
            bom == 0x0000FFFE || bom == 0xFEFF0000) {
            
            // just use generic UCS-4 and let the libraries figure it out
            // NOTE(review): confirm the runtime in use accepts "UCS-4" as a
            //  charset name
            m_encodingName = "UCS-4";
            
        } else if ((bom & 0xFFFFFF00) == 0xEFBBBF00) {
            
            // UTF-8 as specified by byte order mark
            m_encodingName = "UTF-8";
            
        } else {
            int upper = bom & 0xFFFF0000;
            if (upper == 0xFEFF0000 || bom == 0x003C003F) {
                
                // assume UTF-16BE for 16-bit BE
                m_encodingName = "UTF-16BE";
                
            } else if (upper == 0xFFFE0000 || bom == 0x3C003F00) {
                
                // assume UTF-16LE for 16-bit LE
                m_encodingName = "UTF-16LE";
                
            } else if (bom == 0x4C6FA794) {
                
                // just because we can, even though nobody should
                // NOTE(review): confirm the runtime in use accepts "EBCDIC" as
                //  a charset name
                m_encodingName = "EBCDIC";
            }
        }
    }
    
    /**
     * Get reader for wrapped input stream. This creates and returns a reader
     * using the appropriate encoding, if necessary reading and examining the
     * first part of the stream (including the XML declaration, if present) to
     * determine the encoding. Bytes examined for detection stay in the buffer
     * and are delivered through the returned reader.
     *
     * @return reader delivering the characters of the document
     * @throws IOException if error reading from document or creating a reader
     * for the encoding found
     */
    public Reader getReader() throws IOException {
        
        // check if we need to determine an encoding
        if (m_encodingName == null) {
            detectEncoding();
        }
        if (m_encodingName.equalsIgnoreCase("UTF-8")) {
            return new WrappedStreamUTF8Reader();
        } else if (m_encodingName.equalsIgnoreCase("ISO-8859-1") ||
            m_encodingName.equalsIgnoreCase("ASCII")) {
            return new WrappedStreamISO88591Reader();
        } else {
            return new InputStreamReader(new WrappedStream(), m_encodingName);
        }
    }
    
    /**
     * Get encoding for input document. This call may not return an accurate
     * result until after {@link #getReader} is called.
     *
     * @return character encoding for input document
     */
    public String getEncoding() {
        return m_encodingName;
    }
    
    /**
     * Close document input. Completes reading of document input, including
     * closing the input medium.
     *
     * @throws IOException on error closing document
     */
    public void close() throws IOException {
        if (m_stream != null) {
            m_stream.close();
            m_stream = null;
        }
        reset();
    }
    
    /**
     * Reset to initial state for reuse. Discards any buffered data and the
     * current encoding; the stream reference itself is left unchanged.
     */
    public void reset() {
        m_isEnd = false;
        m_endOffset = 0;
        m_emptyOffset = 0;
        m_scanOffset = 0;
        m_encodingName = null;
    }
    
    /**
     * Get the total length in bytes of the UTF-8 sequence introduced by a lead
     * byte.
     *
     * @param lead first byte of sequence (as a signed byte value)
     * @return sequence length (1 to 4), or <code>0</code> if the byte cannot
     * start a sequence
     */
    private static int utf8SequenceLength(int lead) {
        if (lead >= 0) {
            return 1;
        } else if ((lead & 0xE0) == 0xC0) {
            return 2;
        } else if ((lead & 0xF0) == 0xE0) {
            return 3;
        } else if ((lead & 0xF8) == 0xF0) {
            return 4;
        } else {
            return 0;
        }
    }
    
    /**
     * Stream that just uses the enclosing class to buffer input from the
     * wrapped stream.
     */
    private class WrappedStream extends InputStream
    {
        /* (non-Javadoc)
         * @see java.io.InputStream#available()
         */
        public int available() throws IOException {
            return m_endOffset - m_emptyOffset + m_stream.available();
        }
        
        /* (non-Javadoc)
         * @see java.io.InputStream#close()
         */
        public void close() throws IOException {
            InputStreamWrapper.this.close();
        }
        
        /* (non-Javadoc)
         * Keeps reading until the requested length is satisfied or the end of
         * the wrapped stream is reached.
         * @see java.io.InputStream#read(byte[], int, int)
         */
        public int read(byte[] b, int off, int len) throws IOException {
            int avail;
            int actual = 0;
            while (len > (avail = m_endOffset - m_emptyOffset)) {
                
                // hand over everything buffered, then refill from the stream
                System.arraycopy(m_buffer, m_emptyOffset, b, off, avail);
                off += avail;
                len -= avail;
                actual += avail;
                m_emptyOffset = m_endOffset = 0;
                if (!fillBuffer()) {
                    return actual == 0 ? -1 : actual;
                }
            }
            System.arraycopy(m_buffer, m_emptyOffset, b, off, len);
            m_emptyOffset += len;
            return actual + len;
        }
        
        /* (non-Javadoc)
         * @see java.io.InputStream#read(byte[])
         */
        public int read(byte[] b) throws IOException {
            return read(b, 0, b.length);
        }
        
        /* (non-Javadoc)
         * @see java.io.InputStream#skip(long)
         */
        public long skip(long n) throws IOException {
            if (n <= 0) {
                return 0;
            }
            int avail = m_endOffset - m_emptyOffset;
            if (n < avail) {
                m_emptyOffset += (int)n;
                return n;
            }
            
            // discard the buffered bytes so they are not delivered again after
            //  the wrapped stream has moved past them
            m_emptyOffset = m_endOffset;
            return avail + m_stream.skip(n - avail);
        }
        
        /* (non-Javadoc)
         * @see java.io.InputStream#read()
         */
        public int read() throws IOException {
            if (m_emptyOffset >= m_endOffset && !fillBuffer()) {
                return -1;
            } else {
                
                // mask so that bytes 0x80-0xFF are not taken for end of stream
                return m_buffer[m_emptyOffset++] & 0xFF;
            }
        }
    }
    
    /**
     * Reader for input stream using UTF-8 encoding. This uses the enclosing
     * class to buffer input from the stream, interpreting it as characters on
     * demand. Sequences of one to four bytes are handled, with four-byte
     * sequences delivered as a surrogate pair. Lead and continuation bytes are
     * validated; overlong forms of two and three byte sequences are not
     * checked.
     */
    private class WrappedStreamUTF8Reader extends Reader
    {
        /** Low surrogate still to be delivered (<code>0</code> if none). */
        private char m_pendingLow;
        
        /* (non-Javadoc)
         * @see java.io.Reader#close()
         */
        public void close() throws IOException {
            InputStreamWrapper.this.close();
        }
        
        /**
         * Decode the next character from the buffer, reading more input from
         * the wrapped stream as needed to complete the byte sequence.
         *
         * @return code point value, or <code>-1</code> if at end of input
         * @throws IOException on read error, or if the byte sequence is
         * malformed or cut short by the end of the input
         */
        private int nextCodePoint() throws IOException {
            if (m_emptyOffset >= m_endOffset && !require(1)) {
                return -1;
            }
            
            // check for single-byte vs multi-byte character next
            int lead = m_buffer[m_emptyOffset];
            if (lead >= 0) {
                m_emptyOffset++;
                return lead;
            }
            
            // make sure the whole sequence is present (this may move the data
            //  in the buffer, so offsets are only read afterwards)
            int length = utf8SequenceLength(lead);
            if (length == 0 || !require(length)) {
                throw new IOException(UTF8_ERROR);
            }
            int base = m_emptyOffset;
            
            // payload of lead byte is 5, 4, or 3 bits for lengths 2, 3, and 4
            int value = lead & (0xFF >> (length + 1));
            for (int i = 1; i < length; i++) {
                int next = m_buffer[base + i];
                if ((next & 0xC0) != 0x80) {
                    throw new IOException(UTF8_ERROR);
                }
                value = (value << 6) | (next & 0x3F);
            }
            if (length == 4 && (value < 0x10000 || value > 0x10FFFF)) {
                throw new IOException(UTF8_ERROR);
            }
            m_emptyOffset = base + length;
            return value;
        }
        
        /**
         * Get high (leading) surrogate for a supplementary code point.
         *
         * @param cp code point in range 0x10000 to 0x10FFFF
         * @return high surrogate character
         */
        private char highSurrogate(int cp) {
            return (char)(0xD7C0 + (cp >> 10));
        }
        
        /**
         * Get low (trailing) surrogate for a supplementary code point.
         *
         * @param cp code point in range 0x10000 to 0x10FFFF
         * @return low surrogate character
         */
        private char lowSurrogate(int cp) {
            return (char)(0xDC00 + (cp & 0x3FF));
        }
        
        /* (non-Javadoc)
         * Keeps converting until the requested length is satisfied or the end
         * of the wrapped stream is reached.
         * @see java.io.Reader#read(char[], int, int)
         */
        public int read(char[] b, int off, int len) throws IOException {
            int start = off;
            int end = off + len;
            
            // deliver second half of a pair split by the previous call
            if (off < end && m_pendingLow != 0) {
                b[off++] = m_pendingLow;
                m_pendingLow = 0;
            }
            while (off < end) {
                int cp = nextCodePoint();
                if (cp < 0) {
                    int actual = off - start;
                    return actual > 0 ? actual : -1;
                }
                if (cp < 0x10000) {
                    b[off++] = (char)cp;
                } else {
                    b[off++] = highSurrogate(cp);
                    if (off < end) {
                        b[off++] = lowSurrogate(cp);
                    } else {
                        
                        // no room left, hold second half for next call
                        m_pendingLow = lowSurrogate(cp);
                    }
                }
            }
            return len;
        }
        
        /* (non-Javadoc)
         * @see java.io.Reader#read(char[])
         */
        public int read(char[] b) throws IOException {
            return read(b, 0, b.length);
        }
        
        /* (non-Javadoc)
         * @see java.io.Reader#read()
         */
        public int read() throws IOException {
            if (m_pendingLow != 0) {
                int low = m_pendingLow;
                m_pendingLow = 0;
                return low;
            }
            int cp = nextCodePoint();
            if (cp >= 0x10000) {
                m_pendingLow = lowSurrogate(cp);
                return highSurrogate(cp);
            }
            return cp;
        }
        
        /* (non-Javadoc)
         * Ready when a complete character is already buffered, so that reading
         * it needs no access to the wrapped stream.
         * @see java.io.Reader#ready()
         */
        public boolean ready() throws IOException {
            if (m_pendingLow != 0) {
                return true;
            }
            int avail = m_endOffset - m_emptyOffset;
            if (avail <= 0) {
                return false;
            }
            return avail >= utf8SequenceLength(m_buffer[m_emptyOffset]);
        }
    }
    
    /**
     * Reader for input stream using ISO8859-1 encoding. This uses the enclosing
     * class to buffer input from the stream, interpreting it as characters on
     * demand. Each byte is converted directly to the character with the same
     * value.
     */
    private class WrappedStreamISO88591Reader extends Reader
    {
        /* (non-Javadoc)
         * @see java.io.Reader#close()
         */
        public void close() throws IOException {
            InputStreamWrapper.this.close();
        }
        
        /* (non-Javadoc)
         * Keeps converting until the requested length is satisfied or the end
         * of the wrapped stream is reached.
         * @see java.io.Reader#read(char[], int, int)
         */
        public int read(char[] b, int off, int len) throws IOException {
            int start = off;
            int end = off + len;
            while (off < end) {
                
                // make sure there's data in buffer
                if (m_emptyOffset >= m_endOffset && !fillBuffer()) {
                    int actual = off - start;
                    return actual > 0 ? actual : -1;
                }
                
                // convert as many buffered bytes as the caller has room for
                int limit = Math.min(m_endOffset, m_emptyOffset + (end - off));
                while (m_emptyOffset < limit) {
                    b[off++] = (char)(m_buffer[m_emptyOffset++] & 0xFF);
                }
            }
            return len;
        }
        
        /* (non-Javadoc)
         * @see java.io.Reader#read(char[])
         */
        public int read(char[] b) throws IOException {
            return read(b, 0, b.length);
        }
        
        /* (non-Javadoc)
         * @see java.io.Reader#read()
         */
        public int read() throws IOException {
            if (m_emptyOffset >= m_endOffset && !fillBuffer()) {
                return -1;
            } else {
                return m_buffer[m_emptyOffset++] & 0xFF;
            }
        }
        
        /* (non-Javadoc)
         * @see java.io.Reader#ready()
         */
        public boolean ready() throws IOException {
            return m_emptyOffset < m_endOffset;
        }
    }
}
Popular Tags