KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > hsqldb > lib > StringConverter


1 /* Copyright (c) 1995-2000, The Hypersonic SQL Group.
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * Redistributions of source code must retain the above copyright notice, this
8  * list of conditions and the following disclaimer.
9  *
10  * Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  *
14  * Neither the name of the Hypersonic SQL Group nor the names of its
15  * contributors may be used to endorse or promote products derived from this
16  * software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
22  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * This software consists of voluntary contributions made by many individuals
31  * on behalf of the Hypersonic SQL Group.
32  *
33  *
34  * For work added by the HSQL Development Group:
35  *
36  * Copyright (c) 2001-2005, The HSQL Development Group
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions are met:
41  *
42  * Redistributions of source code must retain the above copyright notice, this
43  * list of conditions and the following disclaimer.
44  *
45  * Redistributions in binary form must reproduce the above copyright notice,
46  * this list of conditions and the following disclaimer in the documentation
47  * and/or other materials provided with the distribution.
48  *
49  * Neither the name of the HSQL Development Group nor the names of its
50  * contributors may be used to endorse or promote products derived from this
51  * software without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
54  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
57  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
61  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
62  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  */

65
66
67 package org.hsqldb.lib;
68
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.io.UTFDataFormatException;
74
75 /**
76  * Collection of static methods for converting strings between different
77  * formats and to and from byte arrays.<p>
78  *
79  * New class, with extensively enhanced and rewritten Hypersonic code.
80  *
81  * @author Thomas Mueller (Hypersonic SQL Group)
82  * @author fredt@users
83  * @version 1.8.0
84  * @since 1.7.2
85  */

86
87 // fredt@users 20020328 - patch 1.7.0 by fredt - error trapping
88
public class StringConverter {
89
90     private static final byte[] HEXBYTES = {
91         (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4',
92         (byte) '5', (byte) '6', (byte) '7', (byte) '8', (byte) '9',
93         (byte) 'a', (byte) 'b', (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'
94     };
95     private static final String JavaDoc HEXINDEX = "0123456789abcdef0123456789ABCDEF";
96
97     /**
98      * Converts a String into a byte array by using a big-endian two byte
99      * representation of each char value in the string.
100      */

101     byte[] stringToFullByteArray(String JavaDoc s) {
102
103         int length = s.length();
104         byte[] buffer = new byte[length * 2];
105         int c;
106
107         for (int i = 0; i < length; i++) {
108             c = s.charAt(i);
109             buffer[i * 2] = (byte) ((c & 0x0000ff00) >> 8);
110             buffer[i * 2 + 1] = (byte) (c & 0x000000ff);
111         }
112
113         return buffer;
114     }
115
116     /**
117      * Compacts a hexadecimal string into a byte array
118      *
119      *
120      * @param s hexadecimal string
121      *
122      * @return byte array for the hex string
123      * @throws IOException
124      */

125     public static byte[] hexToByte(String JavaDoc s) throws IOException JavaDoc {
126
127         int l = s.length() / 2;
128         byte[] data = new byte[l];
129         int j = 0;
130
131         if (s.length() % 2 != 0) {
132             throw new IOException JavaDoc(
133                 "hexadecimal string with odd number of characters");
134         }
135
136         for (int i = 0; i < l; i++) {
137             char c = s.charAt(j++);
138             int n, b;
139
140             n = HEXINDEX.indexOf(c);
141
142             if (n == -1) {
143                 throw new IOException JavaDoc(
144                     "hexadecimal string contains non hex character");
145             }
146
147             b = (n & 0xf) << 4;
148             c = s.charAt(j++);
149             n = HEXINDEX.indexOf(c);
150             b += (n & 0xf);
151             data[i] = (byte) b;
152         }
153
154         return data;
155     }
156
157     /**
158      * Converts a byte array into a hexadecimal string
159      *
160      *
161      * @param b byte array
162      *
163      * @return hex string
164      */

165     public static String JavaDoc byteToHex(byte[] b) {
166
167         int len = b.length;
168         char[] s = new char[len * 2];
169
170         for (int i = 0, j = 0; i < len; i++) {
171             int c = ((int) b[i]) & 0xff;
172
173             s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
174             s[j++] = (char) HEXBYTES[c & 0xf];
175         }
176
177         return new String JavaDoc(s);
178     }
179
180     /**
181      * Converts a byte array into hexadecimal characters
182      * which are written as ASCII to the given output stream.
183      *
184      * @param o output stream
185      * @param b byte array
186      */

187     public static void writeHex(byte[] o, int from, byte[] b) {
188
189         int len = b.length;
190
191         for (int i = 0; i < len; i++) {
192             int c = ((int) b[i]) & 0xff;
193
194             o[from++] = HEXBYTES[c >> 4 & 0xf];
195             o[from++] = HEXBYTES[c & 0xf];
196         }
197     }
198
199     public static String JavaDoc byteToString(byte[] b, String JavaDoc charset) {
200
201         try {
202             return (charset == null) ? new String JavaDoc(b)
203                                      : new String JavaDoc(b, charset);
204         } catch (Exception JavaDoc e) {}
205
206         return null;
207     }
208
209     /**
210      * Converts a Unicode string into UTF8 then convert into a hex string
211      *
212      *
213      * @param s normal Unicode string
214      *
215      * @return hex string representation of UTF8 encoding of the input
216      */

217     public static String JavaDoc unicodeToHexString(String JavaDoc s) {
218
219         HsqlByteArrayOutputStream bout = new HsqlByteArrayOutputStream();
220
221         writeUTF(s, bout);
222
223         return byteToHex(bout.toByteArray());
224     }
225
226 // fredt@users 20011120 - patch 450455 by kibu@users - modified
227
// method return type changed to HsqlStringBuffer with spare
228
// space for end-of-line characters -- to reduce String concatenation
229

230     /**
231      * Hsqldb specific encoding used only for log files.
232      *
233      * The SQL statements that need to be written to the log file (input) are
234      * Java Unicode strings. input is converted into a 7bit escaped ASCII
235      * string (output)with the following transformations.
236      * All characters outside the 0x20-7f range are converted to a
237      * escape sequence and added to output.
238      * If a backslash character is immdediately followed by 'u', the
239      * backslash character is converted to escape sequence and
240      * added to output.
241      * All the remaining characters in input are added to output without
242      * conversion.
243      *
244      * The escape sequence is backslash, letter u, xxxx, where xxxx
245      * is the hex representation of the character code.
246      * (fredt@users)
247      *
248      * @param b output stream to wite to
249      * @param s Java Unicode string
250      *
251      * @return number of bytes written out
252      *
253      */

254     public static int unicodeToAscii(HsqlByteArrayOutputStream b, String JavaDoc s,
255                                      boolean doubleSingleQuotes) {
256
257         int count = 0;
258
259         if ((s == null) || (s.length() == 0)) {
260             return 0;
261         }
262
263         int len = s.length();
264
265         for (int i = 0; i < len; i++) {
266             char c = s.charAt(i);
267
268             if (c == '\\') {
269                 if ((i < len - 1) && (s.charAt(i + 1) == 'u')) {
270                     b.write(c); // encode the \ as unicode, so 'u' is ignored
271
b.write('u');
272                     b.write('0');
273                     b.write('0');
274                     b.write('5');
275                     b.write('c');
276
277                     count += 6;
278                 } else {
279                     b.write(c);
280
281                     count++;
282                 }
283             } else if ((c >= 0x0020) && (c <= 0x007f)) {
284                 b.write(c); // this is 99%
285

286                 count++;
287
288                 if (c == '\'' && doubleSingleQuotes) {
289                     b.write(c);
290
291                     count++;
292                 }
293             } else {
294                 b.write('\\');
295                 b.write('u');
296                 b.write(HEXBYTES[(c >> 12) & 0xf]);
297                 b.write(HEXBYTES[(c >> 8) & 0xf]);
298                 b.write(HEXBYTES[(c >> 4) & 0xf]);
299                 b.write(HEXBYTES[c & 0xf]);
300
301                 count += 6;
302             }
303         }
304
305         return count;
306     }
307
308 // fredt@users 20020522 - fix for 557510 - backslash bug
309
// this legacy bug resulted from forward reading the input when a backslash
310
// was present and manifested itself when a backslash was followed
311
// immdediately by a character outside the 0x20-7f range in a database field.
312

313     /**
314      * Hsqldb specific decoding used only for log files.
315      *
316      * This method converts the 7 bit escaped ASCII strings in a log file
317      * back into Java Unicode strings. See unicodeToAccii() above,
318      *
319      * @param s encoded ASCII string in byte array
320      * @param offset position of first byte
321      * @param length number of bytes to use
322      *
323      * @return Java Unicode string
324      */

325     public static String JavaDoc asciiToUnicode(byte[] s, int offset, int length) {
326
327         if (length == 0) {
328             return "";
329         }
330
331         char[] b = new char[length];
332         int j = 0;
333
334         for (int i = 0; i < length; i++) {
335             byte c = s[offset + i];
336
337             if (c == '\\' && i < length - 5) {
338                 byte c1 = s[offset + i + 1];
339
340                 if (c1 == 'u') {
341                     i++;
342
343                     // 4 characters read should always return 0-15
344
int k = HEXINDEX.indexOf(s[offset + (++i)]) << 12;
345
346                     k += HEXINDEX.indexOf(s[offset + (++i)]) << 8;
347                     k += HEXINDEX.indexOf(s[offset + (++i)]) << 4;
348                     k += HEXINDEX.indexOf(s[offset + (++i)]);
349                     b[j++] = (char) k;
350                 } else {
351                     b[j++] = (char) c;
352                 }
353             } else {
354                 b[j++] = (char) c;
355             }
356         }
357
358         return new String JavaDoc(b, 0, j);
359     }
360
361     public static String JavaDoc asciiToUnicode(String JavaDoc s) {
362
363         if ((s == null) || (s.indexOf("\\u") == -1)) {
364             return s;
365         }
366
367         int len = s.length();
368         char[] b = new char[len];
369         int j = 0;
370
371         for (int i = 0; i < len; i++) {
372             char c = s.charAt(i);
373
374             if (c == '\\' && i < len - 5) {
375                 char c1 = s.charAt(i + 1);
376
377                 if (c1 == 'u') {
378                     i++;
379
380                     // 4 characters read should always return 0-15
381
int k = HEXINDEX.indexOf(s.charAt(++i)) << 12;
382
383                     k += HEXINDEX.indexOf(s.charAt(++i)) << 8;
384                     k += HEXINDEX.indexOf(s.charAt(++i)) << 4;
385                     k += HEXINDEX.indexOf(s.charAt(++i));
386                     b[j++] = (char) k;
387                 } else {
388                     b[j++] = c;
389                 }
390             } else {
391                 b[j++] = c;
392             }
393         }
394
395         return new String JavaDoc(b, 0, j);
396     }
397
398     public static String JavaDoc readUTF(byte[] bytearr, int offset,
399                                  int length) throws IOException JavaDoc {
400
401         char[] buf = new char[length];
402
403         return readUTF(bytearr, offset, length, buf);
404     }
405
406     public static String JavaDoc readUTF(byte[] bytearr, int offset, int length,
407                                  char[] buf) throws IOException JavaDoc {
408
409         int bcount = 0;
410         int c, char2, char3;
411         int count = 0;
412
413         while (count < length) {
414             c = (int) bytearr[offset + count];
415
416             if (bcount == buf.length) {
417                 buf = (char[]) ArrayUtil.resizeArray(buf, length);
418             }
419
420             if (c > 0) {
421
422                 /* 0xxxxxxx*/
423                 count++;
424
425                 buf[bcount++] = (char) c;
426
427                 continue;
428             }
429
430             c &= 0xff;
431
432             switch (c >> 4) {
433
434                 case 12 :
435                 case 13 :
436
437                     /* 110x xxxx 10xx xxxx*/
438                     count += 2;
439
440                     if (count > length) {
441                         throw new UTFDataFormatException JavaDoc();
442                     }
443
444                     char2 = (int) bytearr[offset + count - 1];
445
446                     if ((char2 & 0xC0) != 0x80) {
447                         throw new UTFDataFormatException JavaDoc();
448                     }
449
450                     buf[bcount++] = (char) (((c & 0x1F) << 6)
451                                             | (char2 & 0x3F));
452                     break;
453
454                 case 14 :
455
456                     /* 1110 xxxx 10xx xxxx 10xx xxxx */
457                     count += 3;
458
459                     if (count > length) {
460                         throw new UTFDataFormatException JavaDoc();
461                     }
462
463                     char2 = (int) bytearr[offset + count - 2];
464                     char3 = (int) bytearr[offset + count - 1];
465
466                     if (((char2 & 0xC0) != 0x80)
467                             || ((char3 & 0xC0) != 0x80)) {
468                         throw new UTFDataFormatException JavaDoc();
469                     }
470
471                     buf[bcount++] = (char) (((c & 0x0F) << 12)
472                                             | ((char2 & 0x3F) << 6)
473                                             | ((char3 & 0x3F) << 0));
474                     break;
475
476                 default :
477
478                     /* 10xx xxxx, 1111 xxxx */
479                     throw new UTFDataFormatException JavaDoc();
480             }
481         }
482
483         // The number of chars produced may be less than length
484
return new String JavaDoc(buf, 0, bcount);
485     }
486
487     /**
488      * Writes a string to the specified DataOutput using UTF-8 encoding in a
489      * machine-independent manner.
490      * <p>
491      * @param str a string to be written.
492      * @param out destination to write to
493      * @return The number of bytes written out.
494      */

495     public static int writeUTF(String JavaDoc str, HsqlByteArrayOutputStream out) {
496
497         int strlen = str.length();
498         int c,
499             count = 0;
500
501         for (int i = 0; i < strlen; i++) {
502             c = str.charAt(i);
503
504             if (c >= 0x0001 && c <= 0x007F) {
505                 out.write(c);
506
507                 count++;
508             } else if (c > 0x07FF) {
509                 out.write(0xE0 | ((c >> 12) & 0x0F));
510                 out.write(0x80 | ((c >> 6) & 0x3F));
511                 out.write(0x80 | ((c >> 0) & 0x3F));
512
513                 count += 3;
514             } else {
515                 out.write(0xC0 | ((c >> 6) & 0x1F));
516                 out.write(0x80 | ((c >> 0) & 0x3F));
517
518                 count += 2;
519             }
520         }
521
522         return count;
523     }
524
525     public static int getUTFSize(String JavaDoc s) {
526
527         int len = (s == null) ? 0
528                               : s.length();
529         int l = 0;
530
531         for (int i = 0; i < len; i++) {
532             int c = s.charAt(i);
533
534             if ((c >= 0x0001) && (c <= 0x007F)) {
535                 l++;
536             } else if (c > 0x07FF) {
537                 l += 3;
538             } else {
539                 l += 2;
540             }
541         }
542
543         return l;
544     }
545
546     /**
547      * Using a Reader and a Writer, returns a String from an InputStream.
548      */

549     public static String JavaDoc inputStreamToString(InputStream JavaDoc x,
550             int length) throws IOException JavaDoc {
551
552         InputStreamReader JavaDoc in = new InputStreamReader JavaDoc(x);
553         StringWriter JavaDoc writer = new StringWriter JavaDoc();
554         int blocksize = 8 * 1024;
555         char[] buffer = new char[blocksize];
556
557         for (int left = length; left > 0; ) {
558             int read = in.read(buffer, 0, left > blocksize ? blocksize
559                                                            : left);
560
561             if (read == -1) {
562                 break;
563             }
564
565             writer.write(buffer, 0, read);
566
567             left -= read;
568         }
569
570         writer.close();
571
572         return writer.toString();
573     }
574
575 // fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size
576

577     /**
578      * Returns the quoted version of the string using the quotechar argument.
579      * doublequote argument indicates whether each instance of quotechar
580      * inside the string is doubled.<p>
581      *
582      * null string argument returns null. If the caller needs the literal
583      * "NULL" it should created it itself <p>
584      *
585      * The reverse conversion is handled in Tokenizer.java
586      */

587     public static String JavaDoc toQuotedString(String JavaDoc s, char quoteChar,
588                                         boolean extraQuote) {
589
590         if (s == null) {
591             return null;
592         }
593
594         int count = extraQuote ? count(s, quoteChar)
595                                   : 0;
596         int len = s.length();
597         char[] b = new char[2 + count + len];
598         int i = 0;
599         int j = 0;
600
601         b[j++] = quoteChar;
602
603         for (; i < len; i++) {
604             char c = s.charAt(i);
605
606             b[j++] = c;
607
608             if (extraQuote && c == quoteChar) {
609                 b[j++] = c;
610             }
611         }
612
613         b[j] = quoteChar;
614
615         return new String JavaDoc(b);
616     }
617
618     /**
619      * Counts Character c in String s
620      *
621      * @param String s
622      *
623      * @return int count
624      */

625     static int count(final String JavaDoc s, final char c) {
626
627         int pos = 0;
628         int count = 0;
629
630         if (s != null) {
631             while ((pos = s.indexOf(c, pos)) > -1) {
632                 count++;
633                 pos++;
634             }
635         }
636
637         return count;
638     }
639 }
640
Popular Tags