KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > mysql > jdbc > StringUtils


/*
   Copyright (C) 2002 MySQL AB

      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 */

19 package com.mysql.jdbc;
20
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
27
28
/**
 * Various utility methods for converting to/from byte arrays in the platform
 * encoding
 *
 * @author Mark Matthews
 */

35 public class StringUtils {
/** Number of distinct values a {@code byte} can take. */
private static final int BYTE_RANGE = (1 + Byte.MAX_VALUE) - Byte.MIN_VALUE;

/** Every byte value in ascending order, {@code Byte.MIN_VALUE} first. */
private static byte[] allBytes = new byte[BYTE_RANGE];

/**
 * The characters obtained by decoding {@link #allBytes} with the platform
 * default charset. Entries past the decoded length keep their default
 * value of {@code '\0'}.
 */
private static char[] byteToChars = new char[BYTE_RANGE];

static {
    for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
        allBytes[i - Byte.MIN_VALUE] = (byte) i;
    }

    // Decode the whole table. The earlier bound of
    // (Byte.MAX_VALUE - Byte.MIN_VALUE) is 255, one short of the table size,
    // which left the final entry unmapped.
    String allBytesString = new String(allBytes, 0, BYTE_RANGE);

    int allBytesStringLen = allBytesString.length();

    // The decoded string may be shorter than the byte table, so bound the
    // copy by both lengths.
    for (int i = 0; (i < BYTE_RANGE) && (i < allBytesStringLen); i++) {
        byteToChars[i] = allBytesString.charAt(i);
    }
}
56
57     /**
58      * Returns the byte[] representation of the given string using given
59      * encoding.
60      *
61      * @param s the string to convert
62      * @param encoding the character encoding to use
63      *
64      * @return byte[] representation of the string
65      *
66      * @throws UnsupportedEncodingException if an encoding unsupported by the
67      * JVM is supplied.
68      */

69     public static final byte[] getBytes(String JavaDoc s, String JavaDoc encoding)
70         throws UnsupportedEncodingException JavaDoc {
71         SingleByteCharsetConverter converter = SingleByteCharsetConverter
72             .getInstance(encoding);
73
74         return getBytes(s, converter, encoding);
75     }
76
77     /**
78      * Returns the byte[] representation of the given string (re)using the
79      * given charset converter, and the given encoding.
80      *
81      * @param s the string to convert
82      * @param converter the converter to reuse
83      * @param encoding the character encoding to use
84      *
85      * @return byte[] representation of the string
86      *
87      * @throws UnsupportedEncodingException if an encoding unsupported by the
88      * JVM is supplied.
89      */

90     public static final byte[] getBytes(String JavaDoc s,
91         SingleByteCharsetConverter converter, String JavaDoc encoding)
92         throws UnsupportedEncodingException JavaDoc {
93         byte[] b = null;
94
95         if (converter != null) {
96             b = converter.toBytes(s);
97         } else if (encoding == null) {
98             b = s.getBytes();
99         } else {
100             b = s.getBytes(encoding);
101
102             if (encoding.equalsIgnoreCase("SJIS")
103                     || encoding.equalsIgnoreCase("BIG5")
104                     || encoding.equalsIgnoreCase("GBK")) {
105                 b = escapeSJISByteStream(b, s, 0, s.length());
106             }
107         }
108
109         return b;
110     }
111
112     /**
113      * DOCUMENT ME!
114      *
115      * @param s DOCUMENT ME!
116      * @param converter DOCUMENT ME!
117      * @param encoding DOCUMENT ME!
118      * @param offset DOCUMENT ME!
119      * @param length DOCUMENT ME!
120      *
121      * @return DOCUMENT ME!
122      *
123      * @throws UnsupportedEncodingException DOCUMENT ME!
124      */

125     public static final byte[] getBytes(String JavaDoc s,
126         SingleByteCharsetConverter converter, String JavaDoc encoding, int offset,
127         int length) throws UnsupportedEncodingException JavaDoc {
128         byte[] b = null;
129
130         if (converter != null) {
131             b = converter.toBytes(s, offset, length);
132         } else if (encoding == null) {
133             byte[] temp = s.getBytes();
134
135             b = new byte[length];
136             System.arraycopy(temp, offset, b, 0, length);
137         } else {
138             byte[] temp = s.getBytes(encoding);
139
140             b = new byte[length];
141             System.arraycopy(temp, offset, b, 0, length);
142
143             if (encoding.equalsIgnoreCase("SJIS")
144                     || encoding.equalsIgnoreCase("BIG5")
145                     || encoding.equalsIgnoreCase("GBK")) {
146                 b = escapeSJISByteStream(b, s, offset, length);
147             }
148         }
149
150         return b;
151     }
152
153     /**
154      * Dumps the given bytes to STDOUT as a hex dump (up to length bytes).
155      *
156      * @param byteBuffer the data to print as hex
157      * @param length the number of bytes to print
158      */

159     public static final void dumpAsHex(byte[] byteBuffer, int length) {
160         int p = 0;
161         int rows = length / 8;
162
163         for (int i = 0; i < rows; i++) {
164             int ptemp = p;
165
166             for (int j = 0; j < 8; j++) {
167                 String JavaDoc hexVal = Integer.toHexString((int) byteBuffer[ptemp]
168                         & 0xff);
169
170                 if (hexVal.length() == 1) {
171                     hexVal = "0" + hexVal;
172                 }
173
174                 System.out.print(hexVal + " ");
175                 ptemp++;
176             }
177
178             System.out.print(" ");
179
180             for (int j = 0; j < 8; j++) {
181                 if ((byteBuffer[p] > 32) && (byteBuffer[p] < 127)) {
182                     System.out.print((char) byteBuffer[p] + " ");
183                 } else {
184                     System.out.print(". ");
185                 }
186
187                 p++;
188             }
189
190             System.out.println();
191         }
192
193         int n = 0;
194
195         for (int i = p; i < length; i++) {
196             String JavaDoc hexVal = Integer.toHexString((int) byteBuffer[i] & 0xff);
197
198             if (hexVal.length() == 1) {
199                 hexVal = "0" + hexVal;
200             }
201
202             System.out.print(hexVal + " ");
203             n++;
204         }
205
206         for (int i = n; i < 8; i++) {
207             System.out.print(" ");
208         }
209
210         System.out.print(" ");
211
212         for (int i = p; i < length; i++) {
213             if ((byteBuffer[i] > 32) && (byteBuffer[i] < 127)) {
214                 System.out.print((char) byteBuffer[i] + " ");
215             } else {
216                 System.out.print(". ");
217             }
218         }
219
220         System.out.println();
221     }
222
223     /**
224      * Returns the bytes as an ASCII String.
225      *
226      * @param buffer the bytes representing the string
227      *
228      * @return The ASCII String.
229      */

230     public static final String JavaDoc toAsciiString(byte[] buffer) {
231         return toAsciiString(buffer, 0, buffer.length);
232     }
233
234     /**
235      * Returns the bytes as an ASCII String.
236      *
237      * @param buffer the bytes to convert
238      * @param startPos the position to start converting
239      * @param length the length of the string to convert
240      *
241      * @return the ASCII string
242      */

243     public static final String JavaDoc toAsciiString(byte[] buffer, int startPos,
244         int length) {
245         char[] charArray = new char[length];
246         int readpoint = startPos;
247
248         for (int i = 0; i < length; i++) {
249             charArray[i] = (char) buffer[readpoint];
250             readpoint++;
251         }
252
253         return new String JavaDoc(charArray);
254     }
255
256     /**
257      * Unfortunately, SJIS has 0x5c as a high byte in some of its double-byte
258      * characters, so we need to escape it.
259      *
260      * @param origBytes the original bytes in SJIS format
261      * @param origString the string that had .getBytes() called on it
262      * @param offset where to start converting from
263      * @param length how many characters to convert.
264      *
265      * @return byte[] with 0x5c escaped
266      */

267     public static byte[] escapeSJISByteStream(byte[] origBytes,
268         String JavaDoc origString, int offset, int length) {
269         if ((origBytes == null) || (origBytes.length == 0)) {
270             return origBytes;
271         }
272
273         int bytesLen = origBytes.length;
274         int bufIndex = 0;
275         int strIndex = 0;
276
277         ByteArrayOutputStream JavaDoc bytesOut = new ByteArrayOutputStream JavaDoc(bytesLen);
278
279         while (true) {
280             if (strIndex<origString.length() && origString.charAt(strIndex) == '\\') {
281                 // write it out as-is
282
bytesOut.write(origBytes[bufIndex++]);
283                 //bytesOut.write(origBytes[bufIndex++]);
284
} else {
285                 // Grab the first byte
286
int loByte = (int) origBytes[bufIndex];
287
288                 if (loByte < 0) {
289                     loByte += 256; // adjust for signedness/wrap-around
290
}
291
292                 // We always write the first byte
293
bytesOut.write(loByte);
294
295                 //
296
// The codepage characters in question exist between
297
// 0x81-0x9F and 0xE0-0xFC...
298
//
299
// See:
300
//
301
// http://www.microsoft.com/GLOBALDEV/Reference/dbcs/932.htm
302
//
303
// Problematic characters in GBK
304
//
305
// U+905C : CJK UNIFIED IDEOGRAPH
306
//
307
// Problematic characters in Big5
308
//
309
// B9F0 = U+5C62 : CJK UNIFIED IDEOGRAPH
310
//
311
if (((loByte >= 0x81) && (loByte <= 0x9F))
312                         || ((loByte >= 0xE0) && (loByte <= 0xFC))) {
313                     if (bufIndex < (bytesLen - 1)) {
314                         int hiByte = (int) origBytes[bufIndex + 1];
315
316                         if (hiByte < 0) {
317                             hiByte += 256; // adjust for signedness/wrap-around
318
}
319
320                         // write the high byte here, and increment the index
321
// for the high byte
322
bytesOut.write(hiByte);
323                         bufIndex++;
324
325                         // escape 0x5c if necessary
326
if (hiByte == 0x5C) {
327                             bytesOut.write(hiByte);
328                         }
329                     }
330                 } else if (loByte == 0x5c) {
331                     if (bufIndex < (bytesLen - 1)) {
332                         int hiByte = (int) origBytes[bufIndex + 1];
333
334                         if (hiByte < 0) {
335                             hiByte += 256; // adjust for signedness/wrap-around
336
}
337
338                         if (hiByte == 0x62) {
339                             // we need to escape the 0x5c
340
bytesOut.write(0x5c);
341                             bytesOut.write(0x62);
342                             bufIndex++;
343                         }
344                     }
345                 }
346
347                 bufIndex++;
348
349                 
350             }
351
352             if (bufIndex >= bytesLen) {
353                 // we're done
354
break;
355             }
356             
357             strIndex++;
358         }
359
360         return bytesOut.toByteArray();
361     }
362
363     /**
364      * Returns the first non whitespace char, converted to upper case
365      *
366      * @param searchIn the string to search in
367      *
368      * @return the first non-whitespace character, upper cased.
369      */

370     public static char firstNonWsCharUc(String JavaDoc searchIn) {
371         if (searchIn == null) {
372             return 0;
373         }
374
375         int length = searchIn.length();
376
377         for (int i = 0; i < length; i++) {
378             char c = searchIn.charAt(i);
379
380             if (!Character.isWhitespace(c)) {
381                 return Character.toUpperCase(c);
382             }
383         }
384
385         return 0;
386     }
387
388     /**
389      * DOCUMENT ME!
390      *
391      * @param searchIn DOCUMENT ME!
392      * @param searchFor DOCUMENT ME!
393      *
394      * @return DOCUMENT ME!
395      */

396     public static int indexOfIgnoreCase(String JavaDoc searchIn, String JavaDoc searchFor) {
397         if ((searchIn == null) || (searchFor == null)) {
398             return -1;
399         }
400
401         int patternLength = searchFor.length();
402         int stringLength = searchIn.length();
403
404         int i = 0;
405
406         if (patternLength == 0) {
407             return -1;
408         }
409
410         // Brute force string pattern matching
411
char firstCharOfPattern = Character.toUpperCase(searchFor.charAt(0));
412
413 lookForFirstChar:
414         while (true) {
415             while ((i <= stringLength)
416                     && (Character.toUpperCase(searchIn.charAt(i)) != firstCharOfPattern)) {
417                 i++;
418             }
419
420             if (i > stringLength) {
421                 return -1;
422             }
423
424             int j = i + 1;
425             int end = (j + patternLength) - 1;
426
427             int k = 1; // start at second char of pattern
428

429             while (j < end) {
430                 if (Character.toUpperCase(searchIn.charAt(j++)) != Character
431                         .toUpperCase(searchFor.charAt(k++))) {
432                     i++;
433
434                     // start over
435
continue lookForFirstChar;
436                 }
437             }
438
439             return i; // found entire pattern
440
}
441     }
442
443     /**
444      * Splits stringToSplit into a list, using the given delimitter
445      *
446      * @param stringToSplit the string to split
447      * @param delimitter the string to split on
448      * @param trim should the split strings be whitespace trimmed?
449      *
450      * @return the list of strings, split by delimitter
451      *
452      * @throws IllegalArgumentException DOCUMENT ME!
453      */

454     public static final List JavaDoc split(String JavaDoc stringToSplit, String JavaDoc delimitter,
455         boolean trim) {
456         if (stringToSplit == null) {
457             return new ArrayList JavaDoc();
458         }
459
460         if (delimitter == null) {
461             throw new IllegalArgumentException JavaDoc();
462         }
463
464         StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(stringToSplit,
465                 delimitter, false);
466
467         List JavaDoc splitTokens = new ArrayList JavaDoc(tokenizer.countTokens());
468
469         while (tokenizer.hasMoreTokens()) {
470             String JavaDoc token = tokenizer.nextToken();
471
472             if (trim) {
473                 token = token.trim();
474             }
475
476             splitTokens.add(token);
477         }
478
479         return splitTokens;
480     }
481
482     /**
483      * Determines whether or not the string 'searchIn' contains the string
484      * 'searchFor', dis-regarding case. Shorthand for a
485      * String.regionMatch(...)
486      *
487      * @param searchIn the string to search in
488      * @param searchFor the string to search for
489      *
490      * @return whether searchIn starts with searchFor, ignoring case
491      */

492     public static boolean startsWithIgnoreCase(String JavaDoc searchIn, String JavaDoc searchFor) {
493         return startsWithIgnoreCase(searchIn, 0, searchFor);
494     }
495
496     /**
497      * Determines whether or not the string 'searchIn' contains the string
498      * 'searchFor', dis-regarding case starting at 'startAt' Shorthand for a
499      * String.regionMatch(...)
500      *
501      * @param searchIn the string to search in
502      * @param startAt the position to start at
503      * @param searchFor the string to search for
504      *
505      * @return whether searchIn starts with searchFor, ignoring case
506      */

507     public static boolean startsWithIgnoreCase(String JavaDoc searchIn, int startAt,
508         String JavaDoc searchFor) {
509         return searchIn.regionMatches(true, 0, searchFor, startAt,
510             searchFor.length());
511     }
512
513     /**
514      * Determines whether or not the sting 'searchIn' contains the string
515      * 'searchFor', di-regarding case and leading whitespace
516      *
517      * @param searchIn the string to search in
518      * @param searchFor the string to search for
519      *
520      * @return true if the string starts with 'searchFor' ignoring whitespace
521      */

522     public static boolean startsWithIgnoreCaseAndWs(String JavaDoc searchIn,
523         String JavaDoc searchFor) {
524         int beginPos = 0;
525
526         int inLength = searchIn.length();
527
528         for (beginPos = 0; beginPos < inLength; beginPos++) {
529             if (!Character.isWhitespace(searchIn.charAt(beginPos))) {
530                 break;
531             }
532         }
533
534         return startsWithIgnoreCase(searchIn, beginPos, searchFor);
535     }
536 }
537
Popular Tags