KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ca > commons > cbutil > CBParse


1 package com.ca.commons.cbutil;
2
/**
 * Stand-alone parsing and text helpers being moved out of CBUtility:
 * hex conversion, UTF-8 / Unicode sniffing of raw byte arrays, HTML
 * escaping, and in-place search-and-replace on {@link StringBuffer}s.
 * <p>
 * All methods are static and keep no state between calls.
 */
public class CBParse
{
    /** Lower-case hex digits, indexed by nibble value (0-15). */
    private static final char[] HEX_DIGITS =
            {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

    /**
     * Longest character encoding allowed by RFC 2279 (six bytes, lead
     * byte 1111110x). Lead bytes announcing more than this (0xFE, 0xFF)
     * are never valid UTF-8.
     */
    private static final int MAX_UTF8_CHAR_LENGTH = 6;

    /** Result of {@link #scanUTF8}: the bytes are not structurally valid UTF-8. */
    private static final int UTF8_INVALID = -1;

    /** Result of {@link #scanUTF8}: valid, and every byte is 7-bit ASCII. */
    private static final int UTF8_ASCII_ONLY = 0;

    /** Result of {@link #scanUTF8}: valid, with at least one multi-byte character. */
    private static final int UTF8_MULTI_BYTE = 1;

    /**
     * Converts a byte array to a lower-case hex string, two digits per byte.
     *
     * @param bytes the bytes to convert (must not be null)
     * @return a string of length {@code bytes.length * 2}
     */
    public static String bytes2Hex(byte[] bytes)
    {
        StringBuffer ret = new StringBuffer(bytes.length * 2);
        for (int i = 0; i < bytes.length; i++)
        {
            ret.append(byte2Hex(bytes[i]));
        }
        return ret.toString();
    }

    /**
     * Converts a string to a lower-case hex string, four digits per
     * (16-bit) character.
     *
     * @param orig the string to convert (must not be null)
     * @return a string of length {@code orig.length() * 4}
     */
    public static String string2Hex(String orig)
    {
        // four hex digits per char, so size the buffer accordingly
        StringBuffer ret = new StringBuffer(orig.length() * 4);
        for (int i = 0; i < orig.length(); i++)
        {
            ret.append(char2Hex(orig.charAt(i)));
        }
        return ret.toString();
    }

    /**
     * Returns the two-digit, lower-case hex representation of a byte.
     *
     * @param b the byte to convert
     * @return two hex digits, high nibble first (e.g. {@code "0a"}, {@code "ff"})
     */
    public static String byte2Hex(byte b)
    {
        // mask after shifting so the sign extension of negative bytes is discarded
        char[] array = {HEX_DIGITS[(b >> 4) & 0x0f], HEX_DIGITS[b & 0x0f]};
        return new String(array);
    }

    /**
     * Returns the four-digit, lower-case hex representation of a char.
     *
     * @param c the character to convert
     * @return four hex digits, high byte first (e.g. {@code "0041"} for 'A')
     */
    public static String char2Hex(char c)
    {
        byte hi = (byte) (c >>> 8);
        byte lo = (byte) (c & 0xff);
        return byte2Hex(hi) + byte2Hex(lo);
    }

    /**
     * Combines two hex digit characters into the byte they represent.
     *
     * @param hex1 the high-nibble hex digit
     * @param hex2 the low-nibble hex digit
     * @return the byte value; see {@link #hexChar2Byte} for the handling
     *         of characters that are not hex digits
     */
    public static byte hex2Byte(char hex1, char hex2)
    {
        byte a = hexChar2Byte(hex1);
        byte b = hexChar2Byte(hex2);
        return (byte) ((a << 4) + b);
    }

    /**
     * Converts a single hex digit character ('0'-'9', 'A'-'F' or 'a'-'f')
     * to its value 0-15.
     * <p>
     * No validation is performed: a character outside those ranges does
     * not raise an error, it simply yields a meaningless value.
     *
     * @param hex the hex digit character
     * @return the digit's value
     */
    public static byte hexChar2Byte(char hex)
    {
        if (hex <= '9')
        {
            return (byte) (hex - '0');
        }
        else if (hex <= 'F')
        {
            return (byte) (hex - 'A' + 10);
        }
        else
        {
            return (byte) (hex - 'a' + 10);
        }
    }

    /**
     * From Van Bui - returns a hex string formatted with a space after
     * each hex word of length wordlength (including after the last
     * complete word).
     *
     * @param in         input array of bytes to convert
     * @param wordlength the length, in hex digits, of the words to print out;
     *                   must not be zero (a zero value causes an
     *                   ArithmeticException)
     * @return the formatted hex string
     */
    public static String bytes2HexSplit(byte[] in, int wordlength)
    {
        String hex = bytes2Hex(in);
        StringBuffer buff = new StringBuffer(hex.length() * 2);

        for (int i = 0; i < hex.length(); i++)
        {
            buff.append(hex.charAt(i));
            if ((i + 1) % wordlength == 0)
            {
                buff.append(" ");
            }
        }

        return buff.toString();
    }

    /**
     * From Van Bui - returns a hex string formatted with a space after
     * each hex word of length wordlength, and a new line after every
     * linelength hex digits.
     *
     * @param in         input array of bytes to convert
     * @param wordlength the length, in hex digits, of the words to print out;
     *                   must not be zero
     * @param linelength the number of hex digits to print before inserting
     *                   a line feed; must not be zero
     * @return the formatted hex string
     */
    public static String bytes2HexSplit(byte[] in, int wordlength, int linelength)
    {
        String hex = bytes2Hex(in);
        StringBuffer buff = new StringBuffer(hex.length() * 2);

        for (int i = 0; i < hex.length(); i++)
        {
            buff.append(hex.charAt(i));
            if ((i + 1) % wordlength == 0)
            {
                buff.append(" ");
            }
            if ((i + 1) % linelength == 0)
            {
                buff.append("\n");
            }
        }

        return buff.toString();
    }

    /**
     * Determines whether a given byte sequence is a structurally valid
     * utf-8 encoding. While this does not mean that the bytes *are* a
     * utf-8 encoded string, the chance of a random byte sequence
     * happening to be utf8 is roughly (1/2 ** (byte array length)).<p>
     * Note that '7 bit ascii' is *always* a valid utf-8 string, and so
     * is the empty sequence.<p>
     * Only the lead byte / continuation byte structure of rfc 2279 is
     * checked (character lengths of one to six bytes); overlong forms
     * and code point ranges are not examined.
     *
     * @param sequence the bytes to test (must not be null)
     * @return true if every character has a legal lead byte followed by
     *         the right number of continuation bytes
     */
    public static boolean isUTF8(byte[] sequence)
    {
        return scanUTF8(sequence) != UTF8_INVALID;
    }

    /**
     * Determines whether a given byte sequence is a structurally valid
     * utf-8 encoding, encoding (at least in part) something *other* than
     * normal Ascii (i.e. it is utf-8 encoding something that is not just
     * 7-bit ascii, which in utf-8 is indistinguishable from the original
     * text).<p>
     * While this does not mean that the bytes *are* a utf-8 encoded
     * string, the chance of a random byte sequence (containing bytes with
     * the high-bit set) happening to be utf8 is roughly
     * (1/2 ** (byte array length)).<p>
     * The whole sequence is examined, including its final bytes, and a
     * character that is cut off by the end of the array makes the
     * sequence invalid. See rfc 2279.
     *
     * @param sequence the bytes to test (must not be null)
     * @return true if the sequence passes the same checks as
     *         {@link #isUTF8} and contains at least one multi-byte character
     */
    public static boolean isNonAsciiUTF8(byte[] sequence)
    {
        return scanUTF8(sequence) == UTF8_MULTI_BYTE;
    }

    /**
     * Walks the sequence one utf-8 character at a time, checking lead and
     * continuation bytes.
     *
     * @return {@link #UTF8_INVALID}, {@link #UTF8_ASCII_ONLY} or
     *         {@link #UTF8_MULTI_BYTE}
     */
    private static int scanUTF8(byte[] sequence)
    {
        boolean multiByteSeen = false;
        int i = 0;

        while (i < sequence.length)
        {
            int charLength = leadingOneBits(sequence[i]);

            if (charLength == 0)
            {
                i++;                    // plain 7-bit ascii
                continue;
            }

            // 10xxxxxx is a continuation byte and cannot start a character;
            // more than six leading ones (0xFE / 0xFF) is never legal.
            if (charLength == 1 || charLength > MAX_UTF8_CHAR_LENGTH)
            {
                return UTF8_INVALID;
            }

            // the character would run past the end of the data
            if (i + charLength > sequence.length)
            {
                return UTF8_INVALID;
            }

            for (int j = 1; j < charLength; j++)
            {
                if (!isContinuationByte(sequence[i + j]))
                {
                    return UTF8_INVALID;
                }
            }

            multiByteSeen = true;
            i += charLength;            // move to the next character start
        }

        return multiByteSeen ? UTF8_MULTI_BYTE : UTF8_ASCII_ONLY;
    }

    /** Counts the consecutive one bits at the top of a byte (0 to 8). */
    private static int leadingOneBits(byte b)
    {
        int value = b & 0xff;
        int count = 0;
        while ((value & 0x80) != 0)
        {
            value = (value << 1) & 0xff;
            count++;
        }
        return count;
    }

    /** Tests whether a byte has the utf-8 continuation form 10xxxxxx. */
    private static boolean isContinuationByte(byte b)
    {
        return (b & 0xc0) == 0x80;
    }

    /**
     * This uses the implicit 'unicode marker' at the start of a
     * Unicode file to determine whether a file is a unicode file.
     * At the beginning of most unicode files is a two byte code
     * indicating the endianness of the file (either FFFE or FEFF).
     * If either of these sequences is found, this function returns
     * true, otherwise it returns false. <i>Technically</i> this isn't
     * a sure test, since a) something else could have this signature,
     * and b) unicode files are not absolutely required to have this
     * signature (but most do).
     *
     * @param sequence the bytes to test (must not be null)
     * @return true if the sequence starts with FF FE or FE FF
     */
    public static boolean isUnicode(byte[] sequence)
    {
        if (sequence.length < 2)
        {
            return false;
        }
        byte first = sequence[0];
        byte second = sequence[1];
        return (first == (byte) 0xFF && second == (byte) 0xFE)
                || (first == (byte) 0xFE && second == (byte) 0xFF);
    }

    /**
     * Turns a string into HTML displayable text by escaping the special
     * characters '&lt;', '&gt;', '&amp;', '&quot;' and '#'.
     * <p>
     * Text that already looks like HTML - it starts (ignoring case) with
     * <code>&lt;html&gt;</code> or <code>&lt;!doctype html&gt;</code> - is
     * not escaped. Instead that leading marker, and a trailing
     * <code>&lt;/html&gt;</code> if present, are stripped and the rest is
     * returned as is.
     * <p>
     * ... add new escapes as required.
     *
     * @param rawText the text to convert (must not be null)
     * @return the escaped text, or the unwrapped HTML body
     */
    public static String toHTML(String rawText)
    {
        final String htmlTag = "<html>";
        final String doctypeTag = "<!doctype html>";
        final String closeTag = "</html>";

        boolean startsWithHtml = rawText.regionMatches(true, 0, htmlTag, 0, htmlTag.length());
        boolean startsWithDoctype = rawText.regionMatches(true, 0, doctypeTag, 0, doctypeTag.length());

        if (startsWithHtml || startsWithDoctype)
        {
            String body = rawText.substring(startsWithHtml ? htmlTag.length() : doctypeTag.length());

            int closeStart = body.length() - closeTag.length();
            if (closeStart >= 0 && body.regionMatches(true, closeStart, closeTag, 0, closeTag.length()))
            {
                body = body.substring(0, closeStart);
            }
            return body;
        }

        // single pass: each source character is examined exactly once, so
        // the '&' and '#' inside the inserted entities are never re-escaped
        StringBuffer escaped = new StringBuffer(rawText.length() + 16);
        for (int pos = 0; pos < rawText.length(); pos++)
        {
            char c = rawText.charAt(pos);
            switch (c)
            {
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '&':
                    escaped.append("&amp;");
                    break;
                case '\"':
                    escaped.append("&quot;");
                    break;
                case '#':
                    escaped.append("&#35;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Deletes a character in <i>text</i> at position <i>pos</i> and replaces
     * it with the string <i>replacement</i>.
     *
     * @param text        the text to be modified (changed in place)
     * @param pos         the position of the character to be deleted
     * @param replacement the string the character is to be replaced with.
     * @return the position just after the inserted replacement
     */
    public static int replaceChar(StringBuffer text, int pos, String replacement)
    {
        text.deleteCharAt(pos);
        text.insert(pos, replacement);
        return (pos + replacement.length());
    }

    /**
     * Replaces every occurrence of the character <i>c</i> in <i>text</i>
     * with the string <i>replacement</i>.
     *
     * @param text        the text to be modified (changed in place)
     * @param c           the character to be replaced
     * @param replacement the string the character is to be replaced with.
     * @return the modified text as a String
     */
    public static String replaceAllChar(StringBuffer text, char c, String replacement)
    {
        return replaceAllBufferChar(text, c, replacement).toString();
    }

    /**
     * Replaces every occurrence of the character <i>c</i> in <i>text</i>
     * with the string <i>replacement</i>. Inserted replacement text is
     * not searched again.
     *
     * @param text        the text to be modified (changed in place)
     * @param c           the character to be replaced
     * @param replacement the string the character is to be replaced with.
     * @return the same buffer that was passed in as <i>text</i>
     */
    public static StringBuffer replaceAllBufferChar(StringBuffer text, char c, String replacement)
    {
        String target = String.valueOf(c);
        // search the buffer directly rather than copying it to a String each time round
        int pos = text.indexOf(target, 0);
        while (pos != -1)
        {
            pos = replaceChar(text, pos, replacement);
            pos = text.indexOf(target, pos);
        }
        return text;
    }

    /**
     * Deletes a substring in <i>text</i> at position <i>pos</i>, of length
     * <i>len</i>, and replaces it with the string <i>replacement</i>.
     *
     * @param text        the text to be modified (changed in place)
     * @param pos         the position of the first character to be deleted
     * @param len         the number of characters to be deleted
     * @param replacement the string the substring is to be replaced with.
     * @return the position just after the inserted replacement
     */
    public static int replaceString(StringBuffer text, int pos, int len, String replacement)
    {
        text.replace(pos, pos + len, replacement);
        return (pos + replacement.length());
    }

    /**
     * Replaces every occurrence of the substring <i>orig</i> in <i>text</i>
     * with the string <i>replacement</i>.
     *
     * @param text        the text to be modified (changed in place)
     * @param orig        the original text substring to be changed
     * @param replacement the string the original substring is to be replaced with.
     * @return the modified text as a String
     */
    public static String replaceAllString(StringBuffer text, String orig, String replacement)
    {
        return replaceAllBufferString(text, orig, replacement).toString();
    }

    /**
     * Replaces every occurrence of the substring <i>orig</i> in <i>text</i>
     * with the string <i>replacement</i>. Inserted replacement text is
     * not searched again. An empty <i>orig</i> matches nothing, so the
     * text is returned unchanged.
     *
     * @param text        the text to be modified (changed in place)
     * @param orig        the original text substring to be changed
     * @param replacement the string the original substring is to be replaced with.
     * @return the same buffer that was passed in as <i>text</i>
     */
    public static StringBuffer replaceAllBufferString(StringBuffer text, String orig, String replacement)
    {
        // an empty search string "matches" at every position, which would
        // otherwise insert the replacement for ever
        if (orig.length() == 0)
        {
            return text;
        }

        int pos = text.indexOf(orig, 0);
        while (pos != -1)
        {
            pos = replaceString(text, pos, orig.length(), replacement);
            pos = text.indexOf(orig, pos);
        }
        return text;
    }

    /**
     * Utility for micro-parser. Gets the next character pos in a string
     * at or after an initial offset that either matches, or does not match,
     * <i>any</i> of a set of comparison characters.
     *
     * @param pos      the position to start searching from (must not be negative)
     * @param searchMe the string to search
     * @param compare  a string containing characters to compare against
     * @param match    whether the match is for characters in the compare string (true)
     *                 or <i>not</i> in the compare string (false)
     * @return the position found, or -1 if no position is found.
     */
    public static int nextCharIn(int pos, String searchMe, String compare, boolean match)
    {
        for (int i = pos; i < searchMe.length(); i++)
        {
            boolean inCompareSet = compare.indexOf(searchMe.charAt(i)) != -1;
            if (inCompareSet == match)
            {
                return i;
            }
        }
        return -1;
    }
}
435
Popular Tags