Utility


1   //##header 1189099963000 FOUNDATION
2   /*
3    *******************************************************************************
4    * Copyright (C) 1996-2006, International Business Machines Corporation and    *
5    * others. All Rights Reserved.                                                *
6    *******************************************************************************
7    */
8   package com.ibm.icu.impl;
9   import java.util.ArrayList  ;
10  
11  import com.ibm.icu.lang.*;
12  import com.ibm.icu.text.*;
13  import com.ibm.icu.impl.UCharacterProperty;
14  // This class contains utility functions so testing not needed
15  ///CLOVER:OFF
16  public final class Utility {
17  
    /** The single-quote (apostrophe) character. */
    private static final char APOSTROPHE = '\'';
    /** The backslash character. */
    private static final char BACKSLASH  = '\\';
    /**
     * The int value with only its top (sign) bit set.
     * NOTE(review): no use of this constant is visible in this part of the
     * file; presumably it supports unsigned treatment of ints -- confirm.
     */
    private static final int MAGIC_UNSIGNED = 0x80000000;
21  
22      /**
23       * Convenience utility to compare two Object[]s.
24       * Ought to be in System
25       */
26      public final static boolean arrayEquals(Object  [] source, Object   target) {
27          if (source == null) return (target == null);
28          if (!(target instanceof Object  [])) return false;
29          Object  [] targ = (Object  []) target;
30          return (source.length == targ.length
31                  && arrayRegionMatches(source, 0, targ, 0, source.length));
32      }
33  
34      /**
35       * Convenience utility to compare two int[]s
36       * Ought to be in System
37       */
38      ///CLOVER:OFF
39      public final static boolean arrayEquals(int[] source, Object   target) {
40          if (source == null) return (target == null);
41          if (!(target instanceof int[])) return false;
42          int[] targ = (int[]) target;
43          return (source.length == targ.length
44                  && arrayRegionMatches(source, 0, targ, 0, source.length));
45      }
46      ///CLOVER:ON
47  
48      /**
49       * Convenience utility to compare two double[]s
50       * Ought to be in System
51       */
52      ///CLOVER:OFF
53      public final static boolean arrayEquals(double[] source, Object   target) {
54          if (source == null) return (target == null);
55          if (!(target instanceof double[])) return false;
56          double[] targ = (double[]) target;
57          return (source.length == targ.length
58                  && arrayRegionMatches(source, 0, targ, 0, source.length));
59      }
60      public final static boolean arrayEquals(byte[] source, Object   target) {
61          if (source == null) return (target == null);
62          if (!(target instanceof byte[])) return false;
63          byte[] targ = (byte[]) target;
64          return (source.length == targ.length
65                  && arrayRegionMatches(source, 0, targ, 0, source.length));
66      }
67      ///CLOVER:ON
68  
69      /**
70       * Convenience utility to compare two Object[]s
71       * Ought to be in System
72       */
73      public final static boolean arrayEquals(Object   source, Object   target) {
74          if (source == null) return (target == null);
75          // for some reason, the correct arrayEquals is not being called
76          // so do it by hand for now.
77          if (source instanceof Object  [])
78              return(arrayEquals((Object  []) source,target));
79          if (source instanceof int[])
80              return(arrayEquals((int[]) source,target));
81          if (source instanceof double[])
82              return(arrayEquals((int[]) source,target));
83          if (source instanceof byte[])
84              return(arrayEquals((byte[]) source,target));
85          return source.equals(target);
86      }
87  
88      /**
89       * Convenience utility to compare two Object[]s
90       * Ought to be in System.
91       * @param len the length to compare.
92       * The start indices and start+len must be valid.
93       */
94      public final static boolean arrayRegionMatches(Object  [] source, int sourceStart,
95                                              Object  [] target, int targetStart,
96                                              int len)
97      {
98          int sourceEnd = sourceStart + len;
99          int delta = targetStart - sourceStart;
100         for (int i = sourceStart; i < sourceEnd; i++) {
101             if (!arrayEquals(source[i],target[i + delta]))
102             return false;
103         }
104         return true;
105     }
106 
107     /**
108      * Convenience utility to compare two Object[]s
109      * Ought to be in System.
110      * @param len the length to compare.
111      * The start indices and start+len must be valid.
112      */
113     public final static boolean arrayRegionMatches(char[] source, int sourceStart,
114                                             char[] target, int targetStart,
115                                             int len)
116     {
117         int sourceEnd = sourceStart + len;
118         int delta = targetStart - sourceStart;
119         for (int i = sourceStart; i < sourceEnd; i++) {
120             if (source[i]!=target[i + delta])
121             return false;
122         }
123         return true;
124     }
125 
126     /** 
127      * Convenience utility to compare two int[]s.
128      * @param len the length to compare.
129      * The start indices and start+len must be valid.
130      * Ought to be in System
131      */
132     ///CLOVER:OFF
133     public final static boolean arrayRegionMatches(int[] source, int sourceStart,
134                                             int[] target, int targetStart,
135                                             int len)
136     {
137         int sourceEnd = sourceStart + len;
138         int delta = targetStart - sourceStart;
139         for (int i = sourceStart; i < sourceEnd; i++) {
140             if (source[i] != target[i + delta])
141             return false;
142         }
143         return true;
144     }
145     ///CLOVER:ON
146 
147     /**
148      * Convenience utility to compare two arrays of doubles.
149      * @param len the length to compare.
150      * The start indices and start+len must be valid.
151      * Ought to be in System
152      */
153     ///CLOVER:OFF
154     public final static boolean arrayRegionMatches(double[] source, int sourceStart,
155                                             double[] target, int targetStart,
156                                             int len)
157     {
158         int sourceEnd = sourceStart + len;
159         int delta = targetStart - sourceStart;
160         for (int i = sourceStart; i < sourceEnd; i++) {
161             if (source[i] != target[i + delta])
162             return false;
163         }
164         return true;
165     }
166     public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
167             byte[] target, int targetStart, int len){
168         int sourceEnd = sourceStart + len;
169         int delta = targetStart - sourceStart;
170         for (int i = sourceStart; i < sourceEnd; i++) {
171             if (source[i] != target[i + delta])
172                 return false;
173         }
174         return true;
175     }
176     ///CLOVER:ON
177 
178     /**
179      * Convenience utility. Does null checks on objects, then calls equals.
180      */
181     public final static boolean objectEquals(Object   source, Object   target) {
182     if (source == null)
183             return (target == null);
184     else
185             return source.equals(target);
186     }
187 
    /**
     * The ESCAPE character is used during run-length encoding.  It signals
     * a run of identical chars.  Its numeric value is 0xA5A5, which is larger
     * than Short.MAX_VALUE, so it can only be matched by a char or int
     * comparison, never by a (sign-extended) short.
     */
    private static final char ESCAPE = '\uA5A5';

    /**
     * The ESCAPE_BYTE character is used during run-length encoding.  It signals
     * a run of identical bytes.  As a signed Java byte, (byte)0xA5 is -91, so
     * it only matches values that are themselves compared as bytes.
     */
    static final byte ESCAPE_BYTE = (byte)0xA5;
199 
200     /**
201      * Construct a string representing an int array.  Use run-length encoding.
202      * A character represents itself, unless it is the ESCAPE character.  Then
203      * the following notations are possible:
204      *   ESCAPE ESCAPE   ESCAPE literal
205      *   ESCAPE n c      n instances of character c
206      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
207      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
208      * If we encounter a run where n == ESCAPE, we represent this as:
209      *   c ESCAPE n-1 c
210      * The ESCAPE value is chosen so as not to collide with commonly
211      * seen values.
212      */
213     ///CLOVER:OFF
214     static public final String   arrayToRLEString(int[] a) {
215         StringBuffer   buffer = new StringBuffer  ();
216 
217         appendInt(buffer, a.length);
218         int runValue = a[0];
219         int runLength = 1;
220         for (int i=1; i<a.length; ++i) {
221             int s = a[i];
222             if (s == runValue && runLength < 0xFFFF) {
223                 ++runLength;
224             } else {
225                 encodeRun(buffer, runValue, runLength);
226                 runValue = s;
227                 runLength = 1;
228             }
229         }
230         encodeRun(buffer, runValue, runLength);
231         return buffer.toString();
232     }
233     ///CLOVER:ON
234 
235     /**
236      * Construct a string representing a short array.  Use run-length encoding.
237      * A character represents itself, unless it is the ESCAPE character.  Then
238      * the following notations are possible:
239      *   ESCAPE ESCAPE   ESCAPE literal
240      *   ESCAPE n c      n instances of character c
241      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
242      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
243      * If we encounter a run where n == ESCAPE, we represent this as:
244      *   c ESCAPE n-1 c
245      * The ESCAPE value is chosen so as not to collide with commonly
246      * seen values.
247      */
248     ///CLOVER:OFF
249     static public final String   arrayToRLEString(short[] a) {
250         StringBuffer   buffer = new StringBuffer  ();
251         // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
252         buffer.append((char) (a.length >> 16));
253         buffer.append((char) a.length);
254         short runValue = a[0];
255         int runLength = 1;
256         for (int i=1; i<a.length; ++i) {
257             short s = a[i];
258             if (s == runValue && runLength < 0xFFFF) ++runLength;
259             else {
260             encodeRun(buffer, runValue, runLength);
261             runValue = s;
262             runLength = 1;
263             }
264         }
265         encodeRun(buffer, runValue, runLength);
266         return buffer.toString();
267     }
268     ///CLOVER:ON
269 
270     /**
271      * Construct a string representing a char array.  Use run-length encoding.
272      * A character represents itself, unless it is the ESCAPE character.  Then
273      * the following notations are possible:
274      *   ESCAPE ESCAPE   ESCAPE literal
275      *   ESCAPE n c      n instances of character c
276      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
277      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
278      * If we encounter a run where n == ESCAPE, we represent this as:
279      *   c ESCAPE n-1 c
280      * The ESCAPE value is chosen so as not to collide with commonly
281      * seen values.
282      */
283     static public final String   arrayToRLEString(char[] a) {
284         StringBuffer   buffer = new StringBuffer  ();
285         buffer.append((char) (a.length >> 16));
286         buffer.append((char) a.length);
287         char runValue = a[0];
288         int runLength = 1;
289         for (int i=1; i<a.length; ++i) {
290             char s = a[i];
291             if (s == runValue && runLength < 0xFFFF) ++runLength;
292             else {
293             encodeRun(buffer, (short)runValue, runLength);
294             runValue = s;
295             runLength = 1;
296             }
297         }
298         encodeRun(buffer, (short)runValue, runLength);
299         return buffer.toString();
300     }
301 
302     /**
303      * Construct a string representing a byte array.  Use run-length encoding.
304      * Two bytes are packed into a single char, with a single extra zero byte at
305      * the end if needed.  A byte represents itself, unless it is the
306      * ESCAPE_BYTE.  Then the following notations are possible:
307      *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
308      *   ESCAPE_BYTE n b           n instances of byte b
309      * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
310      * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
311      * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
312      *   b ESCAPE_BYTE n-1 b
313      * The ESCAPE_BYTE value is chosen so as not to collide with commonly
314      * seen values.
315      */
316     static public final String   arrayToRLEString(byte[] a) {
317         StringBuffer   buffer = new StringBuffer  ();
318         buffer.append((char) (a.length >> 16));
319         buffer.append((char) a.length);
320         byte runValue = a[0];
321         int runLength = 1;
322         byte[] state = new byte[2];
323         for (int i=1; i<a.length; ++i) {
324             byte b = a[i];
325             if (b == runValue && runLength < 0xFF) ++runLength;
326             else {
327                 encodeRun(buffer, runValue, runLength, state);
328                 runValue = b;
329                 runLength = 1;
330             }
331         }
332         encodeRun(buffer, runValue, runLength, state);
333 
334         // We must save the final byte, if there is one, by padding
335         // an extra zero.
336         if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
337 
338         return buffer.toString();
339     }
340 
341     /**
342      * Encode a run, possibly a degenerate run (of < 4 values).
343      * @param length The length of the run; must be > 0 && <= 0xFFFF.
344      */
345     ///CLOVER:OFF
346     private static final void encodeRun(StringBuffer   buffer, int value, int length) {
347         if (length < 4) {
348             for (int j=0; j<length; ++j) {
349                 if (value == ESCAPE) {
350                     appendInt(buffer, value);
351                 }
352                 appendInt(buffer, value);
353             }
354         }
355         else {
356             if (length == (int) ESCAPE) {
357                 if (value == (int) ESCAPE) {
358                     appendInt(buffer, ESCAPE);
359                 }
360                 appendInt(buffer, value);
361                 --length;
362             }
363             appendInt(buffer, ESCAPE);
364             appendInt(buffer, length);
365             appendInt(buffer, value); // Don't need to escape this value
366         }
367     }
368     ///CLOVER:ON
369     
370     ///CLOVER:OFF
371     private static final void appendInt(StringBuffer   buffer, int value) {
372         buffer.append((char)(value >>> 16));
373         buffer.append((char)(value & 0xFFFF));
374     }
375     ///CLOVER:ON
376 
377     /**
378      * Encode a run, possibly a degenerate run (of < 4 values).
379      * @param length The length of the run; must be > 0 && <= 0xFFFF.
380      */
381     private static final void encodeRun(StringBuffer   buffer, short value, int length) {
382         if (length < 4) {
383             for (int j=0; j<length; ++j) {
384                 if (value == (int) ESCAPE) buffer.append(ESCAPE);
385                 buffer.append((char) value);
386             }
387         }
388         else {
389             if (length == (int) ESCAPE) {
390                 if (value == (int) ESCAPE) buffer.append(ESCAPE);
391                 buffer.append((char) value);
392                 --length;
393             }
394             buffer.append(ESCAPE);
395             buffer.append((char) length);
396             buffer.append((char) value); // Don't need to escape this value
397         }
398     }
399 
400     /**
401      * Encode a run, possibly a degenerate run (of < 4 values).
402      * @param length The length of the run; must be > 0 && <= 0xFF.
403      */
404     private static final void encodeRun(StringBuffer   buffer, byte value, int length,
405                     byte[] state) {
406         if (length < 4) {
407             for (int j=0; j<length; ++j) {
408                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
409                 appendEncodedByte(buffer, value, state);
410             }
411         }
412         else {
413             if (length == ESCAPE_BYTE) {
414             if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
415             appendEncodedByte(buffer, value, state);
416             --length;
417             }
418             appendEncodedByte(buffer, ESCAPE_BYTE, state);
419             appendEncodedByte(buffer, (byte)length, state);
420             appendEncodedByte(buffer, value, state); // Don't need to escape this value
421         }
422     }
423 
424     /**
425      * Append a byte to the given StringBuffer, packing two bytes into each
426      * character.  The state parameter maintains intermediary data between
427      * calls.
428      * @param state A two-element array, with state[0] == 0 if this is the
429      * first byte of a pair, or state[0] != 0 if this is the second byte
430      * of a pair, in which case state[1] is the first byte.
431      */
432     private static final void appendEncodedByte(StringBuffer   buffer, byte value,
433                         byte[] state) {
434         if (state[0] != 0) {
435             char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
436             buffer.append(c);
437             state[0] = 0;
438         }
439         else {
440             state[0] = 1;
441             state[1] = value;
442         }
443     }
444     ///CLOVER:OFF
445     /**
446      * Construct an array of ints from a run-length encoded string.
447      */
448     static public final int[] RLEStringToIntArray(String   s) {
449         int length = getInt(s, 0);
450         int[] array = new int[length];
451         int ai = 0, i = 1;
452 
453         int maxI = s.length() / 2;
454         while (ai < length && i < maxI) {
455             int c = getInt(s, i++);
456 
457             if (c == ESCAPE) {
458                 c = getInt(s, i++);
459                 if (c == ESCAPE) {
460                     array[ai++] = c;
461                 } else {
462                     int runLength = c;
463                     int runValue = getInt(s, i++);
464                     for (int j=0; j<runLength; ++j) {
465                         array[ai++] = runValue;
466                     }
467                 }
468             }
469             else {
470                 array[ai++] = c;
471             }
472         }
473 
474         if (ai != length || i != maxI) {
475             throw new IllegalStateException  ("Bad run-length encoded int array");
476         }
477 
478         return array;
479     }
480     static final int getInt(String   s, int i) {
481         return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1);
482     }
483     ///CLOVER:ON
484 
485     /**
486      * Construct an array of shorts from a run-length encoded string.
487      */
488     ///CLOVER:OFF
489     static public final short[] RLEStringToShortArray(String   s) {
490         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
491         short[] array = new short[length];
492         int ai = 0;
493         for (int i=2; i<s.length(); ++i) {
494             char c = s.charAt(i);
495             if (c == ESCAPE) {
496                 c = s.charAt(++i);
497                 if (c == ESCAPE) {
498                     array[ai++] = (short) c;
499                 } else {
500                     int runLength = (int) c;
501                     short runValue = (short) s.charAt(++i);
502                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
503                 }
504             }
505             else {
506                 array[ai++] = (short) c;
507             }
508         }
509 
510         if (ai != length)
511             throw new IllegalStateException  ("Bad run-length encoded short array");
512 
513         return array;
514     }
515     ///CLOVER:ON
516 
517     /**
518      * Construct an array of shorts from a run-length encoded string.
519      */
520     static public final char[] RLEStringToCharArray(String   s) {
521         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
522         char[] array = new char[length];
523         int ai = 0;
524         for (int i=2; i<s.length(); ++i) {
525             char c = s.charAt(i);
526             if (c == ESCAPE) {
527                 c = s.charAt(++i);
528                 if (c == ESCAPE) {
529                     array[ai++] = c;
530                 } else {
531                     int runLength = (int) c;
532                     char runValue = s.charAt(++i);
533                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
534                 }
535             }
536             else {
537                 array[ai++] = c;
538             }
539         }
540 
541         if (ai != length)
542             throw new IllegalStateException  ("Bad run-length encoded short array");
543 
544         return array;
545     }
546 
547     /**
548      * Construct an array of bytes from a run-length encoded string.
549      */
550     static public final byte[] RLEStringToByteArray(String   s) {
551         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
552         byte[] array = new byte[length];
553         boolean nextChar = true;
554         char c = 0;
555         int node = 0;
556         int runLength = 0;
557         int i = 2;
558         for (int ai=0; ai<length; ) {
559             // This part of the loop places the next byte into the local
560             // variable 'b' each time through the loop.  It keeps the
561             // current character in 'c' and uses the boolean 'nextChar'
562             // to see if we've taken both bytes out of 'c' yet.
563             byte b;
564             if (nextChar) {
565                 c = s.charAt(i++);
566                 b = (byte) (c >> 8);
567                 nextChar = false;
568             }
569             else {
570                 b = (byte) (c & 0xFF);
571                 nextChar = true;
572             }
573 
574             // This part of the loop is a tiny state machine which handles
575             // the parsing of the run-length encoding.  This would be simpler
576             // if we could look ahead, but we can't, so we use 'node' to
577             // move between three nodes in the state machine.
578             switch (node) {
579             case 0:
580                 // Normal idle node
581                 if (b == ESCAPE_BYTE) {
582                     node = 1;
583                 }
584                 else {
585                     array[ai++] = b;
586                 }
587                 break;
588             case 1:
589                 // We have seen one ESCAPE_BYTE; we expect either a second
590                 // one, or a run length and value.
591                 if (b == ESCAPE_BYTE) {
592                     array[ai++] = ESCAPE_BYTE;
593                     node = 0;
594                 }
595                 else {
596                     runLength = b;
597                     // Interpret signed byte as unsigned
598                     if (runLength < 0) runLength += 0x100;
599                     node = 2;
600                 }
601                 break;
602             case 2:
603                 // We have seen an ESCAPE_BYTE and length byte.  We interpret
604                 // the next byte as the value to be repeated.
605                 for (int j=0; j<runLength; ++j) array[ai++] = b;
606                 node = 0;
607                 break;
608             }
609         }
610 
611         if (node != 0)
612             throw new IllegalStateException  ("Bad run-length encoded byte array");
613 
614         if (i != s.length())
615             throw new IllegalStateException  ("Excess data in RLE byte array string");
616 
617         return array;
618     }
619 
    /**
     * The value of the "line.separator" system property, read once when this
     * field is initialized.  formatForSource writes it between output lines.
     * The field is public and not final, so other code can reassign it.
     */
    static public String   LINE_SEPARATOR = System.getProperty("line.separator");
621 
622     /**
623      * Format a String for representation in a source file.  This includes
624      * breaking it into lines and escaping characters using octal notation
625      * when necessary (control characters and double quotes).
626      */
627     static public final String   formatForSource(String   s) {
628         StringBuffer   buffer = new StringBuffer  ();
629         for (int i=0; i<s.length();) {
630             if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
631             buffer.append("        \"");
632             int count = 11;
633             while (i<s.length() && count<80) {
634                 char c = s.charAt(i++);
635                 if (c < '\u0020' || c == '"' || c == '\\') {
636                     if (c == '\n') {
637                         buffer.append("\\n");
638                         count += 2;
639                     } else if (c == '\t') {
640                         buffer.append("\\t");
641                         count += 2;
642                     } else if (c == '\r') {
643                         buffer.append("\\r");
644                         count += 2;
645                     } else {
646                         // Represent control characters, backslash and double quote
647                         // using octal notation; otherwise the string we form
648                         // won't compile, since Unicode escape sequences are
649                         // processed before tokenization.
650                         buffer.append('\\');
651                         buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
652                         buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
653                         buffer.append(HEX_DIGIT[(c & 0007)]);
654                         count += 4;
655                     }
656                 }
657                 else if (c <= '\u007E') {
658                     buffer.append(c);
659                     count += 1;
660                 }
661                 else {
662                     buffer.append("\\u");
663                     buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
664                     buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
665                     buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
666                     buffer.append(HEX_DIGIT[(c & 0x000F)]);
667                     count += 6;
668                 }
669             }
670             buffer.append('"');
671         }
672         return buffer.toString();
673     }
674 
    /**
     * Digit characters indexed by their value, 0 through 15, with uppercase
     * letters for 10 through 15.  The first eight entries double as the octal
     * digits in formatForSource and format1ForSource.
     */
    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
                     '8','9','A','B','C','D','E','F'};
677 
678     /**
679      * Format a String for representation in a source file.  Like
680      * formatForSource but does not do line breaking.
681      */
682     static public final String   format1ForSource(String   s) {
683         StringBuffer   buffer = new StringBuffer  ();
684         buffer.append("\"");
685         for (int i=0; i<s.length();) {
686             char c = s.charAt(i++);
687             if (c < '\u0020' || c == '"' || c == '\\') {
688                 if (c == '\n') {
689                     buffer.append("\\n");
690                 } else if (c == '\t') {
691                     buffer.append("\\t");
692                 } else if (c == '\r') {
693                     buffer.append("\\r");
694                 } else {
695                     // Represent control characters, backslash and double quote
696                     // using octal notation; otherwise the string we form
697                     // won't compile, since Unicode escape sequences are
698                     // processed before tokenization.
699                     buffer.append('\\');
700                     buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
701                     buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
702                     buffer.append(HEX_DIGIT[(c & 0007)]);
703                 }
704             }
705             else if (c <= '\u007E') {
706                 buffer.append(c);
707             }
708             else {
709                 buffer.append("\\u");
710                 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
711                 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
712                 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
713                 buffer.append(HEX_DIGIT[(c & 0x000F)]);
714             }
715         }
716         buffer.append('"');
717         return buffer.toString();
718     }
719 
720     /**
721      * Convert characters outside the range U+0020 to U+007F to
722      * Unicode escapes, and convert backslash to a double backslash.
723      */
724     public static final String   escape(String   s) {
725         StringBuffer   buf = new StringBuffer  ();
726         for (int i=0; i<s.length(); ) {
727             int c = UTF16.charAt(s, i);
728             i += UTF16.getCharCount(c);
729             if (c >= ' ' && c <= 0x007F) {
730                 if (c == '\\') {
731                     buf.append("\\\\"); // That is, "\\"
732                 } else {
733                     buf.append((char)c);
734                 }
735             } else {
736                 boolean four = c <= 0xFFFF;
737                 buf.append(four ? "\\u" : "\\U");
738                 hex(c, four ? 4 : 8, buf);
739             }
740         }
741         return buf.toString();
742     }
743 
    /*
     * Flat table of pairs: the letter that follows a backslash, then the
     * character value that escape stands for (for example 'n' maps to 0x0a).
     * The quote, question mark and backslash rows are commented out and so
     * are not part of the table.
     * NOTE(review): the lookup code is not visible in this part of the file;
     * presumably it relies on the ordering stated below -- confirm.
     */
    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    static private final char[] UNESCAPE_MAP = {
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
759 
760     /**
761      * Convert an escape to a 32-bit code point value.  We attempt
762      * to parallel the icu4c unescapeAt() function.
763      * @param offset16 an array containing offset to the character
764      * <em>after</em> the backslash.  Upon return offset16[0] will
765      * be updated to point after the escape sequence.
766      * @return character value from 0 to 10FFFF, or -1 on error.
767      */
768     public static int unescapeAt(String   s, int[] offset16) {
769         int c;
770         int result = 0;
771         int n = 0;
772         int minDig = 0;
773         int maxDig = 0;
774         int bitsPerDigit = 4;
775         int dig;
776         int i;
777         boolean braces = false;
778 
779         /* Check that offset is in range */
780         int offset = offset16[0];
781         int length = s.length();
782         if (offset < 0 || offset >= length) {
783             return -1;
784         }
785 
786         /* Fetch first UChar after '\\' */
787         c = UTF16.charAt(s, offset);
788         offset += UTF16.getCharCount(c);
789 
790         /* Convert hexadecimal and octal escapes */
791         switch (c) {
792         case 'u':
793             minDig = maxDig = 4;
794             break;
795         case 'U':
796             minDig = maxDig = 8;
797             break;
798         case 'x':
799             minDig = 1;
800             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
801                 ++offset;
802                 braces = true;
803                 maxDig = 8;
804             } else {
805                 maxDig = 2;
806             }
807             break;
808         default:
809             dig = UCharacter.digit(c, 8);
810             if (dig >= 0) {
811                 minDig = 1;
812                 maxDig = 3;
813                 n = 1; /* Already have first octal digit */
814                 bitsPerDigit = 3;
815                 result = dig;
816             }
817             break;
818         }
819         if (minDig != 0) {
820             while (offset < length && n < maxDig) {
821                 c = UTF16.charAt(s, offset);
822                 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
823                 if (dig < 0) {
824                     break;
825                 }
826                 result = (result << bitsPerDigit) | dig;
827                 offset += UTF16.getCharCount(c);
828                 ++n;
829             }
830             if (n < minDig) {
831                 return -1;
832             }
833             if (braces) {
834                 if (c != 0x7D /*}*/) {
835                     return -1;
836                 }
837                 ++offset;
838             }
839             if (result < 0 || result >= 0x110000) {
840                 return -1;
841             }
842             // If an escape sequence specifies a lead surrogate, see
843             // if there is a trail surrogate after it, either as an
844             // escape or as a literal.  If so, join them up into a
845             // supplementary.
846             if (offset < length &&
847                 UTF16.isLeadSurrogate((char) result)) {
848                 int ahead = offset+1;
849                 c = s.charAt(offset); // [sic] get 16-bit code unit
850                 if (c == '\\' && ahead < length) {
851                     int o[] = new int[] { ahead };
852                     c = unescapeAt(s, o);
853                     ahead = o[0];
854                 }
855                 if (UTF16.isTrailSurrogate((char) c)) {
856                     offset = ahead;
857                 result = UCharacterProperty.getRawSupplementary(
858                                   (char) result, (char) c);
859                 }
860             }
861             offset16[0] = offset;
862             return result;
863         }
864 
865         /* Convert C-style escapes in table */
866         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
867             if (c == UNESCAPE_MAP[i]) {
868                 offset16[0] = offset;
869                 return UNESCAPE_MAP[i+1];
870             } else if (c < UNESCAPE_MAP[i]) {
871                 break;
872             }
873         }
874 
875         /* Map \cX to control-X: X & 0x1F */
876         if (c == 'c' && offset < length) {
877             c = UTF16.charAt(s, offset);
878             offset16[0] = offset + UTF16.getCharCount(c);
879             return 0x1F & c;
880         }
881 
882         /* If no special forms are recognized, then consider
883          * the backslash to generically escape the next character. */
884         offset16[0] = offset;
885         return c;
886     }
887 
888     /**
889      * Convert all escapes in a given string using unescapeAt().
890      * @exception IllegalArgumentException if an invalid escape is
891      * seen.
892      */
893     public static String   unescape(String   s) {
894         StringBuffer   buf = new StringBuffer  ();
895         int[] pos = new int[1];
896         for (int i=0; i<s.length(); ) {
897             char c = s.charAt(i++);
898             if (c == '\\') {
899                 pos[0] = i;
900                 int e = unescapeAt(s, pos);
901                 if (e < 0) {
902                     throw new IllegalArgumentException  ("Invalid escape sequence " +
903                                                        s.substring(i-1, Math.min(i+8, s.length())));
904                 }
905                 UTF16.append(buf, e);
906                 i = pos[0];
907             } else {
908                 buf.append(c);
909             }
910         }
911         return buf.toString();
912     }
913 
914     /**
915      * Convert all escapes in a given string using unescapeAt().
916      * Leave invalid escape sequences unchanged.
917      */
918     ///CLOVER:OFF
919     public static String   unescapeLeniently(String   s) {
920         StringBuffer   buf = new StringBuffer  ();
921         int[] pos = new int[1];
922         for (int i=0; i<s.length(); ) {
923             char c = s.charAt(i++);
924             if (c == '\\') {
925                 pos[0] = i;
926                 int e = unescapeAt(s, pos);
927                 if (e < 0) {
928                     buf.append(c);
929                 } else {
930                     UTF16.append(buf, e);
931                     i = pos[0];
932                 }
933             } else {
934                 buf.append(c);
935             }
936         }
937         return buf.toString();
938     }
939     ///CLOVER:ON
940 
941     /**
942      * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
943      * "0041".
944      */
945     ///CLOVER:OFF
946     public static String   hex(char ch) {
947         StringBuffer   temp = new StringBuffer  ();
948         return hex(ch, temp).toString();
949     }
950     ///CLOVER:ON
951 
952     /**
953      * Convert a string to comma-separated groups of 4 hex uppercase
954      * digits.  E.g., hex('ab') => "0041,0042".
955      */
956     ///CLOVER:OFF
957     public static String   hex(String   s) {
958         StringBuffer   temp = new StringBuffer  ();
959         return hex(s, temp).toString();
960     }
961     ///CLOVER:ON
962 
963     /**
964      * Convert a string to comma-separated groups of 4 hex uppercase
965      * digits.  E.g., hex('ab') => "0041,0042".
966      */
967     ///CLOVER:OFF
968     public static String   hex(StringBuffer   s) {
969         return hex(s.toString());
970     }
971     ///CLOVER:ON
972 
973     /**
974      * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
975      * "0041".  Append the output to the given StringBuffer.
976      */
977     ///CLOVER:OFF
978     public static StringBuffer   hex(char ch, StringBuffer   output) {
979         return appendNumber(output, ch, 16, 4);
980     }
981     ///CLOVER:ON
982 
983     /**
984      * Convert a integer to size width hex uppercase digits.
985      * E.g., hex('a', 4, str) => "0041".
986      * Append the output to the given StringBuffer.
987      * If width is too small to fit, nothing will be appended to output.
988      */
989     public static StringBuffer   hex(int ch, int width, StringBuffer   output) {
990         return appendNumber(output, ch, 16, width);
991     }
992 
993     /**
994      * Convert a integer to size width (minimum) hex uppercase digits.
995      * E.g., hex('a', 4, str) => "0041".  If the integer requires more
996      * than width digits, more will be used.
997      */
998     public static String   hex(int ch, int width) {
999         StringBuffer   buf = new StringBuffer  ();
1000        return appendNumber(buf, ch, 16, width).toString();
1001    }
1002    /**
1003     * Supplies a zero-padded hex representation of an integer (without 0x)
1004     */
1005    static public String   hex(long i, int places) {
1006        if (i == Long.MIN_VALUE) return "-8000000000000000";
1007        boolean negative = i < 0;
1008        if (negative) {
1009            i = -i;
1010        }
1011        String   result = Long.toString(i, 16).toUpperCase();
1012        if (result.length() < places) {
1013            result = "0000000000000000".substring(result.length(),places) + result;
1014        }
1015        if (negative) {
1016            return '-' + result;
1017        }
1018        return result;
1019    }
1020    
1021    public static String   hex(long ch) {
1022        return hex(ch,4);
1023    }
1024    
1025    /**
1026     * Convert a string to comma-separated groups of 4 hex uppercase
1027     * digits.  E.g., hex('ab') => "0041,0042".  Append the output
1028     * to the given StringBuffer.
1029     */
1030    ///CLOVER:OFF
1031    public static StringBuffer   hex(String   s, StringBuffer   result) {
1032        for (int i = 0; i < s.length(); ++i) {
1033            if (i != 0) result.append(',');
1034            hex(s.charAt(i), result);
1035        }
1036        return result;
1037    }
1038    ///CLOVER:ON
1039
1040    /**
1041     * Split a string into pieces based on the given divider character
1042     * @param s the string to split
1043     * @param divider the character on which to split.  Occurrences of
1044     * this character are not included in the output
1045     * @param output an array to receive the substrings between
1046     * instances of divider.  It must be large enough on entry to
1047     * accomodate all output.  Adjacent instances of the divider
1048     * character will place empty strings into output.  Before
1049     * returning, output is padded out with empty strings.
1050     */
1051    ///CLOVER:OFF
1052    public static void split(String   s, char divider, String  [] output) {
1053        int last = 0;
1054        int current = 0;
1055        int i;
1056        for (i = 0; i < s.length(); ++i) {
1057            if (s.charAt(i) == divider) {
1058                output[current++] = s.substring(last,i);
1059                last = i+1;
1060            }
1061        }
1062        output[current++] = s.substring(last,i);
1063        while (current < output.length) {
1064            output[current++] = "";
1065        }
1066    }
1067    /**
1068     * Split a string into pieces based on the given divider character
1069     * @param s the string to split
1070     * @param divider the character on which to split.  Occurrences of
1071     * this character are not included in the output
1072     * @return output an array to receive the substrings between
1073     * instances of divider. Adjacent instances of the divider
1074     * character will place empty strings into output.
1075     */
1076    public static String  [] split(String   s, char divider) {
1077        int last = 0;
1078        int i;
1079        ArrayList   output = new ArrayList  ();
1080        for (i = 0; i < s.length(); ++i) {
1081            if (s.charAt(i) == divider) {
1082                output.add(s.substring(last,i));
1083                last = i+1;
1084            }
1085        }
1086        output.add( s.substring(last,i));
1087        return (String  []) output.toArray(new String  [output.size()]);
1088    }
1089    
1090    ///CLOVER:ON
1091    
1092    /**
1093     * Look up a given string in a string array.  Returns the index at
1094     * which the first occurrence of the string was found in the
1095     * array, or -1 if it was not found.
1096     * @param source the string to search for
1097     * @param target the array of zero or more strings in which to
1098     * look for source
1099     * @return the index of target at which source first occurs, or -1
1100     * if not found
1101     */
1102    ///CLOVER:OFF
1103    public static int lookup(String   source, String  [] target) {
1104        for (int i = 0; i < target.length; ++i) {
1105            if (source.equals(target[i])) return i;
1106        }
1107        return -1;
1108    }
1109    ///CLOVER:ON
1110
1111    /**
1112     * Skip over a sequence of zero or more white space characters
1113     * at pos.  Return the index of the first non-white-space character
1114     * at or after pos, or str.length(), if there is none.
1115     */
1116    public static int skipWhitespace(String   str, int pos) {
1117        while (pos < str.length()) {
1118            int c = UTF16.charAt(str, pos);
1119            if (!UCharacterProperty.isRuleWhiteSpace(c)) {
1120                break;
1121            }
1122            pos += UTF16.getCharCount(c);
1123        }
1124        return pos;
1125    }
1126
1127    /**
1128     * Skip over a sequence of zero or more white space characters
1129     * at pos[0], advancing it.
1130     */
1131    public static void skipWhitespace(String   str, int[] pos) {
1132        pos[0] = skipWhitespace(str, pos[0]);
1133    }
1134
1135    /**
1136     * Remove all rule white space from a string.
1137     */
1138    public static String   deleteRuleWhiteSpace(String   str) {
1139        StringBuffer   buf = new StringBuffer  ();
1140        for (int i=0; i<str.length(); ) {
1141            int ch = UTF16.charAt(str, i);
1142            i += UTF16.getCharCount(ch);
1143            if (UCharacterProperty.isRuleWhiteSpace(ch)) {
1144                continue;
1145            }
1146            UTF16.append(buf, ch);
1147        }
1148        return buf.toString();
1149    }
1150
1151    /**
1152     * Parse a single non-whitespace character 'ch', optionally
1153     * preceded by whitespace.
1154     * @param id the string to be parsed
1155     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
1156     * offset of the first character to be parsed.  On output, pos[0]
1157     * is the index after the last parsed character.  If the parse
1158     * fails, pos[0] will be unchanged.
1159     * @param ch the non-whitespace character to be parsed.
1160     * @return true if 'ch' is seen preceded by zero or more
1161     * whitespace characters.
1162     */
1163    public static boolean parseChar(String   id, int[] pos, char ch) {
1164        int start = pos[0];
1165        skipWhitespace(id, pos);
1166        if (pos[0] == id.length() ||
1167            id.charAt(pos[0]) != ch) {
1168            pos[0] = start;
1169            return false;
1170        }
1171        ++pos[0];
1172        return true;
1173    }
1174
1175    /**
1176     * Parse a pattern string starting at offset pos.  Keywords are
1177     * matched case-insensitively.  Spaces may be skipped and may be
1178     * optional or required.  Integer values may be parsed, and if
1179     * they are, they will be returned in the given array.  If
1180     * successful, the offset of the next non-space character is
1181     * returned.  On failure, -1 is returned.
1182     * @param pattern must only contain lowercase characters, which
1183     * will match their uppercase equivalents as well.  A space
1184     * character matches one or more required spaces.  A '~' character
1185     * matches zero or more optional spaces.  A '#' character matches
1186     * an integer and stores it in parsedInts, which the caller must
1187     * ensure has enough capacity.
1188     * @param parsedInts array to receive parsed integers.  Caller
1189     * must ensure that parsedInts.length is >= the number of '#'
1190     * signs in 'pattern'.
1191     * @return the position after the last character parsed, or -1 if
1192     * the parse failed
1193     */
1194    public static int parsePattern(String   rule, int pos, int limit,
1195                                   String   pattern, int[] parsedInts) {
1196        // TODO Update this to handle surrogates
1197        int[] p = new int[1];
1198        int intCount = 0; // number of integers parsed
1199        for (int i=0; i<pattern.length(); ++i) {
1200            char cpat = pattern.charAt(i);
1201            char c;
1202            switch (cpat) {
1203            case ' ':
1204                if (pos >= limit) {
1205                    return -1;
1206                }
1207                c = rule.charAt(pos++);
1208                if (!UCharacterProperty.isRuleWhiteSpace(c)) {
1209                    return -1;
1210                }
1211                // FALL THROUGH to skipWhitespace
1212            case '~':
1213                pos = skipWhitespace(rule, pos);
1214                break;
1215            case '#':
1216                p[0] = pos;
1217                parsedInts[intCount++] = parseInteger(rule, p, limit);
1218                if (p[0] == pos) {
1219                    // Syntax error; failed to parse integer
1220                    return -1;
1221                }
1222                pos = p[0];
1223                break;
1224            default:
1225                if (pos >= limit) {
1226                    return -1;
1227                }
1228                c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
1229                if (c != cpat) {
1230                    return -1;
1231                }
1232                break;
1233            }
1234        }
1235        return pos;
1236    }
1237
1238    /**
1239     * Parse a pattern string within the given Replaceable and a parsing
1240     * pattern.  Characters are matched literally and case-sensitively
1241     * except for the following special characters:
1242     *
1243     * ~  zero or more uprv_isRuleWhiteSpace chars
1244     *
1245     * If end of pattern is reached with all matches along the way,
1246     * pos is advanced to the first unparsed index and returned.
1247     * Otherwise -1 is returned.
1248     * @param pat pattern that controls parsing
1249     * @param text text to be parsed, starting at index
1250     * @param index offset to first character to parse
1251     * @param limit offset after last character to parse
1252     * @return index after last parsed character, or -1 on parse failure.
1253     */
1254    public static int parsePattern(String   pat,
1255                                   Replaceable text,
1256                                   int index,
1257                                   int limit) {
1258        int ipat = 0;
1259
1260        // empty pattern matches immediately
1261        if (ipat == pat.length()) {
1262            return index;
1263        }
1264
1265        int cpat = UTF16.charAt(pat, ipat);
1266
1267        while (index < limit) {
1268            int c = text.char32At(index);
1269
1270            // parse \s*
1271            if (cpat == '~') {
1272                if (UCharacterProperty.isRuleWhiteSpace(c)) {
1273                    index += UTF16.getCharCount(c);
1274                    continue;
1275                } else {
1276                    if (++ipat == pat.length()) {
1277                        return index; // success; c unparsed
1278                    }
1279                    // fall thru; process c again with next cpat
1280                }
1281            }
1282
1283            // parse literal
1284            else if (c == cpat) {
1285                int n = UTF16.getCharCount(c);
1286                index += n;
1287                ipat += n;
1288                if (ipat == pat.length()) {
1289                    return index; // success; c parsed
1290                }
1291                // fall thru; get next cpat
1292            }
1293
1294            // match failure of literal
1295            else {
1296                return -1;
1297            }
1298
1299            cpat = UTF16.charAt(pat, ipat);
1300        }
1301
1302        return -1; // text ended before end of pat
1303    }
1304
1305    /**
1306     * Parse an integer at pos, either of the form \d+ or of the form
1307     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
1308     * or octal format.
1309     * @param pos INPUT-OUTPUT parameter.  On input, the first
1310     * character to parse.  On output, the character after the last
1311     * parsed character.
1312     */
1313    public static int parseInteger(String   rule, int[] pos, int limit) {
1314        int count = 0;
1315        int value = 0;
1316        int p = pos[0];
1317        int radix = 10;
1318
1319        if (rule.regionMatches(true, p, "0x", 0, 2)) {
1320            p += 2;
1321            radix = 16;
1322        } else if (p < limit && rule.charAt(p) == '0') {
1323            p++;
1324            count = 1;
1325            radix = 8;
1326        }
1327
1328        while (p < limit) {
1329            int d = UCharacter.digit(rule.charAt(p++), radix);
1330            if (d < 0) {
1331                --p;
1332                break;
1333            }
1334            ++count;
1335            int v = (value * radix) + d;
1336            if (v <= value) {
1337                // If there are too many input digits, at some point
1338                // the value will go negative, e.g., if we have seen
1339                // "0x8000000" already and there is another '0', when
1340                // we parse the next 0 the value will go negative.
1341                return 0;
1342            }
1343            value = v;
1344        }
1345        if (count > 0) {
1346            pos[0] = p;
1347        }
1348        return value;
1349    }
1350
1351    /**
1352     * Parse a Unicode identifier from the given string at the given
1353     * position.  Return the identifier, or null if there is no
1354     * identifier.
1355     * @param str the string to parse
1356     * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
1357     * first character to examine.  It must be less than str.length(),
1358     * and it must not point to a whitespace character.  That is, must
1359     * have pos[0] < str.length() and
1360     * !UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])).  On
1361     * OUTPUT, the position after the last parsed character.
1362     * @return the Unicode identifier, or null if there is no valid
1363     * identifier at pos[0].
1364     */
1365    public static String   parseUnicodeIdentifier(String   str, int[] pos) {
1366        // assert(pos[0] < str.length());
1367        // assert(!UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])));
1368        StringBuffer   buf = new StringBuffer  ();
1369        int p = pos[0];
1370        while (p < str.length()) {
1371            int ch = UTF16.charAt(str, p);
1372            if (buf.length() == 0) {
1373                if (UCharacter.isUnicodeIdentifierStart(ch)) {
1374                    UTF16.append(buf, ch);
1375                } else {
1376                    return null;
1377                }
1378            } else {
1379                if (UCharacter.isUnicodeIdentifierPart(ch)) {
1380                    UTF16.append(buf, ch);
1381                } else {
1382                    break;
1383                }
1384            }
1385            p += UTF16.getCharCount(ch);
1386        }
1387        pos[0] = p;
1388        return buf.toString();
1389    }
1390
1391    /**
1392     * Trim whitespace from ends of a StringBuffer.
1393     */
1394    ///CLOVER:OFF
1395    public static StringBuffer   trim(StringBuffer   b) {
1396        // TODO update to handle surrogates
1397        int i;
1398        for (i=0; i<b.length() && Character.isWhitespace(b.charAt(i)); ++i) {}
1399        b.delete(0, i);
1400        for (i=b.length()-1; i>=0 && Character.isWhitespace(b.charAt(i)); --i) {}
1401        return b.delete(i+1, b.length());
1402    }
1403    ///CLOVER:ON
1404
1405    static final char DIGITS[] = {
1406        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
1407        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
1408        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
1409        'U', 'V', 'W', 'X', 'Y', 'Z'
1410    };
1411
1412    /**
1413     * Append a number to the given StringBuffer in the radix 10
1414     * generating at least one digit.
1415     */
1416    ///CLOVER:OFF
1417    public static StringBuffer   appendNumber(StringBuffer   result, int n) {
1418        return appendNumber(result, n, 10, 1);
1419    }
1420    ///CLOVER:ON
1421
1422    /**
1423     * Append the digits of a positive integer to the given
1424     * <code>StringBuffer</code> in the given radix. This is
1425     * done recursively since it is easiest to generate the low-
1426     * order digit first, but it must be appended last.
1427     *
1428     * @param result is the <code>StringBuffer</code> to append to
1429     * @param n is the positive integer
1430     * @param radix is the radix, from 2 to 36 inclusive
1431     * @param minDigits is the minimum number of digits to append.
1432     */
1433    private static void recursiveAppendNumber(StringBuffer   result, int n,
1434                                                int radix, int minDigits)
1435    {
1436        int digit = n % radix;
1437
1438        if (n >= radix || minDigits > 1) {
1439            recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
1440        }
1441
1442        result.append(DIGITS[digit]);
1443    }
1444
1445    /**
1446     * Append a number to the given StringBuffer in the given radix.
1447     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
1448     * radices 11 through 36.
1449     * @param result the digits of the number are appended here
1450     * @param n the number to be converted to digits; may be negative.
1451     * If negative, a '-' is prepended to the digits.
1452     * @param radix a radix from 2 to 36 inclusive.
1453     * @param minDigits the minimum number of digits, not including
1454     * any '-', to produce.  Values less than 2 have no effect.  One
1455     * digit is always emitted regardless of this parameter.
1456     * @return a reference to result
1457     */
1458    public static StringBuffer   appendNumber(StringBuffer   result, int n,
1459                                             int radix, int minDigits)
1460        throws IllegalArgumentException  
1461    {
1462        if (radix < 2 || radix > 36) {
1463            throw new IllegalArgumentException  ("Illegal radix " + radix);
1464        }
1465
1466
1467        int abs = n;
1468
1469        if (n < 0) {
1470            abs = -n;
1471            result.append("-");
1472        }
1473
1474        recursiveAppendNumber(result, abs, radix, minDigits);
1475
1476        return result;
1477    }
1478
1479    /**
1480     * Parse an unsigned 31-bit integer at the given offset.  Use
1481     * UCharacter.digit() to parse individual characters into digits.
1482     * @param text the text to be parsed
1483     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
1484     * offset within text at which to start parsing; it should point
1485     * to a valid digit.  On exit, pos[0] is the offset after the last
1486     * parsed character.  If the parse failed, it will be unchanged on
1487     * exit.  Must be >= 0 on entry.
1488     * @param radix the radix in which to parse; must be >= 2 and <=
1489     * 36.
1490     * @return a non-negative parsed number, or -1 upon parse failure.
1491     * Parse fails if there are no digits, that is, if pos[0] does not
1492     * point to a valid digit on entry, or if the number to be parsed
1493     * does not fit into a 31-bit unsigned integer.
1494     */
1495    public static int parseNumber(String   text, int[] pos, int radix) {
1496        // assert(pos[0] >= 0);
1497        // assert(radix >= 2);
1498        // assert(radix <= 36);
1499        int n = 0;
1500        int p = pos[0];
1501        while (p < text.length()) {
1502            int ch = UTF16.charAt(text, p);
1503            int d = UCharacter.digit(ch, radix);
1504            if (d < 0) {
1505                break;
1506            }
1507            n = radix*n + d;
1508            // ASSUME that when a 32-bit integer overflows it becomes
1509            // negative.  E.g., 214748364 * 10 + 8 => negative value.
1510            if (n < 0) {
1511                return -1;
1512            }
1513            ++p;
1514        }
1515        if (p == pos[0]) {
1516            return -1;
1517        }
1518        pos[0] = p;
1519        return n;
1520    }
1521
1522    /**
1523     * Return true if the character is NOT printable ASCII.  The tab,
1524     * newline and linefeed characters are considered unprintable.
1525     */
1526    public static boolean isUnprintable(int c) {
1527        return !(c >= 0x20 && c <= 0x7E);
1528    }
1529
1530    /**
1531     * Escape unprintable characters using <backslash>uxxxx notation
1532     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
1533     * above.  If the character is printable ASCII, then do nothing
1534     * and return FALSE.  Otherwise, append the escaped notation and
1535     * return TRUE.
1536     */
1537    public static boolean escapeUnprintable(StringBuffer   result, int c) {
1538        if (isUnprintable(c)) {
1539            result.append('\\');
1540            if ((c & ~0xFFFF) != 0) {
1541                result.append('U');
1542                result.append(DIGITS[0xF&(c>>28)]);
1543                result.append(DIGITS[0xF&(c>>24)]);
1544                result.append(DIGITS[0xF&(c>>20)]);
1545                result.append(DIGITS[0xF&(c>>16)]);
1546            } else {
1547                result.append('u');
1548            }
1549            result.append(DIGITS[0xF&(c>>12)]);
1550            result.append(DIGITS[0xF&(c>>8)]);
1551            result.append(DIGITS[0xF&(c>>4)]);
1552            result.append(DIGITS[0xF&c]);
1553            return true;
1554        }
1555        return false;
1556    }
1557
1558    /**
1559     * Returns the index of the first character in a set, ignoring quoted text.
1560     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
1561     * found by a search for "h".  Unlike String.indexOf(), this method searches
1562     * not for a single character, but for any character of the string
1563     * <code>setOfChars</code>.
1564     * @param text text to be searched
1565     * @param start the beginning index, inclusive; <code>0 <= start
1566     * <= limit</code>.
1567     * @param limit the ending index, exclusive; <code>start <= limit
1568     * <= text.length()</code>.
1569     * @param setOfChars string with one or more distinct characters
1570     * @return Offset of the first character in <code>setOfChars</code>
1571     * found, or -1 if not found.
1572     * @see String#indexOf
1573     */
1574    public static int quotedIndexOf(String   text, int start, int limit,
1575                                    String   setOfChars) {
1576        for (int i=start; i<limit; ++i) {
1577            char c = text.charAt(i);
1578            if (c == BACKSLASH) {
1579                ++i;
1580            } else if (c == APOSTROPHE) {
1581                while (++i < limit
1582                       && text.charAt(i) != APOSTROPHE) {}
1583            } else if (setOfChars.indexOf(c) >= 0) {
1584                return i;
1585            }
1586        }
1587        return -1;
1588    }
1589
1590    /**
1591    * Similar to StringBuffer.getChars, version 1.3.
1592    * Since JDK 1.2 implements StringBuffer.getChars differently, this method
1593    * is here to provide consistent results.
1594    * To be removed after JDK 1.2 ceased to be the reference platform.
1595    * @param src source string buffer
1596    * @param srcBegin offset to the start of the src to retrieve from
1597    * @param srcEnd offset to the end of the src to retrieve from
1598    * @param dst char array to store the retrieved chars
1599    * @param dstBegin offset to the start of the destination char array to
1600    *                 store the retrieved chars
1601    * @draft since ICU4J 2.0
1602    */
1603    public static void getChars(StringBuffer   src, int srcBegin, int srcEnd,
1604                                char dst[], int dstBegin)
1605    {
1606        if (srcBegin == srcEnd) {
1607            return;
1608        }
1609        src.getChars(srcBegin, srcEnd, dst, dstBegin);
1610    }
1611
1612    /**
1613     * Append a character to a rule that is being built up.  To flush
1614     * the quoteBuf to rule, make one final call with isLiteral == true.
1615     * If there is no final character, pass in (int)-1 as c.
1616     * @param rule the string to append the character to
1617     * @param c the character to append, or (int)-1 if none.
1618     * @param isLiteral if true, then the given character should not be
1619     * quoted or escaped.  Usually this means it is a syntactic element
1620     * such as > or $
1621     * @param escapeUnprintable if true, then unprintable characters
1622     * should be escaped using escapeUnprintable().  These escapes will
1623     * appear outside of quotes.
1624     * @param quoteBuf a buffer which is used to build up quoted
1625     * substrings.  The caller should initially supply an empty buffer,
1626     * and thereafter should not modify the buffer.  The buffer should be
1627     * cleared out by, at the end, calling this method with a literal
1628     * character (which may be -1).
1629     */
1630    public static void appendToRule(StringBuffer   rule,
1631                                    int c,
1632                                    boolean isLiteral,
1633                                    boolean escapeUnprintable,
1634                                    StringBuffer   quoteBuf) {
1635        // If we are escaping unprintables, then escape them outside
1636        // quotes.  \\u and \\U are not recognized within quotes.  The same
1637        // logic applies to literals, but literals are never escaped.
1638        if (isLiteral ||
1639            (escapeUnprintable && Utility.isUnprintable(c))) {
1640            if (quoteBuf.length() > 0) {
1641                // We prefer backslash APOSTROPHE to double APOSTROPHE
1642                // (more readable, less similar to ") so if there are
1643                // double APOSTROPHEs at the ends, we pull them outside
1644                // of the quote.
1645
1646                // If the first thing in the quoteBuf is APOSTROPHE
1647                // (doubled) then pull it out.
1648                while (quoteBuf.length() >= 2 &&
1649                       quoteBuf.charAt(0) == APOSTROPHE &&
1650                       quoteBuf.charAt(1) == APOSTROPHE) {
1651                    rule.append(BACKSLASH).append(APOSTROPHE);
1652                    quoteBuf.delete(0, 2);
1653                }
1654                // If the last thing in the quoteBuf is APOSTROPHE
1655                // (doubled) then remove and count it and add it after.
1656                int trailingCount = 0;
1657                while (quoteBuf.length() >= 2 &&
1658                       quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
1659                       quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
1660                    quoteBuf.setLength(quoteBuf.length()-2);
1661                    ++trailingCount;
1662                }
1663                if (quoteBuf.length() > 0) {
1664                    rule.append(APOSTROPHE);
1665                    // jdk 1.3.1 does not have append(StringBuffer) yet
1666                    if(ICUDebug.isJDK14OrHigher){
1667                        rule.append(quoteBuf);
1668                    }else{
1669                        rule.append(quoteBuf.toString());
1670                    }
1671                    rule.append(APOSTROPHE);
1672                    quoteBuf.setLength(0);
1673                }
1674                while (trailingCount-- > 0) {
1675                    rule.append(BACKSLASH).append(APOSTROPHE);
1676                }
1677            }
1678            if (c != -1) {
1679                /* Since spaces are ignored during parsing, they are
1680                 * emitted only for readability.  We emit one here
1681                 * only if there isn't already one at the end of the
1682                 * rule.
1683                 */
1684                if (c == ' ') {
1685                    int len = rule.length();
1686                    if (len > 0 && rule.charAt(len-1) != ' ') {
1687                        rule.append(' ');
1688                    }
1689                } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
1690                    UTF16.append(rule, c);
1691                }
1692            }
1693        }
1694
1695        // Escape ' and '\' and don't begin a quote just for them
1696        else if (quoteBuf.length() == 0 &&
1697                 (c == APOSTROPHE || c == BACKSLASH)) {
1698            rule.append(BACKSLASH).append((char)c);
1699        }
1700
1701        // Specials (printable ascii that isn't [0-9a-zA-Z]) and
1702        // whitespace need quoting.  Also append stuff to quotes if we are
1703        // building up a quoted substring already.
1704        else if (quoteBuf.length() > 0 ||
1705                 (c >= 0x0021 && c <= 0x007E &&
1706                  !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
1707                    (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
1708                    (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
1709                 UCharacterProperty.isRuleWhiteSpace(c)) {
1710            UTF16.append(quoteBuf, c);
1711            // Double ' within a quote
1712            if (c == APOSTROPHE) {
1713                quoteBuf.append((char)c);
1714            }
1715        }
1716
1717        // Otherwise just append
1718        else {
1719            UTF16.append(rule, c);
1720        }
1721    }
1722
1723    /**
1724     * Append the given string to the rule.  Calls the single-character
1725     * version of appendToRule for each character.
1726     */
1727    public static void appendToRule(StringBuffer   rule,
1728                                    String   text,
1729                                    boolean isLiteral,
1730                                    boolean escapeUnprintable,
1731                                    StringBuffer   quoteBuf) {
1732        for (int i=0; i<text.length(); ++i) {
1733            // Okay to process in 16-bit code units here
1734            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
1735        }
1736    }
1737
1738    /**
1739     * Given a matcher reference, which may be null, append its
1740     * pattern as a literal to the given rule.
1741     */
1742    public static void appendToRule(StringBuffer   rule,
1743                                    UnicodeMatcher matcher,
1744                                    boolean escapeUnprintable,
1745                                    StringBuffer   quoteBuf) {
1746        if (matcher != null) {
1747            appendToRule(rule, matcher.toPattern(escapeUnprintable),
1748                         true, escapeUnprintable, quoteBuf);
1749        }
1750    }
1751
1752    /**
1753     * Compares 2 unsigned integers
1754     * @param source 32 bit unsigned integer
1755     * @param target 32 bit unsigned integer
1756     * @return 0 if equals, 1 if source is greater than target and -1
1757     *         otherwise
1758     */
1759    public static final int compareUnsigned(int source, int target)
1760    {
1761        source += MAGIC_UNSIGNED;
1762        target += MAGIC_UNSIGNED;
1763        if (source < target) {
1764            return -1;
1765        } 
1766        else if (source > target) {
1767            return 1;
1768        }
1769        return 0;
1770    }
1771
1772    /**
1773     * Find the highest bit in a positive integer. This is done
1774     * by doing a binary search through the bits.
1775     *
1776     * @param n is the integer
1777     *
1778     * @return the bit number of the highest bit, with 0 being
1779     * the low order bit, or -1 if <code>n</code> is not positive
1780     */
1781    public static final byte highBit(int n)
1782    {
1783        if (n <= 0) {
1784            return -1;
1785        }
1786
1787        byte bit = 0;
1788
1789        if (n >= 1 << 16) {
1790            n >>= 16;
1791            bit += 16;
1792        }
1793
1794        if (n >= 1 << 8) {
1795            n >>= 8;
1796            bit += 8;
1797        }
1798
1799        if (n >= 1 << 4) {
1800            n >>= 4;
1801            bit += 4;
1802        }
1803
1804        if (n >= 1 << 2) {
1805            n >>= 2;
1806            bit += 2;
1807        }
1808
1809        if (n >= 1 << 1) {
1810            n >>= 1;
1811            bit += 1;
1812        }
1813
1814        return bit;
1815    }
1816    /**
1817     * Utility method to take a int[] containing codepoints and return
1818     * a string representation with code units. 
1819     */
1820    public static String   valueOf(int[]source){
1821    // TODO: Investigate why this method is not on UTF16 class
1822        StringBuffer   result = new StringBuffer  (source.length);
1823        for(int i=0; i<source.length; i++){
1824            UTF16.append(result,source[i]);
1825        }
1826        return result.toString();
1827    }
1828    
1829    
1830    /**
1831     * Utility to duplicate a string count times
1832     * @param s
1833     * @param count
1834     */
1835    public static String   repeat(String   s, int count) {
1836        if (count <= 0) return "";
1837        if (count == 1) return s;
1838        StringBuffer   result = new StringBuffer  ();
1839        for (int i = 0; i < count; ++i) {
1840            result.append(s);
1841        }
1842        return result.toString();
1843    }
1844
1845    
1846    // !!! 1.3 compatibiliy
1847    public static int indexOf(StringBuffer   buf, String   s) {
1848//#ifndef FOUNDATION
1849//##        return buf.indexOf(s);
1850//#else
1851        return buf.toString().indexOf(s);
1852//#endif
1853    }
1854    
1855    // !!! 1.3 compatibiliy
1856    public static int lastIndexOf(StringBuffer   buf, String   s) {
1857//#ifndef FOUNDATION
1858//##        return buf.lastIndexOf(s);
1859//#else
1860        return buf.toString().lastIndexOf(s);
1861//#endif
1862    }
1863    
1864    // !!! 1.3 compatibiliy
1865    public static int indexOf(StringBuffer   buf, String   s, int i) {
1866//#ifndef FOUNDATION
1867//##        return buf.indexOf(s, i);
1868//#else
1869        return buf.toString().indexOf(s, i);
1870//#endif
1871    }
1872    
1873    // !!! 1.3 compatibiliy
1874   public static int lastIndexOf(StringBuffer   buf, String   s, int i) {
1875//#ifndef FOUNDATION
1876//##        return buf.lastIndexOf(s, i);
1877//#else
1878        return buf.toString().lastIndexOf(s, i);
1879//#endif
1880    }
1881   
1882   // !!! 1.3 compatibiliy
1883   public static String   replaceAll(String   src, String   target, String   replacement) {
1884//#ifndef FOUNDATION
1885//##       return src.replaceAll(target, replacement);
1886//#else
1887       int i = src.indexOf(target);
1888       if (i == -1) {
1889           return src;
1890       }
1891       StringBuffer   buf = new StringBuffer  ();
1892       int n = 0;
1893       do {
1894           buf.append(src.substring(n, i));
1895           buf.append(replacement);
1896           n = i + target.length();
1897           i = src.indexOf(target, n);
1898       } while (i != -1);
1899       if (n < src.length()) {
1900           buf.append(src.substring(n));
1901       }
1902       return buf.toString();
1903//#endif
1904   }
1905}
1906///CLOVER:ON
1907
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags