1 19 package gcc.util; 20 21 public abstract class UTF8 22 { 23 public static byte[] fromString(String value) 24 { 25 int n = value.length(), u = 0; 26 for (int i = 0; i < n; i++) 27 { 28 int c = value.charAt(i); 29 if (c >= 0x0001 && c <= 0x007F) 30 { 31 u++; 32 } 33 else if (c > 0x07FF) 34 { 35 u += 3; 36 } 37 else 38 { 39 u += 2; 40 } 41 } 42 byte[] bytes = new byte[u]; 43 for (int i = 0, j = 0; i < n; i++) 44 { 45 int c = value.charAt(i); 46 if (c >= 0x0001 && c <= 0x007F) 47 { 48 bytes[j++] = (byte)c; 49 } 50 else if (c > 0x07FF) 51 { 52 bytes[j++] = (byte)(0xE0 | ((c >> 12) & 0x0F)); 53 bytes[j++] = (byte)(0x80 | ((c >> 6) & 0x3F)); 54 bytes[j++] = (byte)(0x80 | (c & 0x3F)); 55 } 56 else 57 { 58 bytes[j++] = (byte)(0xC0 | ((c >> 6) & 0x1F)); 59 bytes[j++] = (byte)(0x80 | (c & 0x3F)); 60 } 61 } 62 return bytes; 63 } 64 65 71 public static int fromString(String value, byte[] buffer, int offset, int length) 72 { 73 int n = value.length(), j = offset; 74 for (int i = 0; i < n; i++) 75 { 76 if (j + 3 > length) 77 { 78 return -1; 79 } 80 int c = value.charAt(i); 81 if (c >= 0x0001 && c <= 0x007F) 82 { 83 buffer[j++] = (byte)c; 84 } 85 else if (c > 0x07FF) 86 { 87 buffer[j++] = (byte)(0xE0 | ((c >> 12) & 0x0F)); 88 buffer[j++] = (byte)(0x80 | ((c >> 6) & 0x3F)); 89 buffer[j++] = (byte)(0x80 | (c & 0x3F)); 90 } 91 else 92 { 93 buffer[j++] = (byte)(0xC0 | ((c >> 6) & 0x1F)); 94 buffer[j++] = (byte)(0x80 | (c & 0x3F)); 95 } 96 } 97 return j - offset; 98 } 99 100 public static String toString(byte[] value) 101 { 102 return toString(value, 0, value.length); 103 } 104 105 public static String toString(byte[] value, int offset, int length) 106 { 107 int n = offset + length, j = 0; 108 char[] chars = new char[length]; for (int i = offset; i < n; i++) 110 { 111 int c = (value[i] + 256) & 255; int c2, c3; 113 114 switch (c >> 4) 115 { 116 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: 117 chars[j++] = (char)c; 119 break; 120 121 case 12: case 13: 122 if (i + 1 >= n) 124 { 125 badUtf8Data(); 126 } 127 c2 = (value[++i] + 256) & 255; if ((c2 & 0xC0) != 0x80) 129 { 130 badUtf8Data(); 131 } 132 chars[j++] = (char)(((c & 0x1F) << 6) | (c2 & 0x3F)); 133 break; 134 135 case 14: 136 if (i + 2 >= n) 138 { 139 badUtf8Data(); 140 } 141 c2 = (value[++i] + 256) & 255; c3 = (value[++i] + 256) & 255; if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80) 144 { 145 badUtf8Data(); 146 } 147 chars[j++] = (char)(((c & 0x0F) << 12) 148 | ((c2 & 0x3F) << 6) 149 | (c3 & 0x3F)); 150 break; 151 152 default: 153 badUtf8Data(); 154 } 155 } 156 return new String(chars, 0, j); 157 } 158 159 private static void badUtf8Data() 160 { 161 throw new org.omg.CORBA.MARSHAL("bad UTF-8 data"); 162 } 163 } 164 | Popular Tags |