1 7 package com.ibm.icu.impl; 8 9 import com.ibm.icu.text.UCharacterIterator; 10 11 80 public class BOCU 81 { 82 84 86 99 public static int compress(String source, byte buffer[], int offset) 100 { 101 int prev = 0; 102 UCharacterIterator iterator = UCharacterIterator.getInstance(source); 103 int codepoint = iterator.nextCodePoint(); 104 while (codepoint != UCharacterIterator.DONE) { 105 if (prev < 0x4e00 || prev >= 0xa000) { 106 prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_; 107 } 108 else { 109 prev = 0x9fff - SLOPE_REACH_POS_2_; 112 } 113 114 offset = writeDiff(codepoint - prev, buffer, offset); 115 prev = codepoint; 116 codepoint = iterator.nextCodePoint(); 117 } 118 return offset; 119 } 120 121 127 public static int getCompressionLength(String source) 128 { 129 int prev = 0; 130 int result = 0; 131 UCharacterIterator iterator = UCharacterIterator.getInstance(source); 132 int codepoint = iterator.nextCodePoint(); 133 while (codepoint != UCharacterIterator.DONE) { 134 if (prev < 0x4e00 || prev >= 0xa000) { 135 prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_; 136 } 137 else { 138 prev = 0x9fff - SLOPE_REACH_POS_2_; 141 } 142 143 codepoint = iterator.nextCodePoint(); 144 result += lengthOfDiff(codepoint - prev); 145 prev = codepoint; 146 } 147 return result; 148 } 149 150 152 154 156 158 160 162 164 167 private static final int SLOPE_MIN_ = 3; 168 private static final int SLOPE_MAX_ = 0xff; 169 private static final int SLOPE_MIDDLE_ = 0x81; 170 private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1; 171 private static final int SLOPE_MAX_BYTES_ = 4; 172 173 196 private static final int SLOPE_SINGLE_ = 80; 197 private static final int SLOPE_LEAD_2_ = 42; 198 private static final int SLOPE_LEAD_3_ = 3; 199 private static final int SLOPE_LEAD_4_ = 1; 200 201 204 private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_; 205 private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_); 206 207 210 private static final int SLOPE_REACH_POS_2_ = 211 SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1; 212 private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1); 213 214 217 private static final int SLOPE_REACH_POS_3_ = SLOPE_LEAD_3_ 218 * SLOPE_TAIL_COUNT_ 219 * SLOPE_TAIL_COUNT_ 220 + (SLOPE_LEAD_3_ - 1) 221 * SLOPE_TAIL_COUNT_ + 222 (SLOPE_TAIL_COUNT_ - 1); 223 private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1); 224 225 228 private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_ 229 + SLOPE_SINGLE_ + 1; 230 private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_ 231 + SLOPE_LEAD_2_; 232 private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_ + 233 SLOPE_REACH_NEG_1_; 234 private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_ 235 - SLOPE_LEAD_2_; 236 237 239 242 private BOCU() 244 { 245 } 246 248 250 258 private static final long getNegDivMod(int number, int factor) 259 { 260 int modulo = number % factor; 261 long result = number / factor; 262 if (modulo < 0) { 263 -- result; 264 modulo += factor; 265 } 266 return (result << 32) | modulo; 267 } 268 269 277 private static final int writeDiff(int diff, byte buffer[], int offset) 278 { 279 if (diff >= SLOPE_REACH_NEG_1_) { 280 if (diff <= SLOPE_REACH_POS_1_) { 281 buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff); 282 } 283 else if (diff <= SLOPE_REACH_POS_2_) { 284 buffer[offset ++] = (byte)(SLOPE_START_POS_2_ 285 + (diff / SLOPE_TAIL_COUNT_)); 286 buffer[offset ++] = (byte)(SLOPE_MIN_ + 287 (diff % SLOPE_TAIL_COUNT_)); 288 } 289 else if (diff <= SLOPE_REACH_POS_3_) { 290 buffer[offset + 2] = (byte)(SLOPE_MIN_ 291 + (diff % SLOPE_TAIL_COUNT_)); 292 diff /= SLOPE_TAIL_COUNT_; 293 buffer[offset + 1] = (byte)(SLOPE_MIN_ 294 + (diff % SLOPE_TAIL_COUNT_)); 295 buffer[offset] = (byte)(SLOPE_START_POS_3_ 296 + (diff / SLOPE_TAIL_COUNT_)); 297 offset += 3; 298 } 299 else { 300 buffer[offset + 3] = (byte)(SLOPE_MIN_ 301 + diff % SLOPE_TAIL_COUNT_); 302 diff /= SLOPE_TAIL_COUNT_; 303 buffer[offset] = (byte)(SLOPE_MIN_ 304 + diff % SLOPE_TAIL_COUNT_); 305 diff /= SLOPE_TAIL_COUNT_; 306 buffer[offset + 1] = (byte)(SLOPE_MIN_ 307 + diff % SLOPE_TAIL_COUNT_); 308 buffer[offset] = (byte)SLOPE_MAX_; 309 offset += 4; 310 } 311 } 312 else { 313 long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); 314 int modulo = (int)division; 315 if (diff >= SLOPE_REACH_NEG_2_) { 316 diff = (int)(division >> 32); 317 buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff); 318 buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo); 319 } 320 else if (diff >= SLOPE_REACH_NEG_3_) { 321 buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo); 322 diff = (int)(division >> 32); 323 division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); 324 modulo = (int)division; 325 diff = (int)(division >> 32); 326 buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo); 327 buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff); 328 offset += 3; 329 } 330 else { 331 buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo); 332 diff = (int)(division >> 32); 333 division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); 334 modulo = (int)division; 335 diff = (int)(division >> 32); 336 buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo); 337 division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); 338 modulo = (int)division; 339 buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo); 340 buffer[offset] = SLOPE_MIN_; 341 offset += 4; 342 } 343 } 344 return offset; 345 } 346 347 351 private static final int lengthOfDiff(int diff) 352 { 353 if (diff >= SLOPE_REACH_NEG_1_) { 354 if (diff <= SLOPE_REACH_POS_1_) { 355 return 1; 356 } 357 else if (diff <= SLOPE_REACH_POS_2_) { 358 return 2; 359 } 360 else if(diff <= SLOPE_REACH_POS_3_) { 361 return 3; 362 } 363 else { 364 return 4; 365 } 366 } 367 else { 368 if (diff >= SLOPE_REACH_NEG_2_) { 369 return 2; 370 } 371 else if (diff >= SLOPE_REACH_NEG_3_) { 372 return 3; 373 } 374 else { 375 return 4; 376 } 377 } 378 } 379 } 380 | Popular Tags |