1 7 package com.ibm.icu.text; 8 9 import java.io.BufferedInputStream ; 10 import java.io.ByteArrayInputStream ; 11 import java.io.IOException ; 12 import java.io.InputStream ; 13 14 import com.ibm.icu.impl.CharTrie; 15 import com.ibm.icu.impl.StringPrepDataReader; 16 import com.ibm.icu.impl.Trie; 17 import com.ibm.icu.impl.NormalizerImpl; 18 import com.ibm.icu.impl.UBiDiProps; 19 20 import com.ibm.icu.util.VersionInfo; 21 22 import com.ibm.icu.lang.UCharacter; 23 import com.ibm.icu.lang.UCharacterDirection; 24 25 58 public final class StringPrep { 59 65 public static final int DEFAULT = 0x0000; 66 67 73 public static final int ALLOW_UNASSIGNED = 0x0001; 74 75 private static final int UNASSIGNED = 0x0000; 76 private static final int MAP = 0x0001; 77 private static final int PROHIBITED = 0x0002; 78 private static final int DELETE = 0x0003; 79 private static final int TYPE_LIMIT = 0x0004; 80 81 private static final int NORMALIZATION_ON = 0x0001; 82 private static final int CHECK_BIDI_ON = 0x0002; 83 84 private static final int TYPE_THRESHOLD = 0xFFF0; 85 private static final int MAX_INDEX_VALUE = 0x3FBF; 86 private static final int MAX_INDEX_TOP_LENGTH = 0x0003; 87 88 89 private static final int INDEX_TRIE_SIZE = 0; 90 private static final int INDEX_MAPPING_DATA_SIZE = 1; 91 private static final int NORM_CORRECTNS_LAST_UNI_VERSION = 2; 92 private static final int ONE_UCHAR_MAPPING_INDEX_START = 3; 93 private static final int TWO_UCHARS_MAPPING_INDEX_START = 4; 94 private static final int THREE_UCHARS_MAPPING_INDEX_START = 5; 95 private static final int FOUR_UCHARS_MAPPING_INDEX_START = 6; 96 private static final int OPTIONS = 7; 97 private static final int INDEX_TOP = 16; 98 99 100 103 private static final int DATA_BUFFER_SIZE = 25000; 104 105 private CharTrie sprepTrie; 107 private int[] indexes; 109 private char[] mappingData; 111 private byte[] formatVersion; 113 private VersionInfo sprepUniVer; 115 private VersionInfo normCorrVer; 119 private boolean doNFKC; 121 private boolean checkBiDi; 123 private UBiDiProps bdp; 125 126 private char getCodePointValue(int ch){ 127 return sprepTrie.getCodePointValue(ch); 128 } 129 130 private static VersionInfo getVersionInfo(int comp){ 131 int micro = comp & 0xFF; 132 int milli =(comp >> 8) & 0xFF; 133 int minor =(comp >> 16) & 0xFF; 134 int major =(comp >> 24) & 0xFF; 135 return VersionInfo.getInstance(major,minor,milli,micro); 136 } 137 private static VersionInfo getVersionInfo(byte[] version){ 138 if(version.length != 4){ 139 return null; 140 } 141 return VersionInfo.getInstance((int)version[0],(int) version[1],(int) version[2],(int) version[3]); 142 } 143 152 public StringPrep(InputStream inputStream) throws IOException { 153 154 BufferedInputStream b = new BufferedInputStream (inputStream,DATA_BUFFER_SIZE); 155 156 StringPrepDataReader reader = new StringPrepDataReader(b); 157 158 indexes = reader.readIndexes(INDEX_TOP); 160 161 byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]]; 162 163 164 mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2]; 166 reader.read(sprepBytes,mappingData); 168 169 sprepTrie = new CharTrie(new ByteArrayInputStream (sprepBytes), null); 170 171 formatVersion = reader.getDataFormatVersion(); 173 174 doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0); 176 checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0); 177 sprepUniVer = getVersionInfo(reader.getUnicodeVersion()); 178 normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]); 179 VersionInfo normUniVer = Normalizer.getUnicodeVersion(); 180 if(normUniVer.compareTo(sprepUniVer) < 0 && 181 normUniVer.compareTo(normCorrVer) < 0 && 182 ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) 183 ){ 184 throw new IOException ("Normalization Correction version not supported"); 185 } 186 b.close(); 187 188 if(checkBiDi) { 189 bdp=UBiDiProps.getSingleton(); 190 } 191 } 192 193 private static final class Values{ 194 boolean isIndex; 195 int value; 196 int type; 197 public void reset(){ 198 isIndex = false; 199 value = 0; 200 type = -1; 201 } 202 } 203 204 private static final void getValues(char trieWord,Values values){ 205 values.reset(); 206 if(trieWord == 0){ 207 212 values.type = TYPE_LIMIT; 213 }else if(trieWord >= TYPE_THRESHOLD){ 214 values.type = (trieWord - TYPE_THRESHOLD); 215 }else{ 216 217 values.type = MAP; 218 219 if((trieWord & 0x02)>0){ 220 values.isIndex = true; 221 values.value = trieWord >> 2; 223 }else{ 224 values.isIndex = false; 225 values.value = ((int)(trieWord<<16))>>16; 226 values.value = (values.value >> 2); 227 228 } 229 230 if((trieWord>>2) == MAX_INDEX_VALUE){ 231 values.type = DELETE; 232 values.isIndex = false; 233 values.value = 0; 234 } 235 } 236 } 237 238 239 240 private StringBuffer map( UCharacterIterator iter, int options) 241 throws StringPrepParseException{ 242 243 Values val = new Values(); 244 char result = 0; 245 int ch = UCharacterIterator.DONE; 246 StringBuffer dest = new StringBuffer (); 247 boolean allowUnassigned = ((options & ALLOW_UNASSIGNED)>0); 248 249 while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){ 250 251 result = getCodePointValue(ch); 252 getValues(result,val); 253 254 if(val.type == UNASSIGNED && allowUnassigned == false){ 256 throw new StringPrepParseException("An unassigned code point was found in the input", 257 StringPrepParseException.UNASSIGNED_ERROR, 258 iter.getText(),iter.getIndex()); 259 }else if((val.type == MAP)){ 260 int index, length; 261 262 if(val.isIndex){ 263 index = val.value; 264 if(index >= indexes[ONE_UCHAR_MAPPING_INDEX_START] && 265 index < indexes[TWO_UCHARS_MAPPING_INDEX_START]){ 266 length = 1; 267 }else if(index >= indexes[TWO_UCHARS_MAPPING_INDEX_START] && 268 index < indexes[THREE_UCHARS_MAPPING_INDEX_START]){ 269 length = 2; 270 }else if(index >= indexes[THREE_UCHARS_MAPPING_INDEX_START] && 271 index < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){ 272 length = 3; 273 }else{ 274 length = mappingData[index++]; 275 } 276 277 dest.append(mappingData,index,length); 278 continue; 279 280 }else{ 281 ch -= val.value; 282 } 283 }else if(val.type == DELETE){ 284 continue; 286 } 287 UTF16.append(dest,ch); 289 } 290 291 return dest; 292 } 293 294 295 private StringBuffer normalize(StringBuffer src){ 296 307 return new StringBuffer ( 308 Normalizer.normalize( 309 src.toString(), 310 Normalizer.NFKC, 311 Normalizer.UNICODE_3_2|NormalizerImpl.BEFORE_PRI_29)); 312 } 313 322 360 377 public StringBuffer prepare(UCharacterIterator src, int options) 378 throws StringPrepParseException{ 379 380 StringBuffer mapOut = map(src,options); 382 StringBuffer normOut = mapOut; 384 if(doNFKC){ 385 normOut = normalize(mapOut); 387 } 388 389 int ch; 390 char result; 391 UCharacterIterator iter = UCharacterIterator.getInstance(normOut); 392 Values val = new Values(); 393 int direction=UCharacterDirection.CHAR_DIRECTION_COUNT, 394 firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT; 395 int rtlPos=-1, ltrPos=-1; 396 boolean rightToLeft=false, leftToRight=false; 397 398 while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){ 399 result = getCodePointValue(ch); 400 getValues(result,val); 401 402 if(val.type == PROHIBITED ){ 403 throw new StringPrepParseException("A prohibited code point was found in the input", 404 StringPrepParseException.PROHIBITED_ERROR,iter.getText(),val.value); 405 } 406 407 if(checkBiDi) { 408 direction = bdp.getClass(ch); 409 if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){ 410 firstCharDir = direction; 411 } 412 if(direction == UCharacterDirection.LEFT_TO_RIGHT){ 413 leftToRight = true; 414 ltrPos = iter.getIndex()-1; 415 } 416 if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){ 417 rightToLeft = true; 418 rtlPos = iter.getIndex()-1; 419 } 420 } 421 } 422 if(checkBiDi == true){ 423 if( leftToRight == true && rightToLeft == true){ 425 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.", 426 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(), 427 (rtlPos>ltrPos) ? rtlPos : ltrPos); 428 } 429 430 if( rightToLeft == true && 432 !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) && 433 (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC)) 434 ){ 435 throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.", 436 StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(), 437 (rtlPos>ltrPos) ? rtlPos : ltrPos); 438 } 439 } 440 return normOut; 441 442 } 443 } 444 | Popular Tags |