1 7 8 package com.ibm.icu.text; 9 10 import java.util.Vector ; 11 import java.util.Stack ; 12 import com.ibm.icu.impl.Assert; 13 import java.text.CharacterIterator ; 14 import java.io.InputStream ; 15 import java.io.IOException ; 16 17 18 43 public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator { 44 45 49 private BreakDictionary dictionary; 50 51 56 private boolean[] categoryFlags; 57 58 59 65 private int[] cachedBreakPositions; 66 67 71 private int positionInCache; 72 73 76 77 85 public DictionaryBasedBreakIterator(String rules, 86 InputStream dictionaryStream) throws IOException { 87 super(rules); 88 dictionary = new BreakDictionary(dictionaryStream); 89 } 90 91 92 99 public DictionaryBasedBreakIterator(InputStream compiledRules, 100 InputStream dictionaryStream) throws IOException { 101 fRData = RBBIDataWrapper.get(compiledRules); dictionary = new BreakDictionary(dictionaryStream); 103 } 104 105 106 107 public void setText(CharacterIterator newText) { 108 super.setText(newText); 109 cachedBreakPositions = null; 110 fDictionaryCharCount = 0; 111 positionInCache = 0; 112 } 113 114 120 public int first() { 121 cachedBreakPositions = null; 122 fDictionaryCharCount = 0; 123 positionInCache = 0; 124 return super.first(); 125 } 126 127 133 public int last() { 134 cachedBreakPositions = null; 135 fDictionaryCharCount = 0; 136 positionInCache = 0; 137 return super.last(); 138 } 139 140 146 public int previous() { 147 CharacterIterator text = getText(); 148 149 if (cachedBreakPositions != null && positionInCache > 0) { 152 --positionInCache; 153 text.setIndex(cachedBreakPositions[positionInCache]); 154 return cachedBreakPositions[positionInCache]; 155 } 156 157 else { 163 cachedBreakPositions = null; 164 int offset = current(); 165 int result = super.previous(); 166 167 if (cachedBreakPositions != null) { 168 positionInCache = cachedBreakPositions.length - 2; 169 return result; 170 } 171 172 while (result < offset) { 173 int nextResult = next(); 174 175 if (nextResult >= offset) { 176 break; 177 } 178 179 result = nextResult; 180 } 181 182 if (cachedBreakPositions != null) { 183 positionInCache = cachedBreakPositions.length - 2; 184 } 185 186 if (result != BreakIterator.DONE) { 187 text.setIndex(result); 188 } 189 190 return result; 191 } 192 } 193 194 201 public int preceding(int offset) { 202 CharacterIterator text = getText(); 203 checkOffset(offset, text); 204 205 if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] || 210 offset > cachedBreakPositions[cachedBreakPositions.length - 1]) { 211 cachedBreakPositions = null; 212 return super.preceding(offset); 213 } 214 215 else { 219 positionInCache = 0; 220 while (positionInCache < cachedBreakPositions.length 221 && offset > cachedBreakPositions[positionInCache]) 222 ++positionInCache; 223 --positionInCache; 224 text.setIndex(cachedBreakPositions[positionInCache]); 225 return text.getIndex(); 226 } 227 } 228 229 236 public int following(int offset) { 237 CharacterIterator text = getText(); 238 checkOffset(offset, text); 239 240 if (cachedBreakPositions == null || offset < cachedBreakPositions[0] || 245 offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) { 246 cachedBreakPositions = null; 247 return super.following(offset); 248 } 249 250 else { 254 positionInCache = 0; 255 while (positionInCache < cachedBreakPositions.length 256 && offset >= cachedBreakPositions[positionInCache]) 257 ++positionInCache; 258 text.setIndex(cachedBreakPositions[positionInCache]); 259 return text.getIndex(); 260 } 261 } 262 263 264 275 public int getRuleStatus() { 276 return 0; 277 } 278 279 280 297 public int getRuleStatusVec(int[] fillInArray) { 298 if (fillInArray != null && fillInArray.length>=1) { 299 fillInArray[0] = 0; 300 } 301 return 1; 302 } 303 304 305 306 311 protected int handleNext() { 312 CharacterIterator text = getText(); 313 314 if (cachedBreakPositions == null || positionInCache == cachedBreakPositions.length - 1) { 318 319 int startPos = text.getIndex(); 323 fDictionaryCharCount = 0; 324 int result = super.handleNext(); 325 326 if (fDictionaryCharCount > 1 && result - startPos > 1) { 330 divideUpDictionaryRange(startPos, result); 331 } 332 333 else { 336 cachedBreakPositions = null; 337 return result; 338 } 339 } 340 341 if (cachedBreakPositions != null) { 345 ++positionInCache; 346 text.setIndex(cachedBreakPositions[positionInCache]); 347 return cachedBreakPositions[positionInCache]; 348 } 349 Assert.assrt(false); 350 return -9999; } 352 353 361 private void divideUpDictionaryRange(int startPos, int endPos) { 362 CharacterIterator text = getText(); 363 364 text.setIndex(startPos); 369 int c = CICurrent32(text); 370 while (isDictionaryChar(c) == false) { 371 c = CINext32(text); 372 } 373 374 376 Stack currentBreakPositions = new Stack (); 388 Stack possibleBreakPositions = new Stack (); 389 Vector wrongBreakPositions = new Vector (); 390 391 int state = 0; 396 397 int farthestEndPoint = text.getIndex(); 405 Stack bestBreakPositions = null; 406 407 c = CICurrent32(text); 409 while (true) { 410 412 if (dictionary.at(state, 0) == -1) { 416 possibleBreakPositions.push(new Integer (text.getIndex())); 417 } 418 419 state = (dictionary.at(state, (char)c)) & 0xFFFF; 426 if (state == 0xFFFF) { 431 currentBreakPositions.push(new Integer (text.getIndex())); 432 break; 433 } 434 435 else if (state == 0 || text.getIndex() >= endPos) { 440 441 if (text.getIndex() > farthestEndPoint) { 444 farthestEndPoint = text.getIndex(); 445 bestBreakPositions = (Stack )(currentBreakPositions.clone()); 446 } 447 448 while (!possibleBreakPositions.isEmpty() && wrongBreakPositions.contains( 460 possibleBreakPositions.peek())) { 461 possibleBreakPositions.pop(); 462 } 463 464 if (possibleBreakPositions.isEmpty()) { 470 if (bestBreakPositions != null) { 471 currentBreakPositions = bestBreakPositions; 472 if (farthestEndPoint < endPos) { 473 text.setIndex(farthestEndPoint + 1); 474 } 475 else { 476 break; 477 } 478 } 479 else { 480 if ((currentBreakPositions.size() == 0 481 || ((Integer )(currentBreakPositions.peek())).intValue() != text.getIndex()) 482 && text.getIndex() != startPos) { 483 currentBreakPositions.push(new Integer (text.getIndex())); 484 } 485 CINext32(text); 486 currentBreakPositions.push(new Integer (text.getIndex())); 487 } 488 } 489 490 else { 496 Integer temp = (Integer )possibleBreakPositions.pop(); 497 Object temp2 = null; 498 while (!currentBreakPositions.isEmpty() && temp.intValue() < 499 ((Integer )currentBreakPositions.peek()).intValue()) { 500 temp2 = currentBreakPositions.pop(); 501 wrongBreakPositions.addElement(temp2); 502 } 503 currentBreakPositions.push(temp); 504 text.setIndex(((Integer )currentBreakPositions.peek()).intValue()); 505 } 506 507 c = CICurrent32(text); 510 state = 0; 511 if (text.getIndex() >= endPos) { 512 break; 513 } 514 } 515 516 else { 519 c = CINext32(text); 520 } 521 } 524 525 if (!currentBreakPositions.isEmpty()) { 530 currentBreakPositions.pop(); 531 } 532 currentBreakPositions.push(new Integer (endPos)); 533 534 cachedBreakPositions = new int[currentBreakPositions.size() + 1]; 540 cachedBreakPositions[0] = startPos; 541 542 for (int i = 0; i < currentBreakPositions.size(); i++) { 543 cachedBreakPositions[i + 1] = ((Integer )currentBreakPositions.elementAt(i)).intValue(); 544 } 545 positionInCache = 0; 546 } 547 } 548 | Popular Tags |