1 7 8 23 24 package java.text; 25 26 import java.util.Vector ; 27 import java.util.Stack ; 28 import java.util.Hashtable ; 29 import java.text.CharacterIterator ; 30 import java.io.InputStream ; 31 import java.io.IOException ; 32 33 58 class DictionaryBasedBreakIterator extends RuleBasedBreakIterator { 59 60 64 private BreakDictionary dictionary; 65 66 71 private boolean[] categoryFlags; 72 73 77 private int dictionaryCharCount; 78 79 85 private int[] cachedBreakPositions; 86 87 91 private int positionInCache; 92 93 100 public DictionaryBasedBreakIterator(String dataFile, String dictionaryFile) 101 throws IOException { 102 super(dataFile); 103 byte[] tmp = super.getAdditionalData(); 104 if (tmp != null) { 105 prepareCategoryFlags(tmp); 106 super.setAdditionalData(null); 107 } 108 dictionary = new BreakDictionary (dictionaryFile); 109 } 110 111 private void prepareCategoryFlags(byte[] data) { 112 categoryFlags = new boolean[data.length]; 113 for (int i = 0; i < data.length; i++) { 114 categoryFlags[i] = (data[i] == (byte)1) ? true : false; 115 } 116 } 117 118 public void setText(CharacterIterator newText) { 119 super.setText(newText); 120 cachedBreakPositions = null; 121 dictionaryCharCount = 0; 122 positionInCache = 0; 123 } 124 125 130 public int first() { 131 cachedBreakPositions = null; 132 dictionaryCharCount = 0; 133 positionInCache = 0; 134 return super.first(); 135 } 136 137 142 public int last() { 143 cachedBreakPositions = null; 144 dictionaryCharCount = 0; 145 positionInCache = 0; 146 return super.last(); 147 } 148 149 154 public int previous() { 155 CharacterIterator text = getText(); 156 157 if (cachedBreakPositions != null && positionInCache > 0) { 160 --positionInCache; 161 text.setIndex(cachedBreakPositions[positionInCache]); 162 return cachedBreakPositions[positionInCache]; 163 } 164 165 else { 169 cachedBreakPositions = null; 170 int result = super.previous(); 171 if (cachedBreakPositions != null) { 172 positionInCache = cachedBreakPositions.length - 2; 173 } 174 return result; 175 } 176 } 177 178 184 public int preceding(int offset) { 185 CharacterIterator text = getText(); 186 checkOffset(offset, text); 187 188 if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] || 193 offset > cachedBreakPositions[cachedBreakPositions.length - 1]) { 194 cachedBreakPositions = null; 195 return super.preceding(offset); 196 } 197 198 else { 202 positionInCache = 0; 203 while (positionInCache < cachedBreakPositions.length 204 && offset > cachedBreakPositions[positionInCache]) { 205 ++positionInCache; 206 } 207 --positionInCache; 208 text.setIndex(cachedBreakPositions[positionInCache]); 209 return text.getIndex(); 210 } 211 } 212 213 219 public int following(int offset) { 220 CharacterIterator text = getText(); 221 checkOffset(offset, text); 222 223 if (cachedBreakPositions == null || offset < cachedBreakPositions[0] || 228 offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) { 229 cachedBreakPositions = null; 230 return super.following(offset); 231 } 232 233 else { 237 positionInCache = 0; 238 while (positionInCache < cachedBreakPositions.length 239 && offset >= cachedBreakPositions[positionInCache]) { 240 ++positionInCache; 241 } 242 text.setIndex(cachedBreakPositions[positionInCache]); 243 return text.getIndex(); 244 } 245 } 246 247 250 protected int handleNext() { 251 CharacterIterator text = getText(); 252 253 if (cachedBreakPositions == null || 257 positionInCache == cachedBreakPositions.length - 1) { 258 259 int startPos = text.getIndex(); 263 dictionaryCharCount = 0; 264 int result = super.handleNext(); 265 266 if (dictionaryCharCount > 1 && result - startPos > 1) { 270 divideUpDictionaryRange(startPos, result); 271 } 272 273 else { 276 cachedBreakPositions = null; 277 return result; 278 } 279 } 280 281 if (cachedBreakPositions != null) { 285 ++positionInCache; 286 text.setIndex(cachedBreakPositions[positionInCache]); 287 return cachedBreakPositions[positionInCache]; 288 } 289 return -9999; } 291 292 295 protected int lookupCategory(int c) { 296 int result = super.lookupCategory(c); 302 if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) { 303 ++dictionaryCharCount; 304 } 305 return result; 306 } 307 308 316 private void divideUpDictionaryRange(int startPos, int endPos) { 317 CharacterIterator text = getText(); 318 319 text.setIndex(startPos); 324 int c = getCurrent(); 325 int category = lookupCategory(c); 326 while (category == IGNORE || !categoryFlags[category]) { 327 c = getNext(); 328 category = lookupCategory(c); 329 } 330 331 Stack currentBreakPositions = new Stack (); 343 Stack possibleBreakPositions = new Stack (); 344 Vector wrongBreakPositions = new Vector (); 345 346 int state = 0; 351 352 int farthestEndPoint = text.getIndex(); 360 Stack bestBreakPositions = null; 361 362 c = getCurrent(); 364 while (true) { 365 366 if (dictionary.getNextState(state, 0) == -1) { 370 possibleBreakPositions.push(new Integer (text.getIndex())); 371 } 372 373 state = dictionary.getNextStateFromCharacter(state, c); 375 376 if (state == -1) { 381 currentBreakPositions.push(new Integer (text.getIndex())); 382 break; 383 } 384 385 else if (state == 0 || text.getIndex() >= endPos) { 390 391 if (text.getIndex() > farthestEndPoint) { 394 farthestEndPoint = text.getIndex(); 395 bestBreakPositions = (Stack )(currentBreakPositions.clone()); 396 } 397 398 Integer newStartingSpot = null; 412 while (!possibleBreakPositions.isEmpty() && wrongBreakPositions.contains( 413 possibleBreakPositions.peek())) { 414 possibleBreakPositions.pop(); 415 } 416 417 if (possibleBreakPositions.isEmpty()) { 423 if (bestBreakPositions != null) { 424 currentBreakPositions = bestBreakPositions; 425 if (farthestEndPoint < endPos) { 426 text.setIndex(farthestEndPoint + 1); 427 } 428 else { 429 break; 430 } 431 } 432 else { 433 if ((currentBreakPositions.size() == 0 || 434 ((Integer )(currentBreakPositions.peek())).intValue() != text.getIndex()) 435 && text.getIndex() != startPos) { 436 currentBreakPositions.push(new Integer (text.getIndex())); 437 } 438 getNext(); 439 currentBreakPositions.push(new Integer (text.getIndex())); 440 } 441 } 442 443 else { 449 Integer temp = (Integer )possibleBreakPositions.pop(); 450 Object temp2 = null; 451 while (!currentBreakPositions.isEmpty() && temp.intValue() < 452 ((Integer )currentBreakPositions.peek()).intValue()) { 453 temp2 = currentBreakPositions.pop(); 454 wrongBreakPositions.addElement(temp2); 455 } 456 currentBreakPositions.push(temp); 457 text.setIndex(((Integer )currentBreakPositions.peek()).intValue()); 458 } 459 460 c = getCurrent(); 463 if (text.getIndex() >= endPos) { 464 break; 465 } 466 } 467 468 else { 471 c = getNext(); 472 } 473 } 474 475 if (!currentBreakPositions.isEmpty()) { 480 currentBreakPositions.pop(); 481 } 482 currentBreakPositions.push(new Integer (endPos)); 483 484 cachedBreakPositions = new int[currentBreakPositions.size() + 1]; 490 cachedBreakPositions[0] = startPos; 491 492 for (int i = 0; i < currentBreakPositions.size(); i++) { 493 cachedBreakPositions[i + 1] = ((Integer )currentBreakPositions.elementAt(i)).intValue(); 494 } 495 positionInCache = 0; 496 } 497 } 498 | Popular Tags |