package de.susebox.jtopas.impl;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.NoSuchElementException;

import de.susebox.java.lang.ExtRuntimeException;

import de.susebox.jtopas.Token;
import de.susebox.jtopas.TokenizerProperty;
import de.susebox.jtopas.TokenizerProperties;
import de.susebox.jtopas.TokenizerException;

import de.susebox.jtopas.spi.SequenceHandler;
import de.susebox.jtopas.spi.KeywordHandler;
import de.susebox.jtopas.spi.DataProvider;


/**
 * Store for {@link TokenizerProperty} objects that are looked up by their
 * first image. It serves both as a {@link SequenceHandler} and as a
 * {@link KeywordHandler}.
 *
 * Properties are bucketed by the (normalized) first character of their image:
 * characters below the size of the direct-index array are found by array
 * access, all others through a {@link TreeMap}. Each bucket is a singly linked
 * list kept in descending order as defined by
 * {@link #compare(String, String, int)}: longer images come first, images of
 * equal length are ordered by their characters.
 *
 * Subclasses can change the matching (e.g. case handling) by overriding
 * {@link #getStartChar(char)} and {@link #compare(char, char)}.
 */
public class SequenceStore implements SequenceHandler, KeywordHandler {

  //---------------------------------------------------------------------------
  // Constants
  //

  /**
   * Number of start characters that are resolved by direct array indexing.
   * The value is read once per instance when the instance is constructed;
   * later changes do not affect existing instances.
   */
  public static char DIRECT_INDEX_COUNT = 256;


  //---------------------------------------------------------------------------
  // Constructors
  //

  /**
   * Creates an empty store.
   *
   * @param useExactLength  if <code>true</code>, a property only matches when
   *                        its image is exactly as long as the data offered by
   *                        the {@link DataProvider}; otherwise any image that
   *                        is not longer than the data matches as a prefix
   */
  public SequenceStore(boolean useExactLength) {
    _useExactLength = useExactLength;
    _maxLength      = 0;
    _asciiArray     = new PropertyList[DIRECT_INDEX_COUNT];
    _nonASCIIMap    = new TreeMap();
  }


  //---------------------------------------------------------------------------
  // Methods of the SequenceHandler interface
  //

  /**
   * Reports whether a non-empty property has ever been added to this store.
   * The check is based on the maximum image length seen so far, which is not
   * reduced when properties are removed. The method can therefore still return
   * <code>true</code> for a store that has been emptied again.
   *
   * @return <code>true</code> if the longest image added so far is not empty
   */
  public boolean hasSequenceCommentOrString() {
    return _maxLength > 0;
  }

  /**
   * Searches the property whose image matches the start of the given data.
   * Since the buckets are ordered with longer images first, the longest
   * matching image wins in prefix mode.
   *
   * @param  dataProvider  the source of the characters to check
   * @return the matching property or <code>null</code> if there is none
   * @throws TokenizerException   declared for the interface contract
   * @throws NullPointerException if <code>dataProvider</code> is <code>null</code>
   */
  public TokenizerProperty startsWithSequenceCommentOrString(DataProvider dataProvider)
    throws TokenizerException, NullPointerException
  {
    int len = dataProvider.getLength();

    if (len > 0) {
      char startChar = getStartChar(dataProvider.getCharAt(0));

      for (PropertyList list = getList(startChar); list != null; list = list._next) {
        TokenizerProperty prop     = list._property;
        String            image    = prop.getImages()[0];
        int               imageLen = image.length();

        if (_useExactLength && imageLen < len) {
          // images only get shorter from here on, an exact match is impossible
          break;
        } else if (imageLen <= len && comparePrefix(image, dataProvider, 1) == 0) {
          // the first character is already known to match (bucket selection)
          return prop;
        }
      }
    }
    return null;
  }

  /**
   * Returns the length of the longest image that has been added so far. The
   * value is not reduced when properties are removed.
   *
   * @return the maximum image length
   */
  public int getSequenceMaxLength() {
    return _maxLength;
  }


  //---------------------------------------------------------------------------
  // Methods of the KeywordHandler interface
  //

  /**
   * Keyword flavour of {@link #hasSequenceCommentOrString()}.
   *
   * @return see {@link #hasSequenceCommentOrString()}
   */
  public boolean hasKeywords() {
    return hasSequenceCommentOrString();
  }

  /**
   * Keyword flavour of
   * {@link #startsWithSequenceCommentOrString(DataProvider)}.
   *
   * @param  dataProvider  the source of the characters to check
   * @return the matching property or <code>null</code> if there is none
   * @throws TokenizerException   declared for the interface contract
   * @throws NullPointerException if <code>dataProvider</code> is <code>null</code>
   */
  public TokenizerProperty isKeyword(DataProvider dataProvider)
    throws TokenizerException, NullPointerException
  {
    return startsWithSequenceCommentOrString(dataProvider);
  }


  //---------------------------------------------------------------------------
  // Implementation
  //

  /**
   * Normalizes the first character of an image or of the data to check. The
   * result selects the bucket. This implementation returns the character
   * unchanged; subclasses may override it together with
   * {@link #compare(char, char)}.
   *
   * @param  startChar  the character to normalize
   * @return the normalized character
   */
  protected char getStartChar(char startChar) {
    return startChar;
  }

  /**
   * Adds a property or replaces the one registered with an equal image.
   *
   * @param  property  the property to store; its first image must not be empty
   * @return the replaced property or <code>null</code> if there was none
   * @throws StringIndexOutOfBoundsException if the first image is empty
   */
  public TokenizerProperty addSpecialSequence(TokenizerProperty property) {
    String  image     = property.getImages()[0];
    int     length    = image.length();
    char    startChar = getStartChar(image.charAt(0));

    if (_maxLength < length) {
      _maxLength = length;
    }
    if (isDirect(startChar)) {
      return insertDirect(startChar, property);
    } else {
      return insertMapped(startChar, property);
    }
  }

  /**
   * Removes the property registered for the given image.
   *
   * @param  image  the image of the property to remove
   * @return the removed property or <code>null</code> if there was none
   */
  public TokenizerProperty removeSpecialSequence(String image) {
    return searchString(image, true);
  }

  /**
   * Retrieves the property registered for the given image.
   *
   * @param  image  the image of the property to find
   * @return the property or <code>null</code> if there is none
   */
  public TokenizerProperty getSpecialSequence(String image) {
    return searchString(image, false);
  }

  /**
   * Returns an iterator over the stored properties of the given type.
   *
   * @param  type  the property type to deliver; 0 delivers all properties
   * @return an iterator of {@link TokenizerProperty} objects
   */
  public Iterator getSpecialSequences(int type) {
    return new SpecialSequencesIterator(this, type);
  }

  /**
   * Keyword flavour of {@link #addSpecialSequence(TokenizerProperty)}.
   *
   * @param  property  the property to store
   * @return the replaced property or <code>null</code> if there was none
   */
  public TokenizerProperty addKeyword(TokenizerProperty property) {
    return addSpecialSequence(property);
  }

  /**
   * Keyword flavour of {@link #removeSpecialSequence(String)}.
   *
   * @param  image  the image of the keyword to remove
   * @return the removed property or <code>null</code> if there was none
   */
  public TokenizerProperty removeKeyword(String image) {
    return removeSpecialSequence(image);
  }

  /**
   * Keyword flavour of {@link #getSpecialSequence(String)}.
   *
   * @param  image  the image of the keyword to find
   * @return the property or <code>null</code> if there is none
   */
  public TokenizerProperty getKeyword(String image) {
    return getSpecialSequence(image);
  }

  /**
   * Returns an iterator over all stored properties of type
   * {@link Token#KEYWORD}.
   *
   * @return an iterator of {@link TokenizerProperty} objects
   */
  public Iterator getKeywords() {
    return getSpecialSequences(Token.KEYWORD);
  }

  /**
   * Tells whether the bucket of the given (already normalized) start character
   * lives in the direct-index array of this instance. The actual array length
   * is used rather than {@link #DIRECT_INDEX_COUNT}, since the latter is not
   * final and may have changed after this instance was constructed.
   *
   * @param  startChar  the normalized start character
   * @return <code>true</code> for array access, <code>false</code> for the map
   */
  private boolean isDirect(char startChar) {
    return startChar < _asciiArray.length;
  }

  /**
   * Returns the bucket for the given (already normalized) start character.
   *
   * @param  startChar  the normalized start character
   * @return the head of the bucket list or <code>null</code> if it is empty
   */
  private PropertyList getList(char startChar) {
    if (isDirect(startChar)) {
      return _asciiArray[startChar];
    }
    return (PropertyList)_nonASCIIMap.get(new Character(startChar));
  }

  /**
   * Finds and optionally removes the property with the given image.
   *
   * @param  image     the image to search for
   * @param  removeIt  if <code>true</code>, unlink the property when found
   * @return the found property or <code>null</code> if there is none or if
   *         <code>image</code> is <code>null</code> or empty
   */
  private TokenizerProperty searchString(String image, boolean removeIt) {
    if (image == null || image.length() == 0) {
      // nothing can be registered under an empty image
      return null;
    }

    char          startChar = getStartChar(image.charAt(0));
    PropertyList  list      = getList(startChar);
    PropertyList  prev      = null;

    while (list != null) {
      TokenizerProperty prop = list._property;
      int               res  = compare(prop.getImages()[0], image, 1);

      if (res == 0) {
        if (removeIt) {
          PropertyList next = list._next;

          if (prev != null) {
            prev._next = next;
          } else if (isDirect(startChar)) {
            _asciiArray[startChar] = next;
          } else if (next != null) {
            _nonASCIIMap.put(new Character(startChar), next);
          } else {
            _nonASCIIMap.remove(new Character(startChar));
          }
        }
        return prop;
      } else if (res < 0) {
        // descending order: every following image is even smaller
        break;
      }
      prev = list;
      list = list._next;
    }
    return null;
  }

  /**
   * Inserts a property into a bucket of the direct-index array.
   *
   * @param  startChar  the normalized start character, must be a valid index
   * @param  property   the property to store
   * @return the replaced property or <code>null</code> if there was none
   */
  private TokenizerProperty insertDirect(char startChar, TokenizerProperty property) {
    PropertyList head = _asciiArray[startChar];

    if (head == null) {
      _asciiArray[startChar] = new PropertyList(property);
      return null;
    }
    return putIntoList(head, property);
  }

  /**
   * Inserts a property into a bucket of the map.
   *
   * @param  startChar  the normalized start character; it is used as given so
   *                    that the key is the same one {@link #getList(char)} and
   *                    {@link #searchString(String, boolean)} look up
   * @param  property   the property to store
   * @return the replaced property or <code>null</code> if there was none
   */
  private TokenizerProperty insertMapped(char startChar, TokenizerProperty property) {
    Character     key  = new Character(startChar);
    PropertyList  head = (PropertyList)_nonASCIIMap.get(key);

    if (head == null) {
      _nonASCIIMap.put(key, new PropertyList(property));
      return null;
    }
    return putIntoList(head, property);
  }

  /**
   * Puts a property into a non-empty bucket, keeping the descending order.
   * When the new property belongs in front of an element, the element's node
   * takes over the new property and a fresh node carrying the old property is
   * linked behind it. That way the head node of a bucket never changes on
   * insertion and the array or map entry needs no update.
   *
   * @param  list      the head of the bucket, not <code>null</code>
   * @param  property  the property to store
   * @return the replaced property or <code>null</code> if there was none
   */
  private TokenizerProperty putIntoList(PropertyList list, TokenizerProperty property) {
    String        newImage = property.getImages()[0];
    PropertyList  prev;

    do {
      TokenizerProperty prop = list._property;
      int               res  = compare(prop.getImages()[0], newImage, 1);

      if (res == 0) {
        list._property = property;
        return prop;
      } else if (res < 0) {
        list._next     = new PropertyList(prop, list._next);
        list._property = property;
        return null;
      }
      prev = list;
    } while ((list = prev._next) != null);

    // smaller than everything in the bucket: append
    prev._next = new PropertyList(property);
    return null;
  }

  /**
   * Compares two images. Images of different length are ordered by length;
   * images of equal length are compared character by character with
   * {@link #compare(char, char)}, starting at <code>fromIndex</code>.
   *
   * @param  thisImage  the first image
   * @param  thatImage  the second image
   * @param  fromIndex  index of the first character to compare
   * @return negative, 0 or positive if the first image is less than, equal to
   *         or greater than the second one
   */
  private int compare(String thisImage, String thatImage, int fromIndex) {
    int thisLength = thisImage.length();
    int thatLength = thatImage.length();

    if (thisLength != thatLength) {
      return thisLength - thatLength;
    }
    for (int index = fromIndex; index < thisLength; ++index) {
      int res = compare(thisImage.charAt(index), thatImage.charAt(index));

      if (res != 0) {
        return res;
      }
    }
    return 0;
  }

  /**
   * Compares an image with the leading characters of the data, starting at
   * the given offset. The caller must ensure that the data is at least as
   * long as the image.
   *
   * @param  prefix        the image to check
   * @param  dataProvider  the source of the characters to check against
   * @param  offset        index of the first character to compare
   * @return 0 if all characters from <code>offset</code> on are equal,
   *         otherwise the result of the first differing character comparison
   */
  private int comparePrefix(String prefix, DataProvider dataProvider, int offset) {
    int prefixLength = prefix.length();

    for (int index = offset; index < prefixLength; ++index) {
      int res = compare(prefix.charAt(index), dataProvider.getCharAt(index));

      if (res != 0) {
        return res;
      }
    }
    return 0;
  }

  /**
   * Compares two characters. This implementation compares the plain character
   * values; subclasses may override it together with
   * {@link #getStartChar(char)}.
   *
   * @param  char1  the first character
   * @param  char2  the second character
   * @return negative, 0 or positive if the first character is less than,
   *         equal to or greater than the second one
   */
  protected int compare(char char1, char char2) {
    return char1 - char2;
  }


  //---------------------------------------------------------------------------
  // Inner classes
  //

  /**
   * Node of the singly linked list that forms a bucket.
   */
  final class PropertyList {

    /**
     * Creates a node without successor.
     *
     * @param property  the property held by the node
     */
    PropertyList(TokenizerProperty property) {
      this(property, null);
    }

    /**
     * Creates a node with the given successor.
     *
     * @param property  the property held by the node
     * @param next      the following node or <code>null</code>
     */
    PropertyList(TokenizerProperty property, PropertyList next) {
      _property = property;
      _next     = next;
    }

    public PropertyList       _next;
    public TokenizerProperty  _property;
  }


  /**
   * Iterator over the properties of a {@link SequenceStore}. It first walks
   * the buckets of the direct-index array and then the buckets of the map.
   */
  final class SpecialSequencesIterator implements Iterator {

    /**
     * Creates the iterator.
     *
     * @param parent  the store to iterate
     * @param type    the property type to deliver; 0 delivers all properties
     */
    public SpecialSequencesIterator(SequenceStore parent, int type) {
      _type   = type;
      _parent = parent;
    }

    /**
     * Advances within the current bucket to the next node of the requested
     * type.
     *
     * @return <code>true</code> if the current bucket has such a node
     */
    private boolean listHasNext() {
      while (_currentList != null) {
        if (_type == 0 || _currentList._property.getType() == _type) {
          return true;
        }
        _currentList = _currentList._next;
      }
      return false;
    }

    /**
     * Checks for another property of the requested type.
     *
     * @return <code>true</code> if {@link #next()} would deliver a property
     */
    public boolean hasNext() {
      // still something in the current bucket?
      if (listHasNext()) {
        return true;
      }

      if (_mapIterator == null) {
        // phase 1: remaining buckets of the direct-index array
        PropertyList[] direct = _parent._asciiArray;

        if (direct != null) {
          while (++_currentIndex < direct.length) {
            _currentList = direct[_currentIndex];
            if (listHasNext()) {
              return true;
            }
          }
        }
        if (_parent._nonASCIIMap == null) {
          return false;
        }

        // Iterate a snapshot of the bucket heads: remove() may delete a key
        // from the map, which would make a live TreeMap iterator fail with a
        // ConcurrentModificationException on its next step.
        _mapIterator = new ArrayList(_parent._nonASCIIMap.values()).iterator();
        _currentList = null;
      }

      // phase 2: buckets of the map
      while (_mapIterator.hasNext()) {
        _currentList = (PropertyList)_mapIterator.next();
        if (listHasNext()) {
          return true;
        }
      }
      return false;
    }

    /**
     * Delivers the next property.
     *
     * @return the next {@link TokenizerProperty}
     * @throws NoSuchElementException if there is no more property
     */
    public Object next() throws NoSuchElementException {
      if ( ! hasNext()) {
        throw new NoSuchElementException();
      }

      _currentElem = _currentList;
      _currentList = _currentList._next;
      return _currentElem._property;
    }

    /**
     * Removes the property delivered by the last call to {@link #next()} from
     * the store.
     *
     * @throws IllegalStateException if {@link #next()} has not been called or
     *         the property has already been removed
     */
    public void remove() throws IllegalStateException {
      if (_currentElem == null) {
        throw new IllegalStateException();
      }

      TokenizerProperty prop = _currentElem._property;

      // forget the element first so that a second remove() is rejected
      _currentElem = null;
      _parent.searchString(prop.getImages()[0], true);
    }

    private SequenceStore _parent       = null;
    private int           _type         = Token.UNKNOWN;
    private Iterator      _mapIterator  = null;
    private int           _currentIndex = -1;
    private PropertyList  _currentList  = null;
    private PropertyList  _currentElem  = null;
  }


  //---------------------------------------------------------------------------
  // Members
  //
  private PropertyList[]  _asciiArray;
  private TreeMap         _nonASCIIMap = null;
  private int             _maxLength;
  private boolean         _useExactLength;
}