|                                                                                                              1
 30
 31  package de.susebox.jtopas.impl;
 32
 33  import java.util.Iterator
  ; 37  import java.util.TreeMap
  ; 38  import java.util.NoSuchElementException
  ; 39
 40  import de.susebox.java.lang.ExtRuntimeException;
 41
 42  import de.susebox.jtopas.Token;
 43  import de.susebox.jtopas.TokenizerProperty;
 44  import de.susebox.jtopas.TokenizerProperties;
 45  import de.susebox.jtopas.TokenizerException;
 46
 47  import de.susebox.jtopas.spi.SequenceHandler;
 48  import de.susebox.jtopas.spi.KeywordHandler;
 49  import de.susebox.jtopas.spi.DataProvider;
 50
 51
 52
 56
 66  public class SequenceStore implements SequenceHandler, KeywordHandler {
 67
 68
 72
 75    public static char DIRECT_INDEX_COUNT = 256;
 76
 77
 78
 82
 89    public SequenceStore(boolean useExactLength) {
 90      _useExactLength = useExactLength;
 91      _maxLength      = 0;
 92      _asciiArray     = new PropertyList[DIRECT_INDEX_COUNT];
 93      _nonASCIIMap    = new TreeMap
  (); 94    }
 95
 96
 97
 101
 109   public boolean hasSequenceCommentOrString() {
 110     return _maxLength > 0;
 111   }
 112
 113
 124   public TokenizerProperty startsWithSequenceCommentOrString(DataProvider dataProvider)
 125     throws TokenizerException, NullPointerException
  126   {
 127         if (dataProvider.getLength() > 0) {
 129       int           len       = dataProvider.getLength();
 130       char          startChar = getStartChar(dataProvider.getCharAt(0));
 131       PropertyList  list      = getList(startChar);
 132
 133       while (list != null) {
 134         TokenizerProperty prop      = list._property;
 135         String
  image     = prop.getImages()[0]; 136         int               imageLen  = image.length();
 137
 138                 if (_useExactLength && imageLen < len) {
 140           break;                    } else if (imageLen <= len && comparePrefix(image, dataProvider, 1) == 0) {
 142           return prop;              }
 144         list = list._next;
 145       }
 146     }
 147
 148         return null;
 150   }
 151
 152
 160   public int getSequenceMaxLength() {
 161     return _maxLength;
 162   }
 163
 164
 168
 176   public boolean hasKeywords() {
 177         return hasSequenceCommentOrString();
 179   }
 180
 181
 191   public TokenizerProperty isKeyword(DataProvider dataProvider) throws TokenizerException, NullPointerException
  { 192     return startsWithSequenceCommentOrString(dataProvider);
 193   }
 194
 195
 196
 200
 208   protected char getStartChar(char startChar) {
 209     return startChar;
 210   }
 211
 212
 218   public TokenizerProperty addSpecialSequence(TokenizerProperty property) {
 219     String
  image     = property.getImages()[0]; 220     int     length    = image.length();
 221     char    startChar = getStartChar(image.charAt(0));
 222
 223     if (_maxLength < length) {
 224       _maxLength = length;
 225     }
 226     if (startChar >= 0 && startChar < DIRECT_INDEX_COUNT) {
 227       return insertDirect(startChar, property);
 228     } else {
 229       return insertMapped(startChar, property);
 230     }
 231   }
 232
 233
 240   public TokenizerProperty removeSpecialSequence(String
  image) { 241     return searchString(image, true);
 242   }
 243
 244
 250   public TokenizerProperty getSpecialSequence(String
  image) { 251     return searchString(image, false);
 252   }
 253
 254
 261   public Iterator
  getSpecialSequences(int type) { 262     return new SpecialSequencesIterator(this, type);
 263   }
 264
 265
 271   public TokenizerProperty addKeyword(TokenizerProperty property) {
 272     return addSpecialSequence(property);
 273   }
 274
 275
 282   public TokenizerProperty removeKeyword(String
  image) { 283     return removeSpecialSequence(image);
 284   }
 285
 286
 292   public TokenizerProperty getKeyword(String
  image) { 293     return getSpecialSequence(image);
 294   }
 295
 296
 303   public Iterator
  getKeywords() { 304     return getSpecialSequences(Token.KEYWORD);
 305   }
 306
 307
 313   private PropertyList getList(char startChar) {
 314             PropertyList list;
 317
 318     try {
 319             list = _asciiArray[startChar];
 321     } catch (IndexOutOfBoundsException
  ex) { 322             list = (PropertyList)_nonASCIIMap.get(new Character
  (startChar)); 324     }
 325     return list;
 326   }
 327
 328
 329
 337   private TokenizerProperty searchString(String
  image, boolean removeIt) { 338     char          startChar = getStartChar(image.charAt(0));
 339     PropertyList  list      = getList(startChar);
 340     PropertyList  prev      = null;
 341
 342     while (list != null) {
 343       TokenizerProperty prop  = list._property;
 344       String
  img   = prop.getImages()[0]; 345       int               res   = compare(img, image, 1);
 346
 347       if (res == 0) {
 348         if (removeIt) {
 349           if (prev != null) {
 350             prev._next = list._next;
 351           } else {
 352             list = list._next;
 353             if (startChar >= 0 && startChar < DIRECT_INDEX_COUNT) {
 354               _asciiArray[startChar] = list;
 355             } else if (list != null) {
 356               _nonASCIIMap.put(new Character
  (startChar), list); 357             } else {
 358               _nonASCIIMap.remove(new Character
  (startChar)); 359             }
 360           }
 361         }
 362         return prop;
 363       } else if (res < 0) {
 364         break;
 365       }
 366       prev = list;
 367       list = list._next;
 368     }
 369     return null;
 370   }
 371
 372
 373
 379   private TokenizerProperty insertDirect(char startChar, TokenizerProperty property) {
 380         if (_asciiArray[startChar] == null) {
 382       _asciiArray[startChar] = new PropertyList(property);
 383       return null;
 384
 385         } else {
 387       return putIntoList(_asciiArray[startChar], property);
 388     }
 389   }
 390
 391
 392
 398   private TokenizerProperty insertMapped(char startChar, TokenizerProperty property) {
 399     Character
  key  = new Character  (getStartChar(startChar)); 400     PropertyList list = (PropertyList)_nonASCIIMap.get(key);
 401
 402     if (list == null) {
 403       _nonASCIIMap.put(key, new PropertyList(property));
 404       return null;
 405     } else {
 406       return putIntoList(list, property);
 407     }
 408   }
 409
 410
 411
 419   private TokenizerProperty putIntoList(PropertyList list, TokenizerProperty property) {
 420     String
  newImage = property.getImages()[0]; 421     PropertyList  prev;
 422
 423     do {
 424       TokenizerProperty prop  = list._property;
 425       String
  image = prop.getImages()[0]; 426       int               res   = compare(image, newImage, 1);
 427
 428       if (res == 0) {
 429         list._property = property;
 430         return prop;
 431       } else if (res < 0) {
 432         list._next     = new PropertyList(prop, list._next);
 433         list._property = property;
 434         return null;
 435       }
 436       prev = list;
 437     } while ((list = prev._next) != null);
 438
 439         prev._next = new PropertyList(property);
 441     return null;
 442   }
 443
 444
 445
 454   private int compare(String
  thisImage, String  thatImage, int fromIndex) { 455     int thisLength = thisImage.length();
 456     int thatLength = thatImage.length();
 457
 458     if (thisLength != thatLength) {
 459       return thisLength - thatLength;
 460     }
 461
 462     while (fromIndex < thisLength) {
 463       int res = compare(thisImage.charAt(fromIndex), thatImage.charAt(fromIndex));
 464
 465       if (res != 0) {
 466         return res;
 467       }
 468       fromIndex++;
 469     }
 470     return 0;
 471   }
 472
 473
 484   private int comparePrefix(String
  prefix, DataProvider dataProvider, int offset) { 485     while (offset < prefix.length()) {
 486       int res = compare(prefix.charAt(offset), dataProvider.getCharAt(offset));
 487
 488       if (res != 0) {
 489         return res;
 490       }
 491       offset++;
 492     }
 493     return 0;
 494   }
 495
 496
 505   protected int compare(char char1, char char2) {
 506     return char1 - char2;
 507   }
 508
 509
 510
 511
 515
 518   final class PropertyList {
 519
 520
 525     PropertyList(TokenizerProperty property) {
 526       this(property, null);
 527     }
 528
 529
 536     PropertyList(TokenizerProperty property, PropertyList next) {
 537       _property = property;
 538       _next     = next;
 539     }
 540
 541         public PropertyList       _next;
 543     public TokenizerProperty  _property;
 544   }
 545
 546
 547
 554   final class SpecialSequencesIterator implements Iterator
  { 555
 556
 564     public SpecialSequencesIterator(SequenceStore parent, int type) {
 565       _type      = type;
 566       _parent    = parent;
 567     }
 568
 569
 577     private boolean listHasNext() {
 578       while (_currentList != null) {
 579         if (_type == 0 || _currentList._property.getType() == _type) {
 580           return true;
 581         }
 582         _currentList = _currentList._next;
 583       }
 584       return false;
 585     }
 586
 587
 593     public boolean hasNext() {
 594             if (listHasNext()) {
 596         return true;
 597       }
 598
 599             if (_mapIterator != null) {
 601         while (_mapIterator.hasNext()) {
 602           _currentList = (PropertyList)_mapIterator.next();
 603           if (listHasNext()) {
 604             return true;
 605           }
 606         }
 607
 608             } else {
 610         if (_parent._asciiArray != null) {
 611           while (++_currentIndex < DIRECT_INDEX_COUNT) {
 612             if ((_currentList = _parent._asciiArray[_currentIndex]) != null) {
 613               if (listHasNext()) {
 614                 return true;
 615               }
 616             }
 617           }
 618         }
 619         if (_parent._nonASCIIMap != null) {
 620           _mapIterator = _parent._nonASCIIMap.values().iterator();
 621           _currentList = null;
 622           return hasNext();
 623         }
 624       }
 625
 626             return false;
 628     }
 629
 630
 636     public Object
  next() throws NoSuchElementException  { 637       if (! hasNext()) {
 638         throw new NoSuchElementException
  (); 639       }
 640
 641       _currentElem = _currentList;
 642       _currentList = _currentList._next;
 643       return _currentElem._property;
 644     }
 645
 646
 653     public void remove() throws IllegalStateException
  { 654             if (_currentElem == null) {
 656         throw new IllegalStateException
  (); 657       }
 658
 659             TokenizerProperty prop  = _currentElem._property;
 661
 662       _currentElem = null;
 663       _parent.searchString(prop.getImages()[0], true);
 664     }
 665
 666
 667         private SequenceStore _parent       = null;
 669     private int           _type         = Token.UNKNOWN;
 670     private Iterator
  _mapIterator  = null; 671     private int           _currentIndex = -1;
 672     private PropertyList  _currentList  = null;
 673     private PropertyList  _currentElem  = null;
 674   }
 675
 676
 677         private PropertyList[]  _asciiArray;
 681   private TreeMap
  _nonASCIIMap    = null; 682   private int             _maxLength;
 683   private boolean         _useExactLength;
 684 }
 685
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |