1 30 31 package de.susebox.jtopas; 32 33 import java.util.Arrays ; 37 import java.util.ArrayList ; 38 import java.util.Map ; 39 import java.util.HashMap ; 40 import java.util.Iterator ; 41 import java.util.NoSuchElementException ; 42 43 import de.susebox.java.lang.ExtRuntimeException; 44 import de.susebox.java.lang.ExtUnsupportedOperationException; 45 import de.susebox.java.lang.ExtIllegalArgumentException; 46 47 import de.susebox.jtopas.spi.DataMapper; 48 import de.susebox.jtopas.spi.DataProvider; 49 import de.susebox.jtopas.spi.PatternHandler; 50 51 import de.susebox.jtopas.impl.PatternMatcher; 52 import de.susebox.jtopas.impl.SequenceStore; 53 import de.susebox.jtopas.impl.NoCaseSequenceStore; 54 55 56 60 73 public class StandardTokenizerProperties 74 extends AbstractTokenizerProperties 75 implements TokenizerProperties, DataMapper 76 { 77 78 82 87 public static final short MAX_NONFREE_MATCHLEN = 1024; 88 89 90 94 100 public StandardTokenizerProperties() { 101 this(0); 102 } 103 104 121 public StandardTokenizerProperties(int flags) { 122 this(flags, DEFAULT_WHITESPACES, DEFAULT_SEPARATORS); 123 } 124 125 126 143 public StandardTokenizerProperties(int flags, String whitespaces, String separators) { 144 Arrays.fill(_charFlags, 0); 145 setParseFlags(flags); 146 setWhitespaces(whitespaces); 147 setSeparators(separators); 148 } 149 150 151 155 163 protected TokenizerProperty doGetProperty(int type, String startImage) { 164 TokenizerProperty prop = null; 165 166 switch (type) { 167 case Token.KEYWORD: 168 if (_keywords[0] != null) { 169 prop = _keywords[0].getKeyword(startImage); 170 } 171 if (prop == null && _keywords[1] != null) { 172 prop = _keywords[1].getKeyword(startImage); 173 } 174 break; 175 176 case Token.STRING: 177 case Token.LINE_COMMENT: 178 case Token.BLOCK_COMMENT: 179 case Token.SPECIAL_SEQUENCE: 180 if (_sequences[0] != null) { 181 prop = _sequences[0].getSpecialSequence(startImage); 182 } 183 if (prop == null && _sequences[1] != null) { 184 prop = _sequences[1].getSpecialSequence(startImage); 185 } 186 break; 187 188 case Token.PATTERN: 189 for (int index = 0; index < _patterns.size(); ++index) { 190 PatternMatcher data = (PatternMatcher)_patterns.get(index); 191 192 prop = data.getProperty(); 193 if (prop.getImages()[0].equals(startImage)) { 194 break; 195 } 196 prop = null; 197 } 198 break; 199 200 case Token.WHITESPACE: 201 case Token.SEPARATOR: 202 default: 203 throw new ExtIllegalArgumentException("Unsupported property type {0}. (Leading) image \"{1}\".", 204 new Object [] { new Integer (type), startImage } ); 205 } 206 207 return prop; 209 } 210 211 212 219 protected String doSetSeparators(String separators) { 220 String oldValue; 221 222 if ((_flags & Flags.F_NO_CASE) == 0) { 224 oldValue = (_separatorsCase.length() > 0) ? _separatorsCase : _separatorsNoCase; 225 _separatorsCase = separators; 226 _separatorsNoCase = ""; 227 } else { 228 oldValue = (_separatorsNoCase.length() > 0) ? _separatorsNoCase : _separatorsCase; 229 _separatorsCase = ""; 230 _separatorsNoCase = separators; 231 } 232 233 putCharSet(oldValue, Token.SEPARATOR, false); 235 putCharSet(separators, Token.SEPARATOR, true); 236 237 if (oldValue == null || oldValue.length() == 0) { 239 return null; 240 } else { 241 return oldValue; 242 } 243 } 244 245 252 protected String doSetWhitespaces(String whitespaces) { 253 String oldValue; 255 256 if ((_flags & Flags.F_NO_CASE) == 0) { 257 oldValue = (_whitespacesCase.length() > 0) ? _whitespacesCase : _whitespacesNoCase; 258 _whitespacesCase = whitespaces; 259 _whitespacesNoCase = ""; 260 } else { 261 oldValue = (_whitespacesNoCase.length() > 0) ? _whitespacesNoCase : _whitespacesCase; 262 _whitespacesCase = ""; 263 _whitespacesNoCase = whitespaces; 264 } 265 266 putCharSet(oldValue, Token.WHITESPACE, false); 268 putCharSet(whitespaces, Token.WHITESPACE, true); 269 270 if (oldValue == null || oldValue.length() == 0) { 272 return null; 273 } else { 274 return oldValue; 275 } 276 } 277 278 285 protected TokenizerProperty doAddProperty(TokenizerProperty property) { 286 switch (property.getType()) { 287 case Token.STRING: 288 case Token.LINE_COMMENT: 289 case Token.BLOCK_COMMENT: 290 case Token.SPECIAL_SEQUENCE: 291 return addSpecialSequence(property); 292 293 case Token.KEYWORD: 294 return addKeyword(property); 295 296 case Token.PATTERN: 297 return addPattern(property); 298 299 case Token.WHITESPACE: 300 case Token.SEPARATOR: 301 default: 302 throw new ExtIllegalArgumentException("Unsupported property type {0}. (Leading) image \"{1}\".", 303 new Object [] { new Integer (property.getType()), property.getImages()[0] } ); 304 } 305 } 306 307 314 protected TokenizerProperty doRemoveProperty(TokenizerProperty property) { 315 TokenizerProperty prop = null; 317 String image = property.getImages()[0]; 318 319 switch (property.getType()) { 320 case Token.LINE_COMMENT: 321 case Token.BLOCK_COMMENT: 322 case Token.STRING: 323 case Token.SPECIAL_SEQUENCE: 324 if (_sequences[0] != null) { 325 prop = _sequences[0].removeSpecialSequence(image); 326 } 327 if (prop == null && _sequences[1] != null) { 328 prop = _sequences[1].removeSpecialSequence(image); 329 } 330 break; 331 332 case Token.KEYWORD: 333 if (_keywords[0] != null) { 334 prop = _keywords[0].removeKeyword(image); 335 } 336 if (prop == null && _keywords[1] != null) { 337 prop = _keywords[1].removeKeyword(image); 338 } 339 break; 340 341 case Token.PATTERN: 342 for (int index = 0; index < _patterns.size(); ++index) { 343 PatternMatcher data = (PatternMatcher)_patterns.get(index); 344 345 prop = data.getProperty(); 346 if (prop.getImages()[0].equals(image)) { 347 _patterns.remove(index); 348 break; 349 } else { 350 prop = null; 351 } 352 } 353 break; 354 355 case Token.WHITESPACE: 356 case Token.SEPARATOR: 357 default: 358 throw new ExtIllegalArgumentException("Unsupported property type {0}. (Leading) image \"{1}\".", 359 new Object [] { new Integer (property.getType()), image } ); 360 } 361 362 return prop; 364 } 365 366 367 371 377 public Iterator getStrings() { 378 return new SpecialSequencesIterator(this, _sequences, Token.STRING); 379 } 380 381 388 public String getWhitespaces() { 389 synchronized(this) { 390 return _whitespacesCase + _whitespacesNoCase; 391 } 392 } 393 394 401 public String getSeparators() { 402 synchronized(this) { 403 return _separatorsCase + _separatorsNoCase; 404 } 405 } 406 407 414 public Iterator getLineComments() { 415 return new SpecialSequencesIterator(this, _sequences, Token.LINE_COMMENT); 416 } 417 418 425 public Iterator getBlockComments() { 426 return new SpecialSequencesIterator(this, _sequences, Token.BLOCK_COMMENT); 427 } 428 429 436 public Iterator getSpecialSequences() { 437 return new SpecialSequencesIterator(this, _sequences, Token.SPECIAL_SEQUENCE); 438 } 439 440 447 public Iterator getKeywords() { 448 return new SpecialSequencesIterator(this, _keywords, Token.KEYWORD); 449 } 450 451 458 public Iterator getPatterns() { 459 return new PatternIterator(this); 460 } 461 462 463 470 public Iterator getProperties() { 471 return new FullIterator(this); 472 } 473 474 475 479 497 public void setTokenizerProperties(TokenizerProperties props) 498 throws UnsupportedOperationException , NullPointerException 499 { 500 throw new ExtUnsupportedOperationException( 501 "Class {0} already defines the {1} interface.", 502 new Object [] { StandardTokenizerProperties.class.getName(), 503 DataMapper.class.getName() } ); 504 } 505 506 519 public TokenizerProperties getTokenizerProperties() { 520 return this; 521 } 522 523 532 public boolean isWhitespace(char testChar) { 533 try { 534 return (_charFlags[testChar] & CHARFLAG_WHITESPACE) != 0; 535 } catch (ArrayIndexOutOfBoundsException ex) { 536 Integer extFlags = (Integer )_extCharFlags.get(new Integer (testChar)); 537 return (extFlags != null && (extFlags.intValue() & CHARFLAG_WHITESPACE) != 0); 538 } 539 } 540 541 542 552 public int countLeadingWhitespaces(DataProvider dataProvider) throws NullPointerException { 553 int maxChars = dataProvider.getLength(); 554 int len = 0; 555 556 while (len < maxChars && isWhitespace(dataProvider.getCharAt(len))) { 557 len++; 558 } 559 return len; 560 } 561 562 563 572 public boolean newlineIsWhitespace() { 573 return (_charFlags['\n'] & CHARFLAG_WHITESPACE) != 0 574 && (_charFlags['\r'] & CHARFLAG_WHITESPACE) != 0; 575 } 576 577 578 585 public boolean isSeparator(char testChar) { 586 try { 587 return (_charFlags[testChar] & CHARFLAG_SEPARATOR) != 0; 588 } catch (ArrayIndexOutOfBoundsException ex) { 589 Integer extFlags = (Integer )_extCharFlags.get(new Integer (testChar)); 590 return (extFlags != null && (extFlags.intValue() & CHARFLAG_SEPARATOR) != 0); 591 } 592 } 593 594 595 604 public boolean hasSequenceCommentOrString() { 605 synchronized(_sequences) { 606 return (_sequences[0] != null || _sequences[1] != null); 607 } 608 } 609 610 632 public TokenizerProperty startsWithSequenceCommentOrString(DataProvider dataProvider) 633 throws TokenizerException, NullPointerException 634 { 635 synchronized(_sequences) { 637 TokenizerProperty caseProp = (_sequences[0] != null) ? 638 _sequences[0].startsWithSequenceCommentOrString(dataProvider) : null; 639 640 TokenizerProperty noCaseProp = (_sequences[1] != null) ? 641 _sequences[1].startsWithSequenceCommentOrString(dataProvider) : null; 642 643 if (noCaseProp == null) { 644 return caseProp; 645 } else if (caseProp == null) { 646 return noCaseProp; 647 } else if (caseProp.getImages()[0].length() >= noCaseProp.getImages()[0].length()) { 648 return caseProp; 649 } else { 650 return noCaseProp; 651 } 652 } 653 } 654 655 665 public int getSequenceMaxLength() { 666 int maxLength = 0; 667 668 synchronized(_sequences) { 669 if (_sequences[0] != null) { 670 maxLength = _sequences[0].getSequenceMaxLength(); 671 } 672 if (_sequences[1] != null && _sequences[1].getSequenceMaxLength() > maxLength) { 673 maxLength = _sequences[1].getSequenceMaxLength(); 674 } 675 } 676 return maxLength; 677 } 678 679 680 688 public boolean hasKeywords() { 689 synchronized(_keywords) { 690 return (_keywords[0] != null || _keywords[1] != null); 691 } 692 } 693 694 704 public TokenizerProperty isKeyword(DataProvider dataProvider) 705 throws TokenizerException, NullPointerException 706 { 707 synchronized(_keywords) { 708 TokenizerProperty prop; 709 710 if (_keywords[0] != null) { 711 prop = _keywords[0].isKeyword(dataProvider); 712 } else { 713 prop = null; 714 } 715 if (prop == null && _keywords[1] != null) { 716 prop = _keywords[1].isKeyword(dataProvider); 717 } 718 return prop; 719 } 720 } 721 722 723 731 public boolean hasPattern() { 732 synchronized(_patterns) { 733 return (_patterns.size() > 0); 734 } 735 } 736 737 747 public PatternHandler.Result matches(DataProvider dataProvider) 748 throws TokenizerException, NullPointerException 749 { 750 synchronized(_patterns) { 751 int longestMatch = 0; 752 PatternHandler.Result bestResult = null; 753 754 for (int index = 0; index < _patterns.size(); ++index) { 756 PatternMatcher data = (PatternMatcher)_patterns.get(index); 757 PatternHandler.Result result = data.matches(dataProvider); 758 759 if (result != null) { 760 if (bestResult == null || bestResult.getLengthOfMatch() < result.getLengthOfMatch()) { 761 bestResult = result; 762 } 763 } 764 } 765 766 return bestResult; 768 } 769 } 770 771 772 776 786 protected TokenizerProperty addPattern(TokenizerProperty patternProp) throws IllegalArgumentException { 787 PatternMatcher data = null; 789 String pattern = patternProp.getImages()[0]; 790 791 try { 792 data = new PatternMatcher(patternProp, getParseFlags()); 793 } catch (Throwable ex) { 794 throw new ExtIllegalArgumentException(ex, "Pattern matching is not available (use JDK 1.4 or above)."); 795 } 796 797 for (int index = 0; index < _patterns.size(); ++index) { 799 PatternMatcher oldData = (PatternMatcher)_patterns.get(index); 800 TokenizerProperty oldProp = oldData.getProperty(); 801 802 if (oldProp.getImages()[0].equals(pattern)) { 803 _patterns.set(index, data); 804 return oldProp; 805 } 806 } 807 808 _patterns.add(data); 810 return null; 811 } 812 813 821 protected TokenizerProperty addKeyword(TokenizerProperty keywordProp) { 822 boolean noCase = isFlagSet(keywordProp, Flags.F_NO_CASE); 824 int arrayIdx = noCase ? 1 : 0; 825 826 if (_keywords[arrayIdx] == null) { 828 if (noCase) { 829 _keywords[arrayIdx] = new NoCaseSequenceStore(true); 830 } else { 831 _keywords[arrayIdx] = new SequenceStore(true); 832 } 833 } 834 835 return _keywords[arrayIdx].addKeyword(keywordProp); 837 } 838 839 840 849 protected TokenizerProperty addSpecialSequence(TokenizerProperty property) { 850 boolean noCase = isFlagSet(property, Flags.F_NO_CASE); 852 int arrayIdx = noCase ? 1 : 0; 853 854 if (_sequences[arrayIdx] == null) { 856 if (noCase) { 857 _sequences[arrayIdx] = new NoCaseSequenceStore(false); 858 } else { 859 _sequences[arrayIdx] = new SequenceStore(false); 860 } 861 } 862 863 return _sequences[arrayIdx].addSpecialSequence(property); 865 } 866 867 875 private void putCharSet(String set, int type, boolean setIt) { 876 int charFlags = 0; 878 879 switch (type) { 880 case Token.WHITESPACE: 881 charFlags = CHARFLAG_WHITESPACE; 882 break; 883 case Token.SEPARATOR: 884 charFlags = CHARFLAG_SEPARATOR; 885 break; 886 } 887 888 int length = (set != null) ? set.length() : 0; 890 char start, end, setChar; 891 892 for (int ii = 0; ii < length; ++ii) { 893 setChar = set.charAt(ii); 894 895 switch (setChar) { 896 case '-': 897 start = (ii > 0) ? set.charAt(ii - 1) : 0; 898 end = (ii < length - 1) ? set.charAt(ii + 1) : 0xFFFF; 899 ii += 2; 900 break; 901 902 case '\\': 903 setChar = (ii + 1 >= length) ? 0 : set.charAt(ii + 1); 904 ii++; 905 906 907 default: 908 start = end = setChar; 909 } 910 911 for (char index = start; index <= end; ++index) { 913 char currChar = index; 914 915 do { 916 if (currChar < _charFlags.length) { 917 if (setIt) { 919 _charFlags[currChar] |= charFlags; 920 } else { 921 _charFlags[currChar] &= ~charFlags; 922 } 923 924 } else { 925 Integer key = new Integer (currChar); 927 Integer extFlags = (Integer )_extCharFlags.get(key); 928 929 if (setIt) { 930 extFlags = new Integer (extFlags.intValue() | charFlags); 931 } else { 932 extFlags = new Integer (extFlags.intValue() & ~charFlags); 933 } 934 _extCharFlags.put(key, extFlags); 935 } 936 937 if (Character.isLowerCase(currChar)) { 939 currChar = Character.toUpperCase(currChar); 940 } else if (Character.isUpperCase(currChar)) { 941 currChar = Character.toLowerCase(currChar); 942 } 943 } while ((_flags & Flags.F_NO_CASE) != 0 && currChar != index); 944 } 945 } 946 } 947 948 949 953 956 public static final int CHARFLAG_WHITESPACE = 1; 957 958 961 public static final int CHARFLAG_SEPARATOR = 2; 962 963 964 968 971 protected int _charFlags[] = new int[256]; 972 973 976 protected HashMap _extCharFlags = new HashMap (); 977 978 981 protected String _whitespacesCase = DEFAULT_WHITESPACES; 982 983 986 protected String _whitespacesNoCase = ""; 987 988 991 protected String _separatorsCase = DEFAULT_SEPARATORS; 992 993 996 protected String _separatorsNoCase = ""; 997 998 1002 protected SequenceStore[] _sequences = new SequenceStore[2]; 1003 1004 1009 protected SequenceStore[] _keywords = new SequenceStore[2]; 1010 1011 1014 protected ArrayList _patterns = new ArrayList (); 1015 1016 1019 private Class _patternClass = null; 1020 1021 1024 private StringBuffer _foundMatch = new StringBuffer (); 1025} 1026 1027 1028 1029 1033 1038final class FullIterator implements Iterator { 1039 1040 1048 public FullIterator(StandardTokenizerProperties parent) { 1049 _parent = parent; 1050 1051 _iterators = new Object [3]; 1053 _iterators[0] = new SpecialSequencesIterator(parent, parent._keywords, Token.KEYWORD); 1054 _iterators[1] = new SpecialSequencesIterator(parent, parent._sequences, 0); 1055 _iterators[2] = new PatternIterator(parent); 1056 _currIndex = 0; 1057 } 1058 1059 1066 public boolean hasNext() { 1067 synchronized(this) { 1068 while (_currIndex < _iterators.length) { 1069 Iterator iter = (Iterator )_iterators[_currIndex]; 1070 1071 if (iter.hasNext()) { 1072 return true; 1073 } 1074 _currIndex++; 1075 } 1076 return false; 1077 } 1078 } 1079 1080 1086 public Object next() { 1087 if (hasNext()) { 1088 synchronized(this) { 1089 Iterator iter = (Iterator )_iterators[_currIndex]; 1090 return iter.next(); 1091 } 1092 } else { 1093 return null; 1094 } 1095 } 1096 1097 1103 public void remove() { 1104 if (_currIndex < _iterators.length) { 1105 Iterator iter = (Iterator )_iterators[_currIndex]; 1106 iter.remove(); 1107 } 1108 } 1109 1110 1111 private StandardTokenizerProperties _parent = null; 1113 private Object [] _iterators = null; 1114 private int _currIndex = -1; 1115} 1116 1117 1123final class MapIterator implements Iterator { 1124 1125 1132 public MapIterator(StandardTokenizerProperties parent, Map caseSensitiveMap, Map caseInsensitiveMap) { 1133 synchronized(this) { 1134 _parent = parent; 1135 if (caseSensitiveMap != null) { 1136 _iterators[0] = caseSensitiveMap.values().iterator(); 1137 } 1138 if (caseInsensitiveMap != null) { 1139 _iterators[1] = caseInsensitiveMap.values().iterator(); 1140 } 1141 } 1142 } 1143 1144 1150 public boolean hasNext() { 1151 synchronized(_iterators) { 1153 if (_iterators[0] != null) { 1154 if (_iterators[0].hasNext()) { 1155 return true; 1156 } else { 1157 _iterators[0] = null; 1158 } 1159 } 1160 if (_iterators[1] != null) { 1161 if (_iterators[1].hasNext()) { 1162 return true; 1163 } else { 1164 _iterators[1] = null; 1165 } 1166 } 1167 return false; 1168 } 1169 } 1170 1171 1177 public Object next() { 1178 if ( ! hasNext()) { 1179 throw new NoSuchElementException (); 1180 } 1181 1182 synchronized(this) { 1183 if (_iterators[0] != null) { 1184 _currentData = (TokenizerProperty)_iterators[0].next(); 1185 } else { 1186 _currentData = (TokenizerProperty)_iterators[1].next(); 1187 } 1188 return _currentData; 1189 } 1190 } 1191 1192 1198 public void remove() { 1199 synchronized(this) { 1200 if (_currentData == null) { 1202 throw new IllegalStateException (); 1203 } 1204 1205 if (_iterators[0] != null) { 1206 _iterators[0].remove(); 1207 } else { 1208 _iterators[1].remove(); 1209 } 1210 _parent.notifyListeners(new TokenizerPropertyEvent(TokenizerPropertyEvent.PROPERTY_REMOVED, _currentData)); 1211 _currentData = null; 1212 } 1213 } 1214 1215 private StandardTokenizerProperties _parent = null; 1217 private Iterator [] _iterators = new Iterator [2]; 1218 private TokenizerProperty _currentData = null; 1219} 1220 1221 1222 1223 1239final class SpecialSequencesIterator implements Iterator { 1240 1241 1250 public SpecialSequencesIterator(StandardTokenizerProperties parent, SequenceStore[] stores, int type) { 1251 _type = type; 1252 _parent = parent; 1253 _stores = stores; 1254 } 1255 1256 1262 public boolean hasNext() { 1263 synchronized(this) { 1264 if (_currentIterator != null && _currentIterator.hasNext()) { 1265 return true; 1266 } 1267 1268 while (_stores != null && ++_currentIndex < _stores.length) { 1269 if (_stores[_currentIndex] != null) { 1270 _currentIterator = _stores[_currentIndex].getSpecialSequences(_type); 1271 if (_currentIterator.hasNext()) { 1272 return true; 1273 } 1274 } 1275 } 1276 return false; 1277 } 1278 } 1279 1280 1286 public Object next() throws NoSuchElementException { 1287 synchronized(this) { 1288 if (! hasNext()) { 1289 throw new NoSuchElementException (); 1290 } 1291 _currentElement = (TokenizerProperty)_currentIterator.next(); 1292 return _currentElement; 1293 } 1294 } 1295 1296 1303 public void remove() throws IllegalStateException { 1304 synchronized(this) { 1305 if (_currentElement == null) { 1307 throw new IllegalStateException (); 1308 } 1309 1310 try { 1312 _currentIterator.remove(); 1313 _parent.notifyListeners(new TokenizerPropertyEvent(TokenizerPropertyEvent.PROPERTY_REMOVED, _currentElement)); 1314 _currentElement = null; 1315 } catch (Exception ex) { 1316 throw new ExtRuntimeException(ex, "While trying to remove current element of a SpecialSequencesIterator."); 1317 } 1318 } 1319 } 1320 1321 1322 private StandardTokenizerProperties _parent = null; 1324 private SequenceStore[] _stores = null; 1325 private TokenizerProperty _currentElement = null; 1326 private Iterator _currentIterator = null; 1327 private int _currentIndex = -1; 1328 private int _type = Token.UNKNOWN; 1329} 1330 1331 1332 1335final class PatternIterator implements Iterator { 1336 1341 public PatternIterator(StandardTokenizerProperties parent) { 1342 _parent = parent; 1343 synchronized(parent._patterns) { 1344 _iterator = parent._patterns.iterator(); 1345 } 1346 } 1347 1348 1354 public boolean hasNext() { 1355 return _iterator.hasNext(); 1356 } 1357 1358 1364 public Object next() throws NoSuchElementException { 1365 synchronized(this) { 1366 _currentData = (PatternMatcher)_iterator.next(); 1367 return _currentData.getProperty(); 1368 } 1369 } 1370 1371 1374 public void remove() { 1375 synchronized(this) { 1376 _iterator.remove(); 1377 _parent.notifyListeners(new TokenizerPropertyEvent(TokenizerPropertyEvent.PROPERTY_REMOVED, _currentData.getProperty())); 1378 } 1379 } 1380 1381 private StandardTokenizerProperties _parent = null; 1383 private Iterator _iterator = null; 1384 private PatternMatcher _currentData = null; 1385} 1386 | Popular Tags |