package de.susebox.jtopas;

import java.io.Reader;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.LinkedList;
import java.util.Arrays;

import de.susebox.java.lang.ExtIndexOutOfBoundsException;

import de.susebox.jtopas.spi.WhitespaceHandler;
import de.susebox.jtopas.spi.KeywordHandler;
import de.susebox.jtopas.spi.PatternHandler;
import de.susebox.jtopas.spi.SeparatorHandler;
import de.susebox.jtopas.spi.SequenceHandler;

import de.susebox.jtopas.spi.StandardWhitespaceHandler;
import de.susebox.jtopas.spi.StandardKeywordHandler;
import de.susebox.jtopas.spi.StandardSeparatorHandler;
import de.susebox.jtopas.spi.StandardSequenceHandler;

import de.susebox.jtopas.spi.DataProvider;
import de.susebox.jtopas.spi.DataMapper;


/**
 * Skeleton implementation of {@link Tokenizer}. It contains the complete
 * token recognition logic (whitespaces, patterns, special sequences, strings,
 * comments, separators, keywords and normal tokens) and leaves only the data
 * access to subclasses via {@link #getDataProvider} and {@link #readMoreData}.
 *
 * Positions handled by this class ({@link #_currentReadPos},
 * {@link #_currentWritePos}) are absolute positions that are passed unchanged
 * to {@link #getDataProvider}.
 *
 * Several tokenizers can be chained with {@link #addTokenizer}; they share the
 * data of the first ("base") tokenizer and are kept in sync by
 * {@link #switchTo} / {@link #synchronizeAll}.
 */
public abstract class AbstractTokenizer implements Tokenizer, TokenizerPropertyListener {

  //---------------------------------------------------------------------------
  // Abstract methods
  //

  /**
   * Supplies access to a range of the buffered data.
   *
   * @param   startPos  position of the first character of the range
   * @param   length    number of characters in the range
   * @return  a {@link DataProvider} for the requested range
   */
  protected abstract DataProvider getDataProvider(int startPos, int length);

  /**
   * Reads more data from the underlying source into the buffer.
   *
   * @return  the number of characters read; a negative value is treated as
   *          end-of-file by {@link #readMoreDataFromBase}
   * @throws  TokenizerException if reading fails
   */
  protected abstract int readMoreData() throws TokenizerException;


  //---------------------------------------------------------------------------
  // Constructors
  //

  /**
   * Creates a tokenizer that uses a freshly created
   * {@link StandardTokenizerProperties} instance as its properties.
   */
  public AbstractTokenizer() {
    _baseTokenizer = this;
    if (_defaultProperties == null) {
      _defaultProperties = new StandardTokenizerProperties();
    }
    setTokenizerProperties(_defaultProperties);
  }

  /**
   * Creates a tokenizer that uses the given properties.
   *
   * @param   properties  the properties to use, must not be <code>null</code>
   */
  public AbstractTokenizer(TokenizerProperties properties) {
    _baseTokenizer = this;
    setTokenizerProperties(properties);
  }


  //---------------------------------------------------------------------------
  // Data source
  //

  /**
   * Sets a new data source and resets the read/write positions, the EOF
   * state, the line/column counters (0 when {@link Flags#F_COUNT_LINES} is
   * set, otherwise -1) and all already scanned tokens.
   *
   * @param   source  the new data source
   */
  public void setSource(TokenizerSource source) {
    _source          = source;
    _eofReached      = false;
    _currentReadPos  = 0;
    _currentWritePos = 0;
    if (isFlagSet(Flags.F_COUNT_LINES)) {
      _lineNumber   = 0;
      _columnNumber = 0;
    } else {
      _lineNumber   = -1;
      _columnNumber = -1;
    }
    Arrays.fill(_scannedToken, null);
  }

  /**
   * Convenience variant of {@link #setSource(TokenizerSource)} that wraps the
   * reader into a {@link ReaderSource}.
   *
   * @param   reader  the reader to get the data from
   */
  public void setSource(Reader reader) {
    setSource(new ReaderSource(reader));
  }

  /**
   * Returns the source set by the last call to {@link #setSource}.
   *
   * @return  the current source or <code>null</code>
   */
  public TokenizerSource getSource() {
    return _source;
  }


  //---------------------------------------------------------------------------
  // Properties, flags and handlers
  //

  /**
   * Sets the properties this tokenizer works with. The tokenizer deregisters
   * itself as listener from the previous properties, registers with the new
   * ones and (re)installs the whitespace, separator, sequence, keyword and
   * pattern handlers: the properties object itself where it implements the
   * handler interface, otherwise a standard handler (no handler at all for
   * patterns). Finally, a parse flag change is signalled to
   * {@link #propertyChanged} if the flags of the new properties differ from
   * the currently active ones.
   *
   * @param   props   the new properties
   * @throws  NullPointerException if <code>props</code> is <code>null</code>
   */
  public void setTokenizerProperties(TokenizerProperties props) throws NullPointerException {
    if (props == null) {
      throw new NullPointerException();
    }

    // move the listener registration from the old to the new properties
    if (_properties != null) {
      _properties.removeTokenizerPropertyListener(this);
    }
    _properties = props;
    _properties.addTokenizerPropertyListener(this);

    // install the handlers
    if (_properties instanceof WhitespaceHandler) {
      setWhitespaceHandler((WhitespaceHandler)_properties);
    } else {
      setWhitespaceHandler(new StandardWhitespaceHandler(_properties));
    }
    if (_properties instanceof SeparatorHandler) {
      setSeparatorHandler((SeparatorHandler)_properties);
    } else {
      setSeparatorHandler(new StandardSeparatorHandler(_properties));
    }
    if (_properties instanceof SequenceHandler) {
      setSequenceHandler((SequenceHandler)_properties);
    } else {
      setSequenceHandler(new StandardSequenceHandler(_properties));
    }
    if (props instanceof KeywordHandler) {
      setKeywordHandler((KeywordHandler)props);
    } else {
      setKeywordHandler(new StandardKeywordHandler(_properties));
    }
    if (_properties instanceof PatternHandler) {
      setPatternHandler((PatternHandler)_properties);
    } else {
      setPatternHandler(null);
    }

    // take over the parse flags of the new properties
    int newFlags = _properties.getParseFlags();

    if (newFlags != _flags) {
      propertyChanged(new TokenizerPropertyEvent(
                        TokenizerPropertyEvent.PROPERTY_MODIFIED,
                        new TokenizerProperty(TokenizerProperty.PARSE_FLAG_MASK,
                                              new String[] { Integer.toBinaryString(newFlags) } ),
                        new TokenizerProperty(TokenizerProperty.PARSE_FLAG_MASK,
                                              new String[] { Integer.toBinaryString(_flags) } )));
    }
  }

  /**
   * Returns the properties currently in use.
   *
   * @return  the properties set by {@link #setTokenizerProperties}
   */
  public TokenizerProperties getTokenizerProperties() {
    return _properties;
  }

  /**
   * Overrides some parse flags for this tokenizer only. Only the flags in
   * {@link #VALID_FLAGS_MASK} may be part of the mask. Bits outside the mask
   * are still taken from the tokenizer properties.
   *
   * The line and column counters are adjusted to the resulting state of
   * {@link Flags#F_COUNT_LINES}, consistently with {@link #setSource} and
   * {@link #propertyChanged}: counting starts at 0 if it was off before, an
   * already running count is kept, and both values are -1 if counting is off.
   *
   * @param   flags   the flag values
   * @param   mask    the flags that should be taken from <code>flags</code>
   * @throws  TokenizerException if the mask contains flags that cannot be set
   *          separately for a tokenizer
   */
  public void changeParseFlags(int flags, int mask) throws TokenizerException {
    // test the given mask
    if ((mask | VALID_FLAGS_MASK) != VALID_FLAGS_MASK) {
      throw new TokenizerException(
                  "One or more flags cannot be set separately for a {0}. Violating flags in {1}: {2}.",
                  new Object[] { AbstractTokenizer.class.getName(),
                                 Integer.toHexString(flags),
                                 Integer.toHexString(mask & ~VALID_FLAGS_MASK) } );
    }

    // set the new flags for this tokenizer
    _flagMask = mask;
    _flags    = (flags & mask) | (getTokenizerProperties().getParseFlags() & ~mask);

    // bring line and column counters in line with the new flag state
    if (isFlagSet(Flags.F_COUNT_LINES)) {
      if (_lineNumber < 0) {
        _lineNumber = 0;
      }
      if (_columnNumber < 0) {
        _columnNumber = 0;
      }
    } else {
      _lineNumber   = -1;
      _columnNumber = -1;
    }
  }

  /**
   * Returns the effective parse flags: the flags of the properties, overlaid
   * by the flags set with {@link #changeParseFlags}.
   *
   * @return  the effective parse flags of this tokenizer
   */
  public int getParseFlags() {
    return (getTokenizerProperties().getParseFlags() & ~_flagMask) + (_flags & _flagMask);
  }

  /**
   * Sets the keyword handler. If the handler is the properties object of this
   * tokenizer, it is only used while the properties actually contain
   * keywords; any other handler is remembered as an external handler.
   *
   * @param   handler   the keyword handler or <code>null</code>
   */
  public void setKeywordHandler(de.susebox.jtopas.spi.KeywordHandler handler) {
    synchronized(this) {
      if (handler == _properties) {
        if (_properties != null && _properties.getKeywords().hasNext()) {
          _keywordHandler = handler;
        } else {
          _keywordHandler = null;
        }
        _internalFlags &= ~IFLAG_EXTERNAL_KEYWORD_HANDLER;
      } else {
        _keywordHandler = handler;
        _internalFlags |= IFLAG_EXTERNAL_KEYWORD_HANDLER;
      }
    }
  }

  /**
   * Returns the keyword handler: the properties object (cast to
   * {@link KeywordHandler}) unless an external handler has been set.
   *
   * @return  the keyword handler, may be <code>null</code>
   */
  public de.susebox.jtopas.spi.KeywordHandler getKeywordHandler() {
    synchronized(this) {
      if ((_internalFlags & IFLAG_EXTERNAL_KEYWORD_HANDLER) == 0) {
        return (de.susebox.jtopas.spi.KeywordHandler)getTokenizerProperties();
      } else {
        return _keywordHandler;
      }
    }
  }

  /**
   * Sets the whitespace handler.
   *
   * @param   handler   the whitespace handler or <code>null</code>
   */
  public void setWhitespaceHandler(de.susebox.jtopas.spi.WhitespaceHandler handler) {
    _whitespaceHandler = handler;
  }

  /**
   * Returns the whitespace handler.
   *
   * @return  the whitespace handler, may be <code>null</code>
   */
  public de.susebox.jtopas.spi.WhitespaceHandler getWhitespaceHandler() {
    return _whitespaceHandler;
  }

  /**
   * Sets the separator handler.
   *
   * @param   handler   the separator handler or <code>null</code>
   */
  public void setSeparatorHandler(de.susebox.jtopas.spi.SeparatorHandler handler) {
    _separatorHandler = handler;
  }

  /**
   * Returns the separator handler.
   *
   * @return  the separator handler, may be <code>null</code>
   */
  public de.susebox.jtopas.spi.SeparatorHandler getSeparatorHandler() {
    return _separatorHandler;
  }

  /**
   * Sets the handler for special sequences, strings and comments. If the
   * handler is the properties object of this tokenizer, it is only used while
   * the properties contain at least one special sequence, string, block
   * comment or line comment; any other handler is remembered as an external
   * handler.
   *
   * @param   handler   the sequence handler or <code>null</code>
   */
  public void setSequenceHandler(de.susebox.jtopas.spi.SequenceHandler handler) {
    synchronized(this) {
      if (handler == _properties) {
        if (_properties != null && (   _properties.getSpecialSequences().hasNext()
                                    || _properties.getStrings().hasNext()
                                    || _properties.getBlockComments().hasNext()
                                    || _properties.getLineComments().hasNext())) {
          _sequenceHandler = handler;
        } else {
          _sequenceHandler = null;
        }
        _internalFlags &= ~IFLAG_EXTERNAL_SEQUENCE_HANDLER;
      } else {
        _sequenceHandler = handler;
        _internalFlags |= IFLAG_EXTERNAL_SEQUENCE_HANDLER;
      }
    }
  }

  /**
   * Returns the sequence handler: the properties object (cast to
   * {@link SequenceHandler}) unless an external handler has been set.
   *
   * @return  the sequence handler, may be <code>null</code>
   */
  public de.susebox.jtopas.spi.SequenceHandler getSequenceHandler() {
    synchronized(this) {
      if ((_internalFlags & IFLAG_EXTERNAL_SEQUENCE_HANDLER) == 0) {
        return (de.susebox.jtopas.spi.SequenceHandler)getTokenizerProperties();
      } else {
        return _sequenceHandler;
      }
    }
  }

  /**
   * Sets the pattern handler. If the handler is the properties object of this
   * tokenizer, it is only used while the properties actually contain
   * patterns; any other handler (including <code>null</code>) is remembered
   * as an external handler.
   *
   * @param   handler   the pattern handler or <code>null</code>
   */
  public void setPatternHandler(de.susebox.jtopas.spi.PatternHandler handler) {
    synchronized(this) {
      if (handler == _properties) {
        if (_properties != null && _properties.getPatterns().hasNext()) {
          _patternHandler = handler;
        } else {
          _patternHandler = null;
        }
        _internalFlags &= ~IFLAG_EXTERNAL_PATTERN_HANDLER;
      } else {
        _patternHandler = handler;
        _internalFlags |= IFLAG_EXTERNAL_PATTERN_HANDLER;
      }
    }
  }

  /**
   * Returns the pattern handler: the properties object (cast to
   * {@link PatternHandler}) unless an external handler has been set.
   *
   * @return  the pattern handler, may be <code>null</code>
   */
  public de.susebox.jtopas.spi.PatternHandler getPatternHandler() {
    synchronized(this) {
      if ((_internalFlags & IFLAG_EXTERNAL_PATTERN_HANDLER) == 0) {
        return (de.susebox.jtopas.spi.PatternHandler)getTokenizerProperties();
      } else {
        return _patternHandler;
      }
    }
  }


  //---------------------------------------------------------------------------
  // Tokenizing
  //

  /**
   * Returns the current line number.
   *
   * @return  the line number, -1 if lines are not counted
   */
  public int getCurrentLine() {
    return _lineNumber;
  }

  /**
   * Returns the current column number.
   *
   * @return  the column number, -1 if lines are not counted
   */
  public int getCurrentColumn() {
    return _columnNumber;
  }

  /**
   * Checks whether {@link #nextToken} can be called again.
   *
   * @return  <code>false</code> once the EOF token has been returned,
   *          <code>true</code> otherwise
   */
  public boolean hasMoreToken() {
    return _scannedToken[0] == null || _scannedToken[0].getType() != Token.EOF;
  }

  /**
   * Scans the next token. Tokens that should not be returned according to the
   * parse flags (whitespaces, line and block comments) are skipped.
   *
   * @return  the token found, also available through {@link #currentToken}
   * @throws  TokenizerException passed on from the scanning methods
   */
  public Token nextToken() throws TokenizerException {
    boolean returnIt = false;

    do {
      // Determine the type of the next token unless a previous pattern
      // look-ahead has already found it. Each test stores its token in
      // _scannedToken[1] on success.
      if (_scannedToken[1] == null) {
        if (   ! isEOF(0)
            && ! isWhitespace(0)
            && ! isPattern(0, false)
            && ! isSpecialSequence(0)
            && ! isSeparator(0)) {
          _scannedToken[1] = new Token(Token.NORMAL);
        }
      }
      _scannedToken[0] = _scannedToken[1];
      _scannedToken[1] = _scannedToken[2];
      _scannedToken[2] = null;

      // The scanning methods store the TokenizerProperty as companion; replace
      // it by the companion of the property and set the start information.
      Token             token = _scannedToken[0];
      TokenizerProperty prop  = (TokenizerProperty)token.getCompanion();

      token.setCompanion((prop != null) ? prop.getCompanion() : null);
      token.setStartPosition(getReadPosition());
      token.setStartLine(_lineNumber);
      token.setStartColumn(_columnNumber);

      returnIt = true;

      // complete the token according to its type
      switch (token.getType()) {
      case Token.EOF:
        token.setLength(0);
        break;
      case Token.WHITESPACE:
        token.setLength(completeWhitespace());
        returnIt = isFlagSet(Flags.F_RETURN_SIMPLE_WHITESPACES);
        break;
      case Token.SEPARATOR:
        token.setLength(1);
        break;
      case Token.STRING:
        token.setLength(completeString(prop));
        break;
      case Token.LINE_COMMENT:
        token.setLength(completeLineComment(prop));
        returnIt = isFlagSet(prop, Flags.F_RETURN_LINE_COMMENTS);
        break;
      case Token.BLOCK_COMMENT:
        token.setLength(completeBlockComment(prop));
        returnIt = isFlagSet(prop, Flags.F_RETURN_BLOCK_COMMENTS);
        break;
      case Token.SPECIAL_SEQUENCE:
        token.setLength(prop.getImages()[0].length());
        break;
      case Token.PATTERN:
        // length and image parts have already been set by isPattern
        break;
      default:
        prop = completeBoundedToken(token);
      }

      // compute the end position of the token
      adjustLineAndColumn(token.getType(), token.getLength());
      token.setEndLine(_lineNumber);
      token.setEndColumn(_columnNumber);

      // image and image parts are only needed for tokens that are returned
      if (returnIt) {
        boolean tokenPosOnly     = (prop != null) ? isFlagSet(prop, Flags.F_TOKEN_POS_ONLY)
                                                  : isFlagSet(Flags.F_TOKEN_POS_ONLY);
        boolean returnImageParts = (prop != null) ? isFlagSet(prop, Flags.F_RETURN_IMAGE_PARTS)
                                                  : isFlagSet(Flags.F_RETURN_IMAGE_PARTS);

        if ( ! tokenPosOnly || returnImageParts) {
          token.setImage(getText(_currentReadPos, token.getLength()));
        }
        if (returnImageParts) {
          switch (token.getType()) {
          case Token.WHITESPACE:
            token.setImageParts(splitIntoLines(token.getImage()));
            break;
          case Token.STRING:
            token.setImageParts(splitString(prop, token.getImage()));
            break;
          case Token.LINE_COMMENT:
            token.setImageParts(splitIntoLines(token.getImage().substring(prop.getImages()[0].length())));
            break;
          case Token.BLOCK_COMMENT:
            token.setImageParts(splitBlockComment(prop, token.getImage()));
            break;
          case Token.PATTERN:
            break;
          case Token.EOF:
            token.setImageParts(new String[] {} );
            break;
          default:
            token.setImageParts(new String[] { token.getImage() } );
          }
        }
      }

      // this token is done
      _currentReadPos += token.getLength();

    } while ( ! returnIt);

    return _scannedToken[0];
  }

  /**
   * Scans the next token and returns its image.
   *
   * @return  the image of the next token, see {@link #currentImage}
   * @throws  TokenizerException passed on from {@link #nextToken}
   */
  public String nextImage() throws TokenizerException {
    nextToken();
    return currentImage();
  }

  /**
   * Returns the token found by the last call to {@link #nextToken}.
   *
   * @return  the current token
   * @throws  TokenizerException if there is no current token
   */
  public Token currentToken() throws TokenizerException {
    if (_scannedToken[0] == null) {
      throw new TokenizerException("No current token available (nextToken was not called / read position changed)");
    }
    return _scannedToken[0];
  }

  /**
   * Returns the image of the current token. If the token carries no image
   * because of {@link Flags#F_TOKEN_POS_ONLY}, the image is fetched from the
   * buffered data.
   *
   * @return  the image or <code>null</code> for the EOF token
   * @throws  TokenizerException if there is no current token
   */
  public String currentImage() throws TokenizerException {
    Token token = currentToken();

    if (token.getType() == Token.EOF) {
      return null;
    } else if ( ! isFlagSet(Flags.F_TOKEN_POS_ONLY) || token.getImage() != null) {
      return token.getImage();
    } else {
      return getText(token.getStartPosition(), token.getLength());
    }
  }

  /**
   * Returns the current line number.
   *
   * @return  the line number, -1 if lines are not counted
   */
  public int getLineNumber() {
    return _lineNumber;
  }

  /**
   * Returns the current column number.
   *
   * @return  the column number, -1 if lines are not counted
   */
  public int getColumnNumber() {
    return _columnNumber;
  }

  /**
   * Returns the position where the next token starts.
   *
   * @return  the current read position
   */
  public int getReadPosition() {
    return _currentReadPos;
  }

  /**
   * Returns the number of characters between <code>getRangeStart()</code>
   * (not defined in this class) and the current write position.
   *
   * @return  the number of currently available characters
   */
  public int currentlyAvailable() {
    return _currentWritePos - getRangeStart();
  }

  /**
   * Tries to read more data.
   *
   * @return  the result of {@link #currentlyAvailable} after the attempt
   * @throws  TokenizerException passed on from {@link #readMoreDataFromBase}
   */
  public int readMore() throws TokenizerException {
    readMoreDataFromBase();
    return currentlyAvailable();
  }

  /**
   * Returns the character at the given position.
   *
   * @param   pos   position of the character
   * @return  the character at that position
   * @throws  IndexOutOfBoundsException declared for positions outside the
   *          available data
   */
  public char getChar(int pos) throws IndexOutOfBoundsException {
    return getBaseDataProvider(pos, 1).getCharAt(0);
  }

  /**
   * Returns a part of the buffered data as string.
   *
   * @param   start   position of the first character
   * @param   len     number of characters
   * @return  the text of the given range
   * @throws  IndexOutOfBoundsException declared for ranges outside the
   *          available data
   */
  public String getText(int start, int len) throws IndexOutOfBoundsException {
    return getBaseDataProvider(start, len).toString();
  }

  /**
   * Sets the read position. All already scanned tokens are discarded and, if
   * lines are counted, line and column number are recomputed from the
   * recorded line start positions. If no line start at or before the position
   * has been recorded, line 0 is assumed and the column is the position
   * itself.
   *
   * @param   position  the new read position
   * @throws  IndexOutOfBoundsException if the position is below
   *          <code>getRangeStart()</code> or above the current write position
   */
  public void setReadPositionAbsolute(int position) throws IndexOutOfBoundsException {
    if (position < getRangeStart()) {
      throw new ExtIndexOutOfBoundsException(
                  "Invalid read position {0} below the current text window start {1}.",
                  new Object[] { new Integer(position), new Integer(getRangeStart()) }
                );
    } else if (position > _currentWritePos) {
      throw new ExtIndexOutOfBoundsException(
                  "Invalid read position {0} at or above the current text window end {1}.",
                  new Object[] { new Integer(position), new Integer(currentlyAvailable() + getRangeStart()) }
                );
    }
    _currentReadPos = position;
    Arrays.fill(_scannedToken, null);

    // adjust line and column counting
    if (isFlagSet(Flags.F_COUNT_LINES)) {
      // the map is created lazily by putPosition and may still be missing
      SortedMap map = (_position2LineMap != null)
                    ? _position2LineMap.headMap(new Integer(position + 1))
                    : null;

      if (map != null && ! map.isEmpty()) {
        Integer lastLineStart = (Integer)map.lastKey();

        _lineNumber   = ((Integer)map.get(lastLineStart)).intValue();
        _columnNumber = position - lastLineStart.intValue();
      } else {
        _lineNumber   = 0;
        _columnNumber = position;
      }
    }
  }

  /**
   * Moves the read position relative to the current one, see
   * {@link #setReadPositionAbsolute}.
   *
   * @param   offset  number of characters to move (may be negative)
   * @throws  IndexOutOfBoundsException if the resulting position is invalid
   */
  public void setReadPositionRelative(int offset) throws IndexOutOfBoundsException {
    setReadPositionAbsolute(getReadPosition() + offset);
  }

  /**
   * Releases all references held by this tokenizer (properties, handlers,
   * source, chained tokenizers, scanned tokens, line map) and resets its
   * state. The base tokenizer reference is left untouched.
   */
  public void close() {
    // deregister from the properties
    if (_properties != null) {
      _properties.removeTokenizerPropertyListener(this);
      _properties = null;
    }

    // free the position-to-line mapping
    if (_position2LineMap != null) {
      _position2LineMap.clear();
      _position2LineMap = null;
    }

    // reset the remaining members
    _eofReached        = true;
    _flags             = 0;
    _flagMask          = 0;
    _internalFlags     = 0;
    _currentReadPos    = 0;
    _currentWritePos   = 0;
    _lineNumber        = -1;
    _columnNumber      = -1;
    _nextTokenizer     = null;
    _prevTokenizer     = null;
    _whitespaceHandler = null;
    _separatorHandler  = null;
    _keywordHandler    = null;
    _sequenceHandler   = null;
    _patternHandler    = null;
    _source            = null;
    Arrays.fill(_scannedToken, null);
  }


  //---------------------------------------------------------------------------
  // Embedded tokenizers
  //

  /**
   * Appends a tokenizer to the end of the chain starting at this tokenizer.
   * The added tokenizer gets the base tokenizer of this instance and takes
   * over its {@link Flags#F_COUNT_LINES} setting. A <code>null</code>
   * argument is ignored.
   *
   * @param   tokenizer   the tokenizer to add
   * @throws  TokenizerException passed on from {@link #changeParseFlags}
   */
  public void addTokenizer(AbstractTokenizer tokenizer) throws TokenizerException {
    AbstractTokenizer curr = this;

    while (curr._nextTokenizer != null) {
      curr = curr._nextTokenizer;
    }

    if (tokenizer != null) {
      synchronized(tokenizer) {
        curr._nextTokenizer      = tokenizer;
        tokenizer._prevTokenizer = curr;

        // share the base tokenizer
        AbstractTokenizer baseTokenizer = getBaseTokenizer();

        tokenizer._baseTokenizer = baseTokenizer;

        // line counting must be the same for all chained tokenizers
        tokenizer.changeParseFlags(baseTokenizer.getParseFlags(), Flags.F_COUNT_LINES);
      }
    }
  }

  /**
   * Transfers the scan state of this tokenizer (EOF state, read and write
   * position, line and column number, line map) to another tokenizer that
   * shares the same base tokenizer.
   *
   * @param   tokenizer   the tokenizer to continue with
   * @throws  TokenizerException if the tokenizer is <code>null</code> or has a
   *          different base tokenizer
   */
  public void switchTo(AbstractTokenizer tokenizer) throws TokenizerException {
    if (tokenizer != null) {
      synchronized(tokenizer) {
        if (tokenizer._baseTokenizer != _baseTokenizer) {
          throw new TokenizerException("Trying to switch to an alien tokenizer (not added with addTokenizer).", null);
        }
        tokenizer._eofReached       = this._eofReached;
        tokenizer._currentReadPos   = this._currentReadPos;
        tokenizer._currentWritePos  = this._currentWritePos;
        tokenizer._columnNumber     = this._columnNumber;
        tokenizer._lineNumber       = this._lineNumber;
        tokenizer._position2LineMap = this._position2LineMap;
      }
    } else {
      throw new TokenizerException(new NullPointerException());
    }
  }


  //---------------------------------------------------------------------------
  // Handler access for subclasses
  //

  /**
   * Checks a single character with the whitespace handler.
   *
   * @param   testChar  the character to check
   * @return  <code>true</code> if the handler regards the character as a
   *          whitespace, <code>false</code> otherwise or without a handler
   */
  protected boolean isWhitespace(char testChar) {
    if (_whitespaceHandler != null) {
      return _whitespaceHandler.isWhitespace(testChar);
    } else {
      return false;
    }
  }

  /**
   * Counts the leading whitespaces of a data range.
   *
   * @param   startingAtPos   start position of the range
   * @param   maxChars        length of the range
   * @return  the count reported by the whitespace handler, 0 without handler
   * @throws  TokenizerException declared for failures of the data access
   */
  protected int readWhitespaces(int startingAtPos, int maxChars) throws TokenizerException {
    if (_whitespaceHandler != null) {
      DataProvider dataProvider = getBaseDataProvider(startingAtPos, maxChars);
      return _whitespaceHandler.countLeadingWhitespaces(dataProvider);
    } else {
      return 0;
    }
  }

  /**
   * Checks a data range for being a keyword.
   *
   * @param   startingAtPos   start position of the range
   * @param   length          length of the range
   * @return  the keyword property reported by the keyword handler or
   *          <code>null</code> (always <code>null</code> without handler)
   * @throws  TokenizerException declared for failures of the data access
   */
  protected TokenizerProperty isKeyword(int startingAtPos, int length) throws TokenizerException {
    if (_keywordHandler != null) {
      DataProvider dataProvider = getBaseDataProvider(startingAtPos, length);
      return _keywordHandler.isKeyword(dataProvider);
    } else {
      return null;
    }
  }


  //---------------------------------------------------------------------------
  // Image part helpers
  //

  /**
   * Splits a text into lines. "\r\n", "\r" and "\n" are accepted as line
   * separators and are not part of the returned lines. A text ending with a
   * line separator yields a trailing empty line; an empty text yields an empty
   * array.
   *
   * @param   image   the text to split
   * @return  the lines of the text
   */
  protected String[] splitIntoLines(String image) {
    LinkedList lines = new LinkedList();
    int        index = 0;
    int        start = 0;

    while (index < image.length()) {
      switch (image.charAt(index)) {
      case '\r':
        lines.add(image.substring(start, index));
        if (index + 1 < image.length() && image.charAt(index + 1) == '\n') {
          index += 2;
        } else {
          index++;
        }
        start = index;
        break;
      case '\n':
        lines.add(image.substring(start, index));
        start = ++index;
        break;
      default:
        index++;
      }
    }

    // the rest after the last line separator (or the whole single line)
    if (start < index || start > 0) {
      lines.add(image.substring(start, index));
    }

    return (String[])lines.toArray(new String[lines.size()]);
  }

  /**
   * Splits the image of a string token into lines after removing the string
   * delimiters and resolving escape sequences. An escape sequence followed by
   * the escape sequence itself, the start or the end delimiter stands for that
   * following sequence; before anything else the escape sequence is simply
   * dropped. A missing (<code>null</code> or empty) escape sequence means that
   * nothing is escaped.
   *
   * @param   prop    the string property (images: start, end, escape)
   * @param   image   the complete image of the string token
   * @return  the lines of the string contents
   */
  protected String[] splitString(TokenizerProperty prop, String image) {
    String[] images    = prop.getImages();
    String   begin     = images[0];
    String   end       = images[1];
    String   esc       = images[2];
    boolean  noCase    = isFlagSet(prop, Flags.F_NO_CASE);
    boolean  hasEscape = (esc != null && esc.length() > 0);

    StringBuffer buffer = null;
    int          index  = begin.length();
    int          start  = index;
    int          endIndex;

    // the closing delimiter is missing if the string was terminated by EOF
    if (   image.length() - start >= end.length()
        && image.regionMatches(noCase, image.length() - end.length(), end, 0, end.length())) {
      endIndex = image.length() - end.length();
    } else {
      endIndex = image.length();
    }

    // regionMatches never throws on a region that exceeds the image, it
    // simply reports "no match"
    while (index < endIndex) {
      if (hasEscape && image.regionMatches(noCase, index, esc, 0, esc.length())) {
        if (buffer == null) {
          buffer = new StringBuffer(image.length());
        }
        buffer.append(image.substring(start, index));
        index += esc.length();
        if (index < image.length()) {
          if (image.regionMatches(noCase, index, esc, 0, esc.length())) {
            buffer.append(esc);
            index += esc.length();
          } else if (image.regionMatches(noCase, index, begin, 0, begin.length())) {
            buffer.append(begin);
            index += begin.length();
          } else if (image.regionMatches(noCase, index, end, 0, end.length())) {
            buffer.append(end);
            index += end.length();
          }
        }
        // continue directly behind the escape sequence: the next character
        // may start another escape sequence and must not be skipped
        start = index;
      } else {
        index++;
      }
    }

    if (buffer != null) {
      // an escaped end delimiter at the very end may move start past endIndex
      if (start < endIndex) {
        buffer.append(image.substring(start, endIndex));
      }
      return splitIntoLines(buffer.toString());
    }
    return splitIntoLines(image.substring(start, endIndex));
  }

  /**
   * Splits the image of a block comment into lines after removing the start
   * sequence and, if present, the end sequence.
   *
   * @param   prop    the block comment property (images: start, end)
   * @param   image   the complete image of the block comment token
   * @return  the lines of the comment contents
   */
  protected String[] splitBlockComment(TokenizerProperty prop, String image) {
    String[] images = prop.getImages();
    String   start  = images[0];
    String   end    = images[1];
    boolean  noCase = isFlagSet(prop, Flags.F_NO_CASE);

    if (   image.length() - start.length() >= end.length()
        && (   ( ! noCase && end.equals(image.substring(image.length() - end.length())))
            || (   noCase && end.equalsIgnoreCase(image.substring(image.length() - end.length()))))) {
      return splitIntoLines(image.substring(start.length(), image.length() - end.length()));
    } else {
      return splitIntoLines(image.substring(start.length()));
    }
  }


  //---------------------------------------------------------------------------
  // TokenizerPropertyListener
  //

  /**
   * Reacts on changes of the tokenizer properties. Added or removed sequence,
   * keyword and pattern properties re-evaluate the corresponding handler
   * unless an external handler is set. Modified parse flags are taken over
   * completely (flags set with {@link #changeParseFlags} are discarded) and
   * line counting is started or stopped accordingly.
   *
   * @param   event   the description of the change
   */
  public void propertyChanged(TokenizerPropertyEvent event) {
    TokenizerProperty prop = event.getProperty();

    synchronized(this) {
      switch (event.getType()) {
      case TokenizerPropertyEvent.PROPERTY_ADDED:
      case TokenizerPropertyEvent.PROPERTY_REMOVED:
        switch (prop.getType()) {
        case Token.LINE_COMMENT:
        case Token.BLOCK_COMMENT:
        case Token.STRING:
        case Token.SPECIAL_SEQUENCE:
          if (   (_internalFlags & IFLAG_EXTERNAL_SEQUENCE_HANDLER) == 0
              && _properties instanceof de.susebox.jtopas.spi.SequenceHandler) {
            setSequenceHandler((de.susebox.jtopas.spi.SequenceHandler)_properties);
          }
          break;
        case Token.KEYWORD:
          if (   (_internalFlags & IFLAG_EXTERNAL_KEYWORD_HANDLER) == 0
              && _properties instanceof de.susebox.jtopas.spi.KeywordHandler) {
            setKeywordHandler((de.susebox.jtopas.spi.KeywordHandler)_properties);
          }
          break;
        case Token.PATTERN:
          if (   (_internalFlags & IFLAG_EXTERNAL_PATTERN_HANDLER) == 0
              && _properties instanceof de.susebox.jtopas.spi.PatternHandler) {
            setPatternHandler((de.susebox.jtopas.spi.PatternHandler)_properties);
          }
          break;
        }
        break;

      case TokenizerPropertyEvent.PROPERTY_MODIFIED:
        switch (prop.getType()) {
        case TokenizerProperty.PARSE_FLAG_MASK:
          _flags    = getTokenizerProperties().getParseFlags();
          _flagMask = 0;
          if (isFlagSet(Flags.F_COUNT_LINES)) {
            // start counting at the current read position if it was off
            if (_lineNumber < 0) {
              if (_position2LineMap != null) {
                _position2LineMap.clear();
              }
              _lineNumber = 0;
              putPosition(_currentReadPos, _lineNumber);
            }
            if (_columnNumber < 0) {
              _columnNumber = 0;
            }
          } else {
            _lineNumber   = -1;
            _columnNumber = -1;
          }
          break;
        }
        break;
      }
    }
  }


  //---------------------------------------------------------------------------
  // Implementation
  //

  /**
   * Returns the base tokenizer, i.e. this instance unless it has been added
   * to another tokenizer with {@link #addTokenizer}.
   *
   * @return  the base tokenizer
   */
  protected AbstractTokenizer getBaseTokenizer() {
    return _baseTokenizer;
  }

  /**
   * Returns a data provider of the base tokenizer.
   *
   * @param   startPos  position of the first character of the range
   * @param   length    number of characters in the range
   * @return  the {@link DataProvider} for the range
   */
  protected DataProvider getBaseDataProvider(int startPos, int length) {
    return getBaseTokenizer().getDataProvider(startPos, length);
  }

  /**
   * Lets the base tokenizer read more data, updates the write position or the
   * EOF state of this tokenizer and propagates the state with
   * {@link #synchronizeAll}. Nothing is done once EOF has been reached.
   *
   * @return  the number of characters read or -1 on EOF
   * @throws  TokenizerException passed on from {@link #readMoreData}
   */
  protected int readMoreDataFromBase() throws TokenizerException {
    int readChars = -1;

    if ( ! _eofReached) {
      AbstractTokenizer baseTokenizer = getBaseTokenizer();

      if (baseTokenizer != this) {
        readChars = baseTokenizer.readMoreData();
      } else {
        readChars = readMoreData();
      }
      if (readChars > 0) {
        _currentWritePos += readChars;
      } else if (readChars < 0) {
        readChars   = -1;
        _eofReached = true;
      }

      // the other tokenizers of the chain must know about the new state
      synchronizeAll();
    }
    return readChars;
  }

  /**
   * Transfers the state of this tokenizer to all tokenizers that follow the
   * base tokenizer in the chain, see {@link #switchTo}.
   *
   * @throws  TokenizerException passed on from {@link #switchTo}
   */
  protected void synchronizeAll() throws TokenizerException {
    AbstractTokenizer embedded = getBaseTokenizer();

    while ((embedded = embedded._nextTokenizer) != null) {
      switchTo(embedded);
    }
  }

  /**
   * Checks for end-of-file at the given offset from the read position. An EOF
   * token is stored as next token if no more data is available.
   *
   * @param   offset  offset to the current read position
   * @return  <code>true</code> if there is no data at the offset
   * @throws  TokenizerException passed on from {@link #readMoreDataFromBase}
   */
  protected boolean isEOF(int offset) throws TokenizerException {
    if (_currentReadPos + offset < _currentWritePos || readMoreDataFromBase() > 0) {
      return false;
    } else {
      _scannedToken[1] = new Token(Token.EOF);
      return true;
    }
  }

  /**
   * Completes a token that is ended by EOF, a whitespace, a free pattern, a
   * special sequence or a separator, and classifies it as keyword or normal
   * token. The token that ends it is kept as next token.
   *
   * @param   token   the token to complete (length, type and, for keywords,
   *                  companion are set)
   * @return  the keyword property or <code>null</code> for normal tokens
   * @throws  TokenizerException passed on from the scanning methods
   */
  protected TokenizerProperty completeBoundedToken(Token token) throws TokenizerException {
    // the first character is already known to belong to the token
    int len = 1;

    while ( ! (   isEOF(len)
               || isWhitespace(len)
               || isPattern(len, true)
               || isSpecialSequence(len)
               || isSeparator(len))) {
      len++;
    }
    token.setLength(len);

    // keyword or normal token?
    TokenizerProperty prop = isKeyword(_currentReadPos, len);

    if (prop != null) {
      token.setType(Token.KEYWORD);
      token.setCompanion(prop.getCompanion());
    } else {
      token.setType(Token.NORMAL);
    }
    return prop;
  }

  /**
   * Determines the length of the whitespace sequence starting at the current
   * read position. More data is read as long as the whitespaces reach the end
   * of the buffered data.
   *
   * @return  the number of whitespace characters (at least 1)
   * @throws  TokenizerException passed on from the data access
   */
  protected int completeWhitespace() throws TokenizerException {
    // the character at the read position is already known to be a whitespace
    int start     = _currentReadPos + 1;
    int available = _currentWritePos - start;
    int found     = readWhitespaces(start, available);
    int len       = found;

    // "found == available": the whole examined chunk consists of whitespaces,
    // so the sequence may continue in data that has not been read yet
    while (found == available) {
      if (readMoreDataFromBase() <= 0) {
        break;
      }
      start    += found;
      available = _currentWritePos - start;
      found     = readWhitespaces(start, available);
      len      += found;
    }
    return len + 1;
  }

  /**
   * Checks for a whitespace at the given offset from the read position. A
   * whitespace token is stored as next token on success.
   *
   * @param   offset  offset to the current read position
   * @return  <code>true</code> if a whitespace was found
   * @throws  TokenizerException passed on from the data access
   */
  protected boolean isWhitespace(int offset) throws TokenizerException {
    if (_whitespaceHandler != null) {
      // no character to test if nothing could be read
      if (_currentReadPos + offset >= _currentWritePos && readMoreDataFromBase() <= 0) {
        return false;
      }

      if (isWhitespace(getChar(_currentReadPos + offset))) {
        _scannedToken[1] = new Token(Token.WHITESPACE);
        return true;
      }
    }
    return false;
  }

  /**
   * Checks for a separator at the given offset from the read position. A
   * separator token is stored as next token on success.
   *
   * @param   offset  offset to the current read position
   * @return  <code>true</code> if a separator was found
   * @throws  TokenizerException passed on from the data access
   */
  protected boolean isSeparator(int offset) throws TokenizerException {
    if (   _separatorHandler != null
        && (_currentReadPos + offset < _currentWritePos || readMoreDataFromBase() > 0)
        && _separatorHandler.isSeparator(getChar(_currentReadPos + offset))) {
      _scannedToken[1] = new Token(Token.SEPARATOR);
      return true;
    } else {
      return false;
    }
  }

  /**
   * Checks for a pattern match at the given offset from the read position. A
   * pattern without {@link Flags#F_FREE_PATTERN} only counts if it is
   * followed by EOF, a whitespace, a free pattern, a special sequence or a
   * separator; that following token is kept as the token after the pattern.
   * On success, a pattern token with length (and image parts if requested) is
   * stored as next token.
   *
   * @param   offset            offset to the current read position
   * @param   freePatternOnly   accept only patterns flagged as free patterns
   * @return  <code>true</code> if a pattern matched
   * @throws  TokenizerException passed on from the data access
   */
  protected boolean isPattern(int offset, boolean freePatternOnly) throws TokenizerException {
    if (_patternHandler != null) {
      int startingAtPos = _currentReadPos + offset;

      // offer up to PATTERN_MAX_SIZE characters to the pattern handler
      while (_currentWritePos - startingAtPos < PATTERN_MAX_SIZE) {
        if (readMoreDataFromBase() <= 0) {
          break;
        }
      }

      DataProvider          dataProvider = getBaseDataProvider(startingAtPos, _currentWritePos - startingAtPos);
      PatternHandler.Result result       = _patternHandler.matches(dataProvider);
      boolean               isFree       = (result != null) ? isFlagSet(result.getProperty(), Flags.F_FREE_PATTERN) : false;

      if (result != null && (isFree || ! freePatternOnly)) {
        if ( ! isFree) {
          int nextOffset = offset + result.getLengthOfMatch();

          if (   isEOF(nextOffset)
              || isWhitespace(nextOffset)
              || isPattern(nextOffset, true)
              || isSpecialSequence(nextOffset)
              || isSeparator(nextOffset)) {
            // remember the token that terminates the pattern
            _scannedToken[2] = _scannedToken[1];
          } else {
            return false;
          }
        }
        _scannedToken[1] = new Token(Token.PATTERN, null, result.getProperty());
        _scannedToken[1].setLength(result.getLengthOfMatch());
        if (isFlagSet(result.getProperty(), Flags.F_RETURN_IMAGE_PARTS)) {
          _scannedToken[1].setImageParts(result.getGroups());
        }
        return true;
      }
    }

    return false;
  }

  /**
   * Checks for a special sequence, string start or comment start at the given
   * offset from the read position. A token of the type of the found property
   * is stored as next token on success.
   *
   * @param   offset  offset to the current read position
   * @return  <code>true</code> if a sequence, string or comment starts there
   * @throws  TokenizerException passed on from the data access
   */
  protected boolean isSpecialSequence(int offset) throws TokenizerException {
    if (_sequenceHandler != null) {
      int startingAtPos = _currentReadPos + offset;

      // try to have enough data for the longest possible sequence
      while (_sequenceHandler.getSequenceMaxLength() > _currentWritePos - startingAtPos) {
        if (readMoreDataFromBase() <= 0) {
          break;
        }
      }

      DataProvider      dataProvider = getBaseDataProvider(startingAtPos, _currentWritePos - startingAtPos);
      TokenizerProperty prop         = _sequenceHandler.startsWithSequenceCommentOrString(dataProvider);

      if (prop != null) {
        _scannedToken[1] = new Token(prop.getType(), null, prop);
        return true;
      }
    }

    return false;
  }

  /**
   * Determines the length of the line comment starting at the current read
   * position. The terminating line separator ("\r\n", "\r" or "\n") is part of
   * the comment.
   *
   * @param   prop  the line comment property
   * @return  the length of the comment
   * @throws  TokenizerException passed on from the data access
   */
  protected int completeLineComment(TokenizerProperty prop) throws TokenizerException {
    String[] images = prop.getImages();
    int      len    = images[0].length();

    while (_currentReadPos + len < _currentWritePos || readMoreDataFromBase() > 0) {
      switch (getChar(_currentReadPos + len)) {
      case '\r':
        len++;
        if (_currentReadPos + len < _currentWritePos || readMoreDataFromBase() > 0) {
          if (getChar(_currentReadPos + len) == '\n') {
            len++;
          }
        }
        return len;
      case '\n':
        len++;
        return len;
      default:
        len++;
      }
    }
    return len;
  }

  /**
   * Determines the length of the block comment starting at the current read
   * position, honouring {@link Flags#F_ALLOW_NESTED_COMMENTS}. An unterminated
   * comment extends to the end of the data.
   *
   * @param   prop  the block comment property
   * @return  the length of the comment
   * @throws  TokenizerException passed on from the data access
   */
  protected int completeBlockComment(TokenizerProperty prop) throws TokenizerException {
    String[] images = prop.getImages();
    String   start  = images[0];
    String   end    = images[1];
    boolean  noCase = isFlagSet(prop, Flags.F_NO_CASE);
    boolean  nested = isFlagSet(prop, Flags.F_ALLOW_NESTED_COMMENTS);
    int      len    = start.length();
    int      level  = 0;

    __LOOP__:
    do {
      // is there a nested comment start?
      if (nested) {
        switch (comparePrefix(len, start, noCase)) {
        case 0:
          level++;
          len += start.length();
          continue __LOOP__;
        case -1:
          return _currentWritePos - _currentReadPos;
        }
      }

      // is there a comment end?
      switch (comparePrefix(len, end, noCase)) {
      case 0:
        level--;
        len += end.length();
        break;
      case -1:
        return _currentWritePos - _currentReadPos;
      default:
        len++;
      }
    } while (level >= 0);

    return len;
  }

  /**
   * Determines the length of the string starting at the current read
   * position, honouring the escape sequence of the string property. A missing
   * (<code>null</code> or empty) escape sequence means that nothing is
   * escaped. An unterminated string extends to the end of the data.
   *
   * @param   prop  the string property (images: start, end, escape)
   * @return  the length of the string including its delimiters
   * @throws  TokenizerException passed on from the data access
   */
  protected int completeString(TokenizerProperty prop) throws TokenizerException {
    String[] images       = prop.getImages();
    String   start        = images[0];
    String   end          = images[1];
    String   esc          = images[2];
    int      len          = start.length();
    boolean  noCase       = isFlagSet(prop, Flags.F_NO_CASE);
    // an empty escape sequence would match everywhere and never make progress
    boolean  hasEscape    = (esc != null && esc.length() > 0);
    boolean  escEqualsEnd =    hasEscape
                            && (   ( ! noCase && esc.compareTo(end)           == 0)
                                || (   noCase && esc.compareToIgnoreCase(end) == 0));

    while (true) {
      // test on escape
      if (hasEscape) {
        switch (comparePrefix(len, esc, noCase)) {
        case 0:
          len += esc.length();
          if (escEqualsEnd) {
            // a doubled end sequence is an escaped one, a single one ends
            // the string
            switch (comparePrefix(len, end, noCase)) {
            case 0:
              len += end.length();
              break;
            case -1:
              return _currentWritePos - _currentReadPos;
            default:
              return len;
            }
          } else {
            // skip the escaped character
            len++;
          }
          continue;
        case -1:
          return _currentWritePos - _currentReadPos;
        }
      }

      // test on end sequence
      switch (comparePrefix(len, end, noCase)) {
      case 0:
        len += end.length();
        return len;
      case -1:
        return _currentWritePos - _currentReadPos;
      default:
        len++;
      }
    }
  }

  /**
   * Compares the data at the given offset from the read position with a
   * prefix.
   *
   * @param   offset  offset to the current read position
   * @param   prefix  the character sequence to look for
   * @param   noCase  <code>true</code> for a case-insensitive comparison
   * @return  0 if the data starts with the prefix, 1 if not and -1 if the data
   *          ended before the comparison could be finished
   * @throws  TokenizerException passed on from the data access
   */
  protected int comparePrefix(int offset, String prefix, boolean noCase)
    throws TokenizerException
  {
    int len = prefix.length();

    for (int pos = offset; pos < offset + len; ++pos) {
      // there is nothing to compare with if no more data could be read
      if (_currentReadPos + pos >= _currentWritePos && readMoreDataFromBase() <= 0) {
        return -1;
      }

      char c1 = prefix.charAt(pos - offset);
      char c2 = getChar(_currentReadPos + pos);

      if (   c1 != c2
          && (! noCase || Character.toUpperCase(c1) != Character.toUpperCase(c2))) {
        return 1;
      }
    }

    return 0;
  }

  /**
   * Advances line and column number over the token that starts at the current
   * read position. Nothing is done if lines are not counted. Every line start
   * found is recorded for {@link #setReadPositionAbsolute}.
   *
   * @param   type    the token type
   * @param   length  the token length
   */
  protected void adjustLineAndColumn(int type, int length) {
    if ( ! isFlagSet(Flags.F_COUNT_LINES)) {
      return;
    }

    // token types with a known outcome do not have to be scanned
    switch (type) {
    case Token.EOF:
      return;

    case Token.LINE_COMMENT:
      // a line comment is treated as ending its line
      _lineNumber++;
      _columnNumber = 0;
      putPosition(_currentReadPos + length, _lineNumber);
      return;

    case Token.SPECIAL_SEQUENCE:
    case Token.SEPARATOR:
    case Token.NORMAL:
    case Token.KEYWORD:
      // presumably such tokens cannot contain line separators when these are
      // whitespaces
      if (_whitespaceHandler != null && _whitespaceHandler.newlineIsWhitespace()) {
        _columnNumber += length;
        return;
      }
      break;

    case Token.WHITESPACE:
      if ( ! (_whitespaceHandler.isWhitespace('\n') || _whitespaceHandler.isWhitespace('\r'))) {
        _columnNumber += length;
        return;
      }
      break;
    }

    // scan the token for line separators
    for (int pos = _currentReadPos; pos < _currentReadPos + length; ++pos) {
      switch (getChar(pos)) {
      case '\r':
        if (pos + 1 >= _currentReadPos + length || getChar(pos + 1) != '\n') {
          _lineNumber++;
          _columnNumber = 0;
          putPosition(pos + 1, _lineNumber);
          break;
        }
        pos++;
        /* "\r\n" counts as one line separator: no break */

      case '\n':
        _lineNumber++;
        _columnNumber = 0;
        putPosition(pos + 1, _lineNumber);
        break;

      default:
        _columnNumber++;
      }
    }
  }

  /**
   * Records the start position of a line. The map is created on first use.
   *
   * @param   position    position of the first character of the line
   * @param   lineNumber  the number of the line
   */
  private void putPosition(int position, int lineNumber) {
    if (_position2LineMap == null) {
      _position2LineMap = new TreeMap();
    }
    _position2LineMap.put(new Integer(position), new Integer(lineNumber));
  }

  /**
   * Checks a flag against the effective parse flags of this tokenizer.
   *
   * @param   flag  the flag to test
   * @return  <code>true</code> if at least one bit of the flag is set
   */
  protected boolean isFlagSet(int flag) {
    return (getParseFlags() & flag) != 0;
  }

  /**
   * Checks a flag for a property. The second argument passed to
   * {@link TokenizerProperty#isFlagSet} is <code>true</code> if the flag is set
   * in the tokenizer properties or in the effective flags of this tokenizer.
   *
   * @param   prop  the property to check
   * @param   flag  the flag to test
   * @return  the result of <code>prop.isFlagSet</code>
   */
  protected boolean isFlagSet(TokenizerProperty prop, int flag) {
    return prop.isFlagSet(flag, (getTokenizerProperties().getParseFlags() & flag) != 0 || isFlagSet(flag));
  }


  //---------------------------------------------------------------------------
  // Class members
  //

  /**
   * The flags that can be set with {@link #changeParseFlags}.
   */
  protected static final int VALID_FLAGS_MASK =
      Flags.F_RETURN_WHITESPACES
    | Flags.F_TOKEN_POS_ONLY
    | Flags.F_KEEP_DATA
    | Flags.F_COUNT_LINES;

  /**
   * The properties created by the default constructor.
   */
  protected StandardTokenizerProperties _defaultProperties = null;

  /**
   * Upper limit for the amount of data read ahead for pattern matching.
   */
  private static final int PATTERN_MAX_SIZE = 0x40000;

  /**
   * Bits of {@link #_internalFlags}: the handler was set explicitly and is not
   * the properties object.
   */
  private static final byte IFLAG_EXTERNAL_PATTERN_HANDLER  = 0x01;
  private static final byte IFLAG_EXTERNAL_KEYWORD_HANDLER  = 0x02;
  private static final byte IFLAG_EXTERNAL_SEQUENCE_HANDLER = 0x04;


  //---------------------------------------------------------------------------
  // Members
  //

  /**
   * Parse flags of this tokenizer, see {@link #getParseFlags}.
   */
  protected int _flags = 0;

  /**
   * Marks the bits of {@link #_flags} that override the flags of the
   * tokenizer properties.
   */
  private int _flagMask = 0;

  /**
   * Set once the data source is exhausted (or no source is set).
   */
  private boolean _eofReached = true;

  /**
   * Position where the next token starts.
   */
  protected int _currentReadPos = 0;

  /**
   * Position behind the last character read from the source.
   */
  protected int _currentWritePos = 0;

  /**
   * Current line number, -1 if lines are not counted.
   */
  protected int _lineNumber = -1;

  /**
   * Current column number, -1 if lines are not counted.
   */
  protected int _columnNumber = -1;

  /**
   * Index 0: the current token, index 1: the next token if already known,
   * index 2: the token behind a pattern token found by look-ahead.
   */
  protected Token[] _scannedToken = new Token[] { null, null, null };

  /**
   * The next tokenizer in the chain built with {@link #addTokenizer}.
   */
  protected AbstractTokenizer _nextTokenizer = null;

  /**
   * The tokenizer that provides the data for the whole chain.
   */
  protected AbstractTokenizer _baseTokenizer = null;

  /**
   * The previous tokenizer in the chain built with {@link #addTokenizer}.
   */
  protected AbstractTokenizer _prevTokenizer = null;

  /**
   * The whitespace handler in use.
   */
  private de.susebox.jtopas.spi.WhitespaceHandler _whitespaceHandler = null;

  /**
   * The separator handler in use.
   */
  private de.susebox.jtopas.spi.SeparatorHandler _separatorHandler = null;

  /**
   * The keyword handler in use.
   */
  private de.susebox.jtopas.spi.KeywordHandler _keywordHandler = null;

  /**
   * The handler for special sequences, strings and comments in use.
   */
  private de.susebox.jtopas.spi.SequenceHandler _sequenceHandler = null;

  /**
   * The pattern handler in use.
   */
  private de.susebox.jtopas.spi.PatternHandler _patternHandler = null;

  /**
   * The data source.
   */
  private TokenizerSource _source = null;

  /**
   * The tokenizer properties.
   */
  private TokenizerProperties _properties = null;

  /**
   * Maps line start positions to line numbers (Integer to Integer).
   */
  private TreeMap _position2LineMap = null;

  /**
   * Combination of the IFLAG_... bits.
   */
  private long _internalFlags = 0;
}