1 31 32 package org.antlr.works.ate.syntax.generic; 33 34 import org.antlr.works.ate.syntax.misc.ATELine; 35 import org.antlr.works.ate.syntax.misc.ATEToken; 36 37 import java.util.ArrayList ; 38 import java.util.List ; 39 40 public class ATESyntaxLexer { 41 42 public static final int TOKEN_SINGLE_QUOTE_STRING = 1; 43 public static final int TOKEN_DOUBLE_QUOTE_STRING = 2; 44 public static final int TOKEN_SINGLE_COMMENT = 3; 45 public static final int TOKEN_COMPLEX_COMMENT = 4; 46 public static final int TOKEN_ID = 5; 47 public static final int TOKEN_CHAR = 6; 48 public static final int TOKEN_LPAREN = 7; 49 public static final int TOKEN_RPAREN = 8; 50 public static final int TOKEN_LCURLY = 9; 51 public static final int TOKEN_RCURLY = 10; 52 public static final int TOKEN_LBRACK = 11; 53 public static final int TOKEN_RBRACK = 12; 54 public static final int TOKEN_COLON = 13; 55 public static final int TOKEN_SEMI = 14; 56 public static final int TOKEN_OTHER = 15; 57 58 protected List <ATEToken> tokens; 59 protected String text; 60 protected int position; 61 62 protected int lineNumber; 63 protected int lineIndex; protected List <ATELine> lines; 65 66 67 protected boolean controlCharacter; 68 69 72 protected char c0; 73 protected char c1; 74 75 public ATESyntaxLexer() { 76 lines = new ArrayList <ATELine>(); 77 tokens = new ArrayList <ATEToken>(); 78 } 79 80 public List <ATEToken> getTokens() { 81 return tokens; 82 } 83 84 public List <ATELine> getLines() { 85 return lines; 86 } 87 88 public int getLineNumber() { 89 return lineNumber; 90 } 91 92 public void tokenize(String text) { 93 this.text = text; 94 95 position = -1; 96 lineNumber = 0; 97 lines.clear(); 98 lines.add(new ATELine(0)); 99 100 tokens.clear(); 101 tokenize(); 102 } 103 104 protected void tokenize() { 105 while(nextCharacter()) { 106 ATEToken token = customMatch(); 107 108 if(token != null) { 109 } else if(c0 == '\'') 111 token = matchSingleQuoteString(); 112 else if(c0 == '\"') 113 token = matchDoubleQuoteString(); 114 else if(c0 == '/' && c1 == '/') 115 token = matchSingleComment(); 116 else if(c0 == '/' && c1 == '*') 117 token = matchComplexComment(); 118 else if(isLetter()) 119 token = matchID(); 120 else if(c0 == '(') 121 token = createNewToken(TOKEN_LPAREN); 122 else if(c0 == ')') 123 token = createNewToken(TOKEN_RPAREN); 124 else if(c0 == '{') 125 token = createNewToken(TOKEN_LCURLY); 126 else if(c0 == '}') 127 token = createNewToken(TOKEN_RCURLY); 128 else if(c0 == '[') 129 token = createNewToken(TOKEN_LBRACK); 130 else if(c0 == ']') 131 token = createNewToken(TOKEN_RBRACK); 132 else if(c0 == ':') 133 token = createNewToken(TOKEN_COLON); 134 else if(c0 == ';') 135 token = createNewToken(TOKEN_SEMI); 136 else if(!isWhitespace()) 137 token = createNewToken(TOKEN_CHAR); 138 139 addToken(token); 140 } 141 } 142 143 protected ATEToken customMatch() { 144 return null; 145 } 146 147 public void addToken(ATEToken token) { 148 if(token != null) { 149 token.index = tokens.size(); 150 tokens.add(token); 151 } 152 } 153 154 protected ATEToken matchID() { 155 int sp = position; 156 while(isID(c1) && nextCharacter()) { 157 } 158 return createNewToken(TOKEN_ID, sp); 159 } 160 161 public ATEToken matchSingleQuoteString() { 162 int sp = position; 163 while(nextCharacter()) { 164 if((c0 == '\'' || matchNewLine()) && !controlCharacter) { 165 return createNewToken(TOKEN_SINGLE_QUOTE_STRING, sp); 166 } 167 } 168 return null; 169 } 170 171 public ATEToken matchDoubleQuoteString() { 172 int sp = position; 173 while(nextCharacter()) { 174 if((c0 == '\"' || matchNewLine()) && !controlCharacter) { 175 return createNewToken(TOKEN_DOUBLE_QUOTE_STRING, sp); 176 } 177 } 178 return null; 179 } 180 181 public ATEToken matchSingleComment() { 182 int sp = position; 183 while(nextCharacter()) { 184 if(matchNewLine()) { 185 return createNewToken(TOKEN_SINGLE_COMMENT, sp); 186 } 187 } 188 return createNewToken(TOKEN_SINGLE_COMMENT, sp, position); 189 } 190 191 public ATEToken matchComplexComment() { 192 int sp = position; 193 while(nextCharacter()) { 194 if(c0 == '*' && c1 == '/') { 195 nextCharacter(); 197 return createNewToken(TOKEN_COMPLEX_COMMENT, sp, Math.min(position+1, text.length())); 198 } 199 } 200 return createNewToken(TOKEN_COMPLEX_COMMENT, sp, position); 202 } 203 204 public boolean nextCharacter() { 205 boolean valid = false; 206 final int length = text.length(); 207 controlCharacter = false; 208 209 c0 = c1 = 0; 210 position++; 211 if(position < length) { 212 if(text.charAt(position) == '\\') { 214 controlCharacter = true; 215 position += 1; 216 } 217 218 valid = position < length; 219 if(valid) { 220 c0 = text.charAt(position); 221 if(position + 1 < length) 222 c1 = text.charAt(position+1); 223 } 224 225 if(matchNewLine()) { 226 lineNumber++; 227 lineIndex = position+1; 228 lines.add(new ATELine(lineIndex)); 229 } 230 } 231 return valid; 232 } 233 234 public boolean matchNewLine() { 235 if(c0 == '\n') { 236 return true; 238 } else if(c0 == '\r' && c1 == '\n') { 239 return true; 241 } else if(c0 == '\r') { 242 return true; 244 } else { 245 return false; 246 } 247 } 248 249 public boolean isWhitespace() { 250 return Character.isWhitespace(c0); 251 } 252 253 public boolean isLetter() { 254 return Character.isLetter(c0); 255 } 256 257 public boolean isLetterOrDigit() { 258 return isLetterOrDigit(c0); 259 } 260 261 public boolean isLetterOrDigit(char c) { 262 return Character.isLetterOrDigit(c); 263 } 264 265 public boolean isID(char c) { 266 if(Character.isLetterOrDigit(c)) 267 return true; 268 269 return c == '_' || c == '$'; 270 } 271 272 public ATEToken createNewToken(int type) { 273 return createNewToken(type, position); 274 } 275 276 public ATEToken createNewToken(int type, int start) { 277 return createNewToken(type, start, position+1); 278 } 279 280 public ATEToken createNewToken(int type, int start, int end) { 281 return createNewToken(type, start, end, lineNumber, lineNumber, lineIndex, lineIndex); 282 } 283 284 public ATEToken createNewToken(int type, int start, int end, 285 int startLineNumber, int endLineNumber, 286 int startLineIndex, int endLineIndex) { 287 return new ATEToken(type, start, end, startLineNumber, endLineNumber, startLineIndex, endLineIndex, text); 288 } 289 290 } 291 | Popular Tags |