package org.armedbear.j;

import java.util.ArrayList;
import java.util.List;

/**
 * Formatter that splits lines of Python source into coloured segments
 * (text, comments, strings, identifiers, keywords, function names,
 * operators, braces and numbers).
 *
 * <p>Two passes cooperate:
 * <ul>
 * <li>{@link #parseBuffer()} walks the whole buffer and stores, in each
 * line's flags, the lexer state in effect at the start of that line (so
 * that strings left open on an earlier line are known);</li>
 * <li>{@link #formatLine(Line)} re-lexes a single line, starting from the
 * state stored in its flags, and produces the segment list.</li>
 * </ul>
 *
 * <p>The members {@code buffer}, {@code segmentList}, {@code formatTable},
 * {@code addSegment}, {@code getLastSegment}, {@code clearSegmentList} and
 * {@code isKeyword} are not declared in this file; presumably they are
 * inherited from {@code Formatter}.
 */
public final class PythonFormatter extends Formatter
{
    // Lexer states. The string states are also the values stored in a
    // line's flags by parseBuffer().
    private static final int PYTHON_STATE_NEUTRAL       =  0;
    private static final int PYTHON_STATE_SINGLE_QUOTE  =  1;
    private static final int PYTHON_STATE_DOUBLE_QUOTE  =  2;
    private static final int PYTHON_STATE_IDENTIFIER    =  3;
    private static final int PYTHON_STATE_COMMENT       =  4;
    private static final int PYTHON_STATE_BRACE         =  5;
    private static final int PYTHON_STATE_NUMBER        =  6;
    private static final int PYTHON_STATE_HEXNUMBER     =  7;
    private static final int PYTHON_STATE_OPERATOR      =  8;
    private static final int PYTHON_STATE_TRIPLE_SINGLE =  9;
    private static final int PYTHON_STATE_TRIPLE_DOUBLE = 10;

    // Format identifiers; these are the keys registered in getFormatTable().
    private static final int PYTHON_FORMAT_TEXT       = 0;
    private static final int PYTHON_FORMAT_COMMENT    = 1;
    private static final int PYTHON_FORMAT_STRING     = 2;
    private static final int PYTHON_FORMAT_IDENTIFIER = 3;
    private static final int PYTHON_FORMAT_KEYWORD    = 4;
    private static final int PYTHON_FORMAT_FUNCTION   = 5;
    private static final int PYTHON_FORMAT_OPERATOR   = 6;
    private static final int PYTHON_FORMAT_BRACE      = 7;
    private static final int PYTHON_FORMAT_NUMBER     = 8;

    // Used for the identifier-start / identifier-part character tests.
    private static final PythonMode mode = PythonMode.getMode();

    /**
     * Creates a formatter for the given buffer.
     *
     * @param buffer the buffer whose lines will be formatted
     */
    public PythonFormatter(Buffer buffer)
    {
        this.buffer = buffer;
    }

    // Offset (in the text being parsed by parseLine) at which the segment
    // currently being accumulated starts.
    private int begin = 0;

    /**
     * Closes the segment running from {@code begin} up to (but not
     * including) {@code offset}, giving it the format that corresponds to
     * {@code state}, and starts the next segment at {@code offset}. Does
     * nothing when the segment would be empty.
     *
     * @param text   the line text being parsed
     * @param offset the end offset (exclusive) of the segment
     * @param state  the lexer state that was in effect for the segment
     */
    private void endSegment(String text, int offset, int state)
    {
        if (offset - begin > 0) {
            int format;
            switch (state) {
                case PYTHON_STATE_NEUTRAL:
                    format = PYTHON_FORMAT_TEXT;
                    break;
                case PYTHON_STATE_SINGLE_QUOTE:
                case PYTHON_STATE_DOUBLE_QUOTE:
                case PYTHON_STATE_TRIPLE_SINGLE:
                case PYTHON_STATE_TRIPLE_DOUBLE:
                    format = PYTHON_FORMAT_STRING;
                    break;
                case PYTHON_STATE_IDENTIFIER:
                    format = PYTHON_FORMAT_IDENTIFIER;
                    break;
                case PYTHON_STATE_COMMENT:
                    format = PYTHON_FORMAT_COMMENT;
                    break;
                case PYTHON_STATE_OPERATOR:
                    format = PYTHON_FORMAT_OPERATOR;
                    break;
                case PYTHON_STATE_BRACE:
                    format = PYTHON_FORMAT_BRACE;
                    break;
                case PYTHON_STATE_NUMBER:
                case PYTHON_STATE_HEXNUMBER:
                    format = PYTHON_FORMAT_NUMBER;
                    break;
                default:
                    format = PYTHON_FORMAT_TEXT;
                    break;
            }
            addSegment(text, begin, offset, format);
            begin = offset;
        }
    }

    /**
     * Re-formats the most recently added segment as a keyword if its text
     * is a keyword. Does nothing when there is no segment yet.
     */
    private void markLastSegmentIfKeyword()
    {
        LineSegment segment = getLastSegment();
        if (segment != null && isKeyword(segment.getText()))
            segment.setFormat(PYTHON_FORMAT_KEYWORD);
    }

    /**
     * Same as {@link #endSegment(String, int, int)}, but when the segment
     * being closed is an identifier it is also checked against the keyword
     * table. This covers identifiers that are terminated directly by an
     * operator, a quote or a comment (for example {@code return-1}).
     *
     * @param text   the line text being parsed
     * @param offset the end offset (exclusive) of the segment
     * @param state  the lexer state that was in effect for the segment
     */
    private void endSegmentAndMarkKeyword(String text, int offset, int state)
    {
        endSegment(text, offset, state);
        // In identifier state at least one character has been consumed, so
        // endSegment() has just added the identifier as the last segment.
        if (state == PYTHON_STATE_IDENTIFIER)
            markLastSegmentIfKeyword();
    }

    /**
     * Lexes one line into segments, starting in the state stored in the
     * line's flags.
     *
     * @param line the line to parse
     */
    private void parseLine(Line line)
    {
        final String text;
        if (Editor.tabsAreVisible())
            text = Utilities.makeTabsVisible(line.getText(), buffer.getTabWidth());
        else
            text = Utilities.detab(line.getText(), buffer.getTabWidth());
        begin = 0;
        int state = line.flags();
        int i = 0;
        final int limit = text.length();

        // Leading whitespace becomes a segment of its own (formatted
        // according to the state inherited from the previous line).
        while (i < limit) {
            if (Character.isWhitespace(text.charAt(i))) {
                ++i;
            } else {
                endSegment(text, i, state);
                break;
            }
        }

        while (i < limit) {
            char c = text.charAt(i);

            // Backslash escape: skip the escaped character too.
            if (c == '\\' && i < limit - 1) {
                i += 2;
                continue;
            }

            // Inside a string: look only for the matching terminator.
            if (state == PYTHON_STATE_SINGLE_QUOTE) {
                if (c == '\'') {
                    endSegment(text, i + 1, state);
                    state = PYTHON_STATE_NEUTRAL;
                }
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_DOUBLE_QUOTE) {
                if (c == '"') {
                    endSegment(text, i + 1, state);
                    state = PYTHON_STATE_NEUTRAL;
                }
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_TRIPLE_SINGLE) {
                if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
                    i += 3;
                    endSegment(text, i, state);
                    state = PYTHON_STATE_NEUTRAL;
                } else
                    ++i;
                continue;
            }

            if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
                if (c == '"' && text.regionMatches(i, "\"\"\"", 0, 3)) {
                    i += 3;
                    endSegment(text, i, state);
                    state = PYTHON_STATE_NEUTRAL;
                } else
                    ++i;
                continue;
            }

            // Not in a string from here on.

            // Start of a single-quoted or triple-single-quoted string.
            if (c == '\'') {
                endSegmentAndMarkKeyword(text, i, state);
                if (text.regionMatches(i, "'''", 0, 3)) {
                    state = PYTHON_STATE_TRIPLE_SINGLE;
                    i += 3;
                } else {
                    state = PYTHON_STATE_SINGLE_QUOTE;
                    ++i;
                }
                continue;
            }

            // Start of a double-quoted or triple-double-quoted string.
            if (c == '"') {
                endSegmentAndMarkKeyword(text, i, state);
                if (text.regionMatches(i, "\"\"\"", 0, 3)) {
                    state = PYTHON_STATE_TRIPLE_DOUBLE;
                    i += 3;
                } else {
                    state = PYTHON_STATE_DOUBLE_QUOTE;
                    ++i;
                }
                continue;
            }

            // Comment: the rest of the line is a single comment segment.
            if (c == '#') {
                endSegmentAndMarkKeyword(text, i, state);
                endSegment(text, limit, PYTHON_STATE_COMMENT);
                return;
            }

            if (isOperatorChar(c)) {
                if (state != PYTHON_STATE_OPERATOR) {
                    endSegmentAndMarkKeyword(text, i, state);
                    state = PYTHON_STATE_OPERATOR;
                }
                ++i;
                continue;
            }

            if (c == '{' || c == '}') {
                if (state != PYTHON_STATE_BRACE) {
                    endSegment(text, i, state);
                    // The token just before the brace may be a keyword.
                    markLastSegmentIfKeyword();
                    state = PYTHON_STATE_BRACE;
                }
                ++i;
                continue;
            }

            // First character after a run of operators or braces.
            if (state == PYTHON_STATE_OPERATOR || state == PYTHON_STATE_BRACE) {
                endSegment(text, i, state);
                if (mode.isIdentifierStart(c))
                    state = PYTHON_STATE_IDENTIFIER;
                else if (Character.isDigit(c))
                    state = PYTHON_STATE_NUMBER;
                else
                    state = PYTHON_STATE_NEUTRAL;
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_IDENTIFIER) {
                if (!mode.isIdentifierPart(c)) {
                    endSegment(text, i, state);
                    LineSegment segment = getLastSegment();
                    if (segment != null) {
                        if (isKeyword(segment.getText())) {
                            segment.setFormat(PYTHON_FORMAT_KEYWORD);
                        } else if (c == '(') {
                            segment.setFormat(PYTHON_FORMAT_FUNCTION);
                        } else if (Character.isWhitespace(c)) {
                            // Look past the whitespace: an identifier
                            // followed by '(' is a function name.
                            int j = i + 1;
                            while (j < limit && Character.isWhitespace(text.charAt(j)))
                                ++j;
                            if (j < limit && text.charAt(j) == '(')
                                segment.setFormat(PYTHON_FORMAT_FUNCTION);
                        }
                    }
                    state = PYTHON_STATE_NEUTRAL;
                }
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_NUMBER) {
                if (Character.isDigit(c))
                    ;
                else if (c == 'l' || c == 'L')
                    ;
                else if (i - begin == 1 && text.charAt(begin) == '0'
                         && (c == 'x' || c == 'X'))
                    // Hex prefix: only "0x" / "0X" at the very start of
                    // the number. The parentheses matter: without them an
                    // 'X' anywhere in the number would switch to hex.
                    state = PYTHON_STATE_HEXNUMBER;
                else {
                    endSegment(text, i, state);
                    if (mode.isIdentifierStart(c))
                        state = PYTHON_STATE_IDENTIFIER;
                    else
                        state = PYTHON_STATE_NEUTRAL;
                }
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_HEXNUMBER) {
                if (Character.isDigit(c))
                    ;
                else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
                    ;
                else if (c == 'l' || c == 'L')
                    ;
                else {
                    endSegment(text, i, state);
                    if (mode.isIdentifierStart(c))
                        state = PYTHON_STATE_IDENTIFIER;
                    else
                        state = PYTHON_STATE_NEUTRAL;
                }
                ++i;
                continue;
            }

            if (state == PYTHON_STATE_NEUTRAL) {
                if (mode.isIdentifierStart(c)) {
                    endSegment(text, i, state);
                    state = PYTHON_STATE_IDENTIFIER;
                } else if (Character.isDigit(c)) {
                    endSegment(text, i, state);
                    state = PYTHON_STATE_NUMBER;
                }
            }
            ++i;
        }

        // Close whatever is still open at the end of the line; a trailing
        // identifier may be a keyword.
        endSegmentAndMarkKeyword(text, i, state);
    }

    /**
     * Formats a single line.
     *
     * @param line the line to format
     * @return the list of segments making up the line
     */
    public LineSegmentList formatLine(Line line)
    {
        clearSegmentList();
        parseLine(line);
        return segmentList;
    }

    /**
     * Walks every line of the buffer, tracking string state across line
     * boundaries, and stores in each line's flags the state in effect at
     * the start of that line. Afterwards the buffer is marked as no longer
     * needing parsing.
     *
     * @return {@code true} if the flags of at least one line were changed
     */
    public boolean parseBuffer()
    {
        int state = PYTHON_STATE_NEUTRAL;
        Line line = buffer.getFirstLine();
        boolean changed = false;
        while (line != null) {
            if (state != line.flags()) {
                line.setFlags(state);
                changed = true;
            }
            final String text = line.getText();
            final int limit = line.length();
            int i = 0;
            while (i < limit) {
                char c = text.charAt(i);
                if (c == '\\') {
                    // Escape: skip the escaped character too. Overshooting
                    // the end of the line just terminates the loop.
                    i += 2;
                    continue;
                }
                if (state == PYTHON_STATE_SINGLE_QUOTE) {
                    if (c == '\'')
                        state = PYTHON_STATE_NEUTRAL;
                    ++i;
                    continue;
                }
                if (state == PYTHON_STATE_DOUBLE_QUOTE) {
                    if (c == '"')
                        state = PYTHON_STATE_NEUTRAL;
                    ++i;
                    continue;
                }
                if (state == PYTHON_STATE_TRIPLE_SINGLE) {
                    if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
                        state = PYTHON_STATE_NEUTRAL;
                        i += 3;
                    } else
                        ++i;
                    continue;
                }
                if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
                    if (c == '"' && text.regionMatches(i, "\"\"\"", 0, 3)) {
                        state = PYTHON_STATE_NEUTRAL;
                        i += 3;
                    } else
                        ++i;
                    continue;
                }
                // Not in a string.
                if (c == '\'') {
                    if (text.regionMatches(i, "'''", 0, 3)) {
                        state = PYTHON_STATE_TRIPLE_SINGLE;
                        i += 3;
                    } else {
                        state = PYTHON_STATE_SINGLE_QUOTE;
                        ++i;
                    }
                    continue;
                }
                if (c == '"') {
                    if (text.regionMatches(i, "\"\"\"", 0, 3)) {
                        state = PYTHON_STATE_TRIPLE_DOUBLE;
                        i += 3;
                    } else {
                        state = PYTHON_STATE_DOUBLE_QUOTE;
                        ++i;
                    }
                    continue;
                }
                // A comment runs to the end of the line and cannot change
                // the string state.
                if (c == '#')
                    break;
                ++i;
            }
            line = line.next();
        }
        buffer.setNeedsParsing(false);
        return changed;
    }

    /**
     * Tells whether {@code c} is one of the characters highlighted as an
     * operator.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is one of {@code ! & | < > = + / * -}
     */
    private static boolean isOperatorChar(char c)
    {
        return "!&|<>=+/*-".indexOf(c) >= 0;
    }

    /**
     * Returns the format table for this formatter, creating and populating
     * it on first use.
     *
     * @return the format table
     */
    public FormatTable getFormatTable()
    {
        if (formatTable == null) {
            formatTable = new FormatTable(null);
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_TEXT, "text");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_COMMENT, "comment");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_STRING, "string");
            // NOTE(review): the extra "text" argument looks like a fallback
            // preference name -- confirm against FormatTable.
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_IDENTIFIER, "identifier", "text");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_KEYWORD, "keyword");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_FUNCTION, "function");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_OPERATOR, "operator");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_BRACE, "brace");
            formatTable.addEntryFromPrefs(PYTHON_FORMAT_NUMBER, "number");
        }
        return formatTable;
    }
}