KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > armedbear > j > PythonFormatter


/*
 * PythonFormatter.java
 *
 * Copyright (C) 2002 Peter Graves
 * $Id: PythonFormatter.java,v 1.1.1.1 2002/09/24 16:08:58 piso Exp $
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

21
22 package org.armedbear.j;
23
24 import java.util.ArrayList JavaDoc;
25 import java.util.List JavaDoc;
26
27 public final class PythonFormatter extends Formatter
28 {
29     private static final int PYTHON_STATE_NEUTRAL = 0;
30     private static final int PYTHON_STATE_SINGLE_QUOTE = 1;
31     private static final int PYTHON_STATE_DOUBLE_QUOTE = 2;
32     private static final int PYTHON_STATE_IDENTIFIER = 3;
33     private static final int PYTHON_STATE_COMMENT = 4;
34     private static final int PYTHON_STATE_BRACE = 5;
35     private static final int PYTHON_STATE_NUMBER = 6;
36     private static final int PYTHON_STATE_HEXNUMBER = 7;
37     private static final int PYTHON_STATE_OPERATOR = 8;
38     private static final int PYTHON_STATE_TRIPLE_SINGLE = 9;
39     private static final int PYTHON_STATE_TRIPLE_DOUBLE = 10;
40
41     private static final int PYTHON_FORMAT_TEXT = 0;
42     private static final int PYTHON_FORMAT_COMMENT = 1;
43     private static final int PYTHON_FORMAT_STRING = 2;
44     private static final int PYTHON_FORMAT_IDENTIFIER = 3;
45     private static final int PYTHON_FORMAT_KEYWORD = 4;
46     private static final int PYTHON_FORMAT_FUNCTION = 5;
47     private static final int PYTHON_FORMAT_OPERATOR = 6;
48     private static final int PYTHON_FORMAT_BRACE = 7;
49     private static final int PYTHON_FORMAT_NUMBER = 8;
50
51     private static final PythonMode mode = PythonMode.getMode();
52
53     public PythonFormatter(Buffer buffer)
54     {
55         this.buffer = buffer;
56     }
57
58     private int begin = 0;
59
60     private void endSegment(String JavaDoc text, int offset, int state)
61     {
62         if (offset - begin > 0) {
63             int format;
64             switch (state) {
65                 case PYTHON_STATE_NEUTRAL:
66                     format = PYTHON_FORMAT_TEXT;
67                     break;
68                 case PYTHON_STATE_SINGLE_QUOTE:
69                 case PYTHON_STATE_DOUBLE_QUOTE:
70                 case PYTHON_STATE_TRIPLE_SINGLE:
71                 case PYTHON_STATE_TRIPLE_DOUBLE:
72                     format = PYTHON_FORMAT_STRING;
73                     break;
74                 case PYTHON_STATE_IDENTIFIER:
75                     format = PYTHON_FORMAT_IDENTIFIER;
76                     break;
77                 case PYTHON_STATE_COMMENT:
78                     format = PYTHON_FORMAT_COMMENT;
79                     break;
80                 case PYTHON_STATE_OPERATOR:
81                     format = PYTHON_FORMAT_OPERATOR;
82                     break;
83                 case PYTHON_STATE_BRACE:
84                     format = PYTHON_FORMAT_BRACE;
85                     break;
86                 case PYTHON_STATE_NUMBER:
87                 case PYTHON_STATE_HEXNUMBER:
88                     format = PYTHON_FORMAT_NUMBER;
89                     break;
90                 default:
91                     format = PYTHON_FORMAT_TEXT;
92                     break;
93             }
94             addSegment(text, begin, offset, format);
95             begin = offset;
96         }
97     }
98
99     private void parseLine(Line line)
100     {
101         String JavaDoc text;
102         if (Editor.tabsAreVisible())
103             text = Utilities.makeTabsVisible(line.getText(), buffer.getTabWidth());
104         else
105             text = Utilities.detab(line.getText(), buffer.getTabWidth());
106         begin = 0;
107         int state = line.flags();
108         int i = 0;
109         final int limit = text.length();
110
111         // Skip whitespace at start of line.
112
while (i < limit) {
113             if (Character.isWhitespace(text.charAt(i))) {
114                 ++i;
115             } else {
116                 endSegment(text, i, state);
117                 break;
118             }
119         }
120
121         while (i < limit) {
122             char c = text.charAt(i);
123             if (c == '\\' && i < limit-1) {
124                 // Escape char.
125
i += 2;
126                 continue;
127             }
128
129             if (state == PYTHON_STATE_SINGLE_QUOTE) {
130                 if (c == '\'') {
131                     endSegment(text, i+1, state);
132                     state = PYTHON_STATE_NEUTRAL;
133                 }
134                 ++i;
135                 continue;
136             }
137
138             if (state == PYTHON_STATE_DOUBLE_QUOTE) {
139                 if (c == '"') {
140                     endSegment(text, i+1, state);
141                     state = PYTHON_STATE_NEUTRAL;
142                 }
143                 ++i;
144                 continue;
145             }
146
147             if (state == PYTHON_STATE_TRIPLE_SINGLE) {
148                 if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
149                     i += 3;
150                     endSegment(text, i, state);
151                     state = PYTHON_STATE_NEUTRAL;
152                 } else
153                     ++i;
154                 continue;
155             }
156
157             if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
158                 if (c == '"' && text.regionMatches(i, "\"\"\"", 0, 3)) {
159                     i += 3;
160                     endSegment(text, i, state);
161                     state = PYTHON_STATE_NEUTRAL;
162                 } else
163                     ++i;
164                 continue;
165             }
166
167             // Reaching here, we're not in a quoted string.
168
if (c == '\'') {
169                 endSegment(text, i, state);
170                 if (text.regionMatches(i, "'''", 0, 3)) {
171                     state = PYTHON_STATE_TRIPLE_SINGLE;
172                     i += 3;
173                 } else {
174                     state = PYTHON_STATE_SINGLE_QUOTE;
175                     ++i;
176                 }
177                 continue;
178             }
179
180             if (c == '"') {
181                 endSegment(text, i, state);
182                 if (text.regionMatches(i, "\"\"\"", 0, 3)) {
183                     state = PYTHON_STATE_TRIPLE_DOUBLE;
184                     i += 3;
185                 } else {
186                     state = PYTHON_STATE_DOUBLE_QUOTE;
187                     ++i;
188                 }
189                 continue;
190             }
191
192             if (c == '#') {
193                 endSegment(text, i, state);
194                 endSegment(text, limit, PYTHON_STATE_COMMENT);
195                 return;
196             }
197
198             if (isOperatorChar(c)) {
199                 if (state != PYTHON_STATE_OPERATOR) {
200                     endSegment(text, i, state);
201                     state = PYTHON_STATE_OPERATOR;
202                 }
203                 ++i;
204                 continue;
205             }
206
207             if (c == '{' || c == '}') {
208                 if (state != PYTHON_STATE_BRACE) {
209                     endSegment(text, i, state);
210                     // Check for keyword.
211
LineSegment segment = getLastSegment();
212                     if (segment != null && isKeyword(segment.getText()))
213                         segment.setFormat(PYTHON_FORMAT_KEYWORD);
214                     state = PYTHON_STATE_BRACE;
215                 }
216                 ++i;
217                 continue;
218             }
219
220             if (state == PYTHON_STATE_OPERATOR || state == PYTHON_STATE_BRACE) {
221                 if (mode.isIdentifierStart(c)) {
222                     endSegment(text, i, state);
223                     state = PYTHON_STATE_IDENTIFIER;
224                 } else if (Character.isDigit(c)) {
225                     endSegment(text, i, state);
226                     state = PYTHON_STATE_NUMBER;
227                 } else {
228                     endSegment(text, i, state);
229                     state = PYTHON_STATE_NEUTRAL;
230                 }
231                 ++i;
232                 continue;
233             }
234
235             if (state == PYTHON_STATE_IDENTIFIER) {
236                 if (!mode.isIdentifierPart(c)) {
237                     endSegment(text, i, state);
238                     // Check for keyword or function.
239
LineSegment segment = getLastSegment();
240                     if (segment != null) {
241                         String JavaDoc segmentText = segment.getText();
242                         if (isKeyword(segment.getText())) {
243                             segment.setFormat(PYTHON_FORMAT_KEYWORD);
244                         } else if (c == '(') {
245                             segment.setFormat(PYTHON_FORMAT_FUNCTION);
246                         } else if (Character.isWhitespace(c)) {
247                             // Look ahead to see if next non-whitespace char is '('.
248
int j = i+1;
249                             while (j < limit && Character.isWhitespace(c = text.charAt(j)))
250                                 ++j;
251                             if (c == '(')
252                                 segment.setFormat(PYTHON_FORMAT_FUNCTION);
253                         }
254                     }
255                     state = PYTHON_STATE_NEUTRAL;
256                 }
257                 ++i;
258                 continue;
259             }
260
261             if (state == PYTHON_STATE_NUMBER) {
262                 if (Character.isDigit(c))
263                     ;
264                 else if (c == 'l' || c == 'L')
265                     ;
266                 else if (i - begin == 1 && c == 'x' || c == 'X')
267                     state = PYTHON_STATE_HEXNUMBER;
268                 else {
269                     endSegment(text, i, state);
270                     if (mode.isIdentifierStart(c))
271                         state = PYTHON_STATE_IDENTIFIER;
272                     else
273                         state = PYTHON_STATE_NEUTRAL;
274                 }
275                 ++i;
276                 continue;
277             }
278
279             if (state == PYTHON_STATE_HEXNUMBER) {
280                 if (Character.isDigit(c))
281                     ;
282                 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
283                     ;
284                 else if (c == 'l' || c == 'L')
285                     ;
286                 else {
287                     endSegment(text, i, state);
288                     if (mode.isIdentifierStart(c))
289                         state = PYTHON_STATE_IDENTIFIER;
290                     else
291                         state = PYTHON_STATE_NEUTRAL;
292                 }
293                 ++i;
294                 continue;
295             }
296
297             if (state == PYTHON_STATE_NEUTRAL) {
298                 if (mode.isIdentifierStart(c)) {
299                     endSegment(text, i, state);
300                     state = PYTHON_STATE_IDENTIFIER;
301                 } else if (Character.isDigit(c)) {
302                     endSegment(text, i, state);
303                     state = PYTHON_STATE_NUMBER;
304                 }
305             }
306             ++i;
307         }
308
309         // Reached end of line.
310
endSegment(text, i, state);
311
312         if (state == PYTHON_STATE_IDENTIFIER) {
313             // Last token might be a keyword.
314
LineSegment segment = getLastSegment();
315             if (segment != null && isKeyword(segment.getText()))
316                 segment.setFormat(PYTHON_FORMAT_KEYWORD);
317         }
318     }
319
320     public LineSegmentList formatLine(Line line)
321     {
322         clearSegmentList();
323         parseLine(line);
324         return segmentList;
325     }
326
327     public boolean parseBuffer()
328     {
329         int state = PYTHON_STATE_NEUTRAL;
330         Line line = buffer.getFirstLine();
331         boolean changed = false;
332         while (line != null) {
333             if (state != line.flags()) {
334                 line.setFlags(state);
335                 changed = true;
336             }
337             final String JavaDoc text = line.getText();
338             final int limit = line.length();
339             int i = 0;
340             while (i < limit) {
341                 char c = text.charAt(i);
342                 if (c == '\\') {
343                     // Escape.
344
i += 2;
345                     continue;
346                 }
347                 if (state == PYTHON_STATE_SINGLE_QUOTE) {
348                     if (c == '\'')
349                         state = PYTHON_STATE_NEUTRAL;
350                     ++i;
351                     continue;
352                 }
353                 if (state == PYTHON_STATE_DOUBLE_QUOTE) {
354                     if (c == '"')
355                         state = PYTHON_STATE_NEUTRAL;
356                     ++i;
357                     continue;
358                 }
359                 if (state == PYTHON_STATE_TRIPLE_SINGLE) {
360                     if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
361                         state = PYTHON_STATE_NEUTRAL;
362                         i += 3;
363                     } else
364                         ++i;
365                     continue;
366                 }
367                 if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
368                     if (c == '"' && text.regionMatches(i, "\"\"\"", 0, 3)) {
369                         state = PYTHON_STATE_NEUTRAL;
370                         i += 3;
371                     } else
372                         ++i;
373                     continue;
374                 }
375                 // Not in quoted string.
376
if (c == '\'') {
377                     if (text.regionMatches(i, "'''", 0, 3)) {
378                         state = PYTHON_STATE_TRIPLE_SINGLE;
379                         i += 3;
380                     } else {
381                         state = PYTHON_STATE_SINGLE_QUOTE;
382                         ++i;
383                     }
384                     continue;
385                 }
386                 if (c == '"') {
387                     if (text.regionMatches(i, "\"\"\"", 0, 3)) {
388                         state = PYTHON_STATE_TRIPLE_DOUBLE;
389                         i += 3;
390                     } else {
391                         state = PYTHON_STATE_DOUBLE_QUOTE;
392                         ++i;
393                     }
394                     continue;
395                 }
396                 if (c == '#')
397                     break;
398                 ++i;
399             }
400             line = line.next();
401         }
402         buffer.setNeedsParsing(false);
403         return changed;
404     }
405
406     private static final boolean isOperatorChar(char c)
407     {
408         return "!&|<>=+/*-".indexOf(c) >= 0;
409     }
410
411     public FormatTable getFormatTable()
412     {
413         if (formatTable == null) {
414             formatTable = new FormatTable(null);
415             formatTable.addEntryFromPrefs(PYTHON_FORMAT_TEXT, "text");
416             formatTable.addEntryFromPrefs(PYTHON_FORMAT_COMMENT, "comment");
417             formatTable.addEntryFromPrefs(PYTHON_FORMAT_STRING, "string");
418             formatTable.addEntryFromPrefs(PYTHON_FORMAT_IDENTIFIER, "identifier", "text");
419             formatTable.addEntryFromPrefs(PYTHON_FORMAT_KEYWORD, "keyword");
420             formatTable.addEntryFromPrefs(PYTHON_FORMAT_FUNCTION, "function");
421             formatTable.addEntryFromPrefs(PYTHON_FORMAT_OPERATOR, "operator");
422             formatTable.addEntryFromPrefs(PYTHON_FORMAT_BRACE, "brace");
423             formatTable.addEntryFromPrefs(PYTHON_FORMAT_NUMBER, "number");
424         }
425         return formatTable;
426     }
427 }
428
Popular Tags