KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > armedbear > j > RubyFormatter


/*
 * RubyFormatter.java
 *
 * Copyright (C) 2002 Jens Luedicke <jens@irs-net.com>
 * based on PythonFormatter.java
 * $Id: RubyFormatter.java,v 1.1.1.1 2002/09/24 16:09:23 piso Exp $
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

22
23 package org.armedbear.j;
24
25 public final class RubyFormatter extends Formatter
26 {
27     private static final int RUBY_STATE_NEUTRAL = 0;
28     private static final int RUBY_STATE_SINGLE_QUOTE = 1;
29     private static final int RUBY_STATE_DOUBLE_QUOTE = 2;
30     private static final int RUBY_STATE_IDENTIFIER = 3;
31     private static final int RUBY_STATE_COMMENT = 4;
32     private static final int RUBY_STATE_BRACE = 5;
33     private static final int RUBY_STATE_NUMBER = 6;
34     private static final int RUBY_STATE_HEXNUMBER = 7;
35     private static final int RUBY_STATE_OPERATOR = 8;
36     private static final int RUBY_STATE_HERE_DOCUMENT = 9;
37     private static final int RUBY_STATE_POD = 10;
38     private static final int RUBY_STATE_REGEXP = 11;
39     private static final int RUBY_STATE_REGEXP_DELIMITER = 12;
40
41     private static final int RUBY_FORMAT_TEXT = 0;
42     private static final int RUBY_FORMAT_COMMENT = 1;
43     private static final int RUBY_FORMAT_STRING = 2;
44     private static final int RUBY_FORMAT_IDENTIFIER = 3;
45     private static final int RUBY_FORMAT_KEYWORD = 4;
46     private static final int RUBY_FORMAT_FUNCTION = 5;
47     private static final int RUBY_FORMAT_OPERATOR = 6;
48     private static final int RUBY_FORMAT_BRACE = 7;
49     private static final int RUBY_FORMAT_NUMBER = 8;
50
51     private static final RubyMode mode = RubyMode.getMode();
52
53     private String JavaDoc endOfText;
54
55     public RubyFormatter(Buffer buffer)
56     {
57         this.buffer = buffer;
58     }
59
60     private int begin = 0;
61
62     private void endSegment(String JavaDoc text, int offset, int state)
63     {
64         if (offset - begin > 0) {
65             int format;
66             switch (state) {
67                 case RUBY_STATE_NEUTRAL:
68                     format = RUBY_FORMAT_TEXT;
69                     break;
70                 case RUBY_STATE_SINGLE_QUOTE:
71                 case RUBY_STATE_DOUBLE_QUOTE:
72                 case RUBY_STATE_HERE_DOCUMENT:
73                 case RUBY_STATE_REGEXP:
74                     format = RUBY_FORMAT_STRING;
75                     break;
76                 case RUBY_STATE_REGEXP_DELIMITER:
77                     format = RUBY_FORMAT_FUNCTION;
78                     break;
79                 case RUBY_STATE_IDENTIFIER:
80                     format = RUBY_FORMAT_IDENTIFIER;
81                     break;
82                 case RUBY_STATE_COMMENT:
83                 case RUBY_STATE_POD:
84                     format = RUBY_FORMAT_COMMENT;
85                     break;
86                 case RUBY_STATE_OPERATOR:
87                     format = RUBY_FORMAT_OPERATOR;
88                     break;
89                 case RUBY_STATE_BRACE:
90                     format = RUBY_FORMAT_BRACE;
91                     break;
92                 case RUBY_STATE_NUMBER:
93                 case RUBY_STATE_HEXNUMBER:
94                     format = RUBY_FORMAT_NUMBER;
95                     break;
96                 default:
97                     format = RUBY_FORMAT_TEXT;
98                     break;
99             }
100             addSegment(text, begin, offset, format);
101             begin = offset;
102         }
103     }
104
105     private void parseLine(Line line)
106     {
107         String JavaDoc text;
108         if (Editor.tabsAreVisible())
109             text = Utilities.makeTabsVisible(line.getText(), buffer.getTabWidth());
110         else
111             text = Utilities.detab(line.getText(), buffer.getTabWidth());
112         begin = 0;
113         int state = line.flags();
114         int i = 0;
115         final int limit = text.length();
116         if (state == RUBY_STATE_HERE_DOCUMENT) {
117             if (text.trim().startsWith(endOfText))
118                 state = RUBY_STATE_NEUTRAL;
119             else {
120                 endSegment(text, limit, state);
121                 return;
122             }
123         }
124         if (state == RUBY_STATE_POD) {
125             endSegment(text, limit, state);
126             return;
127         }
128         // Skip whitespace at start of line.
129
while (i < limit) {
130             if (Character.isWhitespace(text.charAt(i))) {
131                 ++i;
132             } else {
133                 endSegment(text, i, state);
134                 break;
135             }
136         }
137         while (i < limit) {
138             char c = text.charAt(i);
139             if (c == '\\' && i < limit-1) {
140                 // Escape char.
141
i += 2;
142                 continue;
143             }
144             if (state == RUBY_STATE_SINGLE_QUOTE) {
145                 if (c == '\'') {
146                     endSegment(text, i+1, state);
147                     state = RUBY_STATE_NEUTRAL;
148                 }
149                 ++i;
150                 continue;
151             }
152             if (state == RUBY_STATE_DOUBLE_QUOTE) {
153                 if (c == '"') {
154                     endSegment(text, i+1, state);
155                     state = RUBY_STATE_NEUTRAL;
156                 }
157                 ++i;
158                 continue;
159             }
160             if (state == RUBY_STATE_REGEXP) {
161                 if (c == '/') {
162                     endSegment(text, i, state);
163                     endSegment(text, i+1, RUBY_STATE_REGEXP_DELIMITER);
164                     state = RUBY_STATE_NEUTRAL;
165                 }
166                 ++i;
167                 continue;
168             }
169             // Reaching here, we're not in a quoted string or regexp.
170
if (c == '\'') {
171                 if (i == 0 || text.charAt(i-1) != '$') {
172                     endSegment(text, i, state);
173                     state = RUBY_STATE_SINGLE_QUOTE;
174                 }
175                 ++i;
176                 continue;
177             }
178             if (c == '"') {
179                 if (i == 0 || text.charAt(i-1) != '$') {
180                     endSegment(text, i, state);
181                     state = RUBY_STATE_DOUBLE_QUOTE;
182                 }
183                 ++i;
184                 continue;
185             }
186             if (c == '/') {
187                 if (isRegExp(text, i)) {
188                     endSegment(text, i, state);
189                     endSegment(text, i+1, RUBY_STATE_REGEXP_DELIMITER);
190                     state = RUBY_STATE_REGEXP;
191                 }
192                 ++i;
193                 continue;
194             }
195             if (c == '#') {
196                 endSegment(text, i, state);
197                 endSegment(text, limit, RUBY_STATE_COMMENT);
198                 return;
199             }
200             if (isOperatorChar(c)) {
201                 if (state != RUBY_STATE_OPERATOR) {
202                     endSegment(text, i, state);
203                     state = RUBY_STATE_OPERATOR;
204                 }
205                 ++i;
206                 continue;
207             }
208             if (c == '{' || c == '}') {
209                 if (state != RUBY_STATE_BRACE) {
210                     endSegment(text, i, state);
211                     // Check for keyword.
212
LineSegment segment = getLastSegment();
213                     if (segment != null && isKeyword(segment.getText()))
214                         segment.setFormat(RUBY_FORMAT_KEYWORD);
215                     state = RUBY_STATE_BRACE;
216                 }
217                 ++i;
218                 continue;
219             }
220             if (state == RUBY_STATE_OPERATOR || state == RUBY_STATE_BRACE) {
221                 if (mode.isIdentifierStart(c)) {
222                     endSegment(text, i, state);
223                     state = RUBY_STATE_IDENTIFIER;
224                 } else if (Character.isDigit(c)) {
225                     endSegment(text, i, state);
226                     state = RUBY_STATE_NUMBER;
227                 } else {
228                     endSegment(text, i, state);
229                     state = RUBY_STATE_NEUTRAL;
230                 }
231                 ++i;
232                 continue;
233             }
234             if (state == RUBY_STATE_IDENTIFIER) {
235                 if (!mode.isIdentifierPart(c)) {
236                     endSegment(text, i, state);
237                     // Check for keyword or function.
238
LineSegment segment = getLastSegment();
239                     if (segment != null) {
240                         String JavaDoc segmentText = segment.getText();
241                         if (isKeyword(segment.getText())) {
242                             segment.setFormat(RUBY_FORMAT_KEYWORD);
243                         } else if (c == '(') {
244                             segment.setFormat(RUBY_FORMAT_FUNCTION);
245                         } else if (Character.isWhitespace(c)) {
246                             // Look ahead to see if next non-whitespace char is '('.
247
int j = i+1;
248                             while (j < limit && Character.isWhitespace(c = text.charAt(j)))
249                                 ++j;
250                             if (c == '(')
251                                 segment.setFormat(RUBY_FORMAT_FUNCTION);
252                         }
253                     }
254                     state = RUBY_STATE_NEUTRAL;
255                 }
256                 ++i;
257                 continue;
258             }
259             if (state == RUBY_STATE_NUMBER) {
260                 if (Character.isDigit(c))
261                     ;
262                 else if (c == 'l' || c == 'L')
263                     ;
264                 else if (i - begin == 1 && c == 'x' || c == 'X')
265                     state = RUBY_STATE_HEXNUMBER;
266                 else {
267                     endSegment(text, i, state);
268                     if (mode.isIdentifierStart(c))
269                         state = RUBY_STATE_IDENTIFIER;
270                     else
271                         state = RUBY_STATE_NEUTRAL;
272                 }
273                 ++i;
274                 continue;
275             }
276             if (state == RUBY_STATE_HEXNUMBER) {
277                 if (Character.isDigit(c))
278                     ;
279                 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
280                     ;
281                 else if (c == 'l' || c == 'L')
282                     ;
283                 else {
284                     endSegment(text, i, state);
285                     if (mode.isIdentifierStart(c))
286                         state = RUBY_STATE_IDENTIFIER;
287                     else
288                         state = RUBY_STATE_NEUTRAL;
289                 }
290                 ++i;
291                 continue;
292             }
293             if (state == RUBY_STATE_NEUTRAL) {
294                 if (mode.isIdentifierStart(c)) {
295                     endSegment(text, i, state);
296                     state = RUBY_STATE_IDENTIFIER;
297                 } else if (Character.isDigit(c)) {
298                     if (i == 0 || text.charAt(i-1) != '$') {
299                         endSegment(text, i, state);
300                         state = RUBY_STATE_NUMBER;
301                     }
302                 }
303             }
304             ++i;
305         }
306         // Reached end of line.
307
endSegment(text, i, state);
308         if (state == RUBY_STATE_IDENTIFIER) {
309             // Last token might be a keyword.
310
LineSegment segment = getLastSegment();
311             if (segment != null && isKeyword(segment.getText()))
312                 segment.setFormat(RUBY_FORMAT_KEYWORD);
313         }
314     }
315
316     // Make sure the '/' at i is not the division operator.
317
public static boolean isRegExp(String JavaDoc text, int i)
318     {
319         Debug.assertTrue(text.charAt(i) == '/');
320         if (i == 0) {
321             // It's the first character on the line.
322
return true;
323         }
324         // Consider the previous character.
325
char c = text.charAt(i-1);
326         if (c == '(')
327             return true;
328
329         if (mode.isIdentifierPart(c))
330             return false;
331
332         if (!Character.isWhitespace(c))
333             return false;
334
335         // The immediately previous character is whitespace.
336
final String JavaDoc s = text.substring(0, i-1).trim();
337         final int length = s.length();
338         if (length == 0) {
339             // The '/' is the first non-whitespace character on the line.
340
return true;
341         }
342         c = s.charAt(length-1);
343         if (c == ')')
344             return false; // "(a + b) / c"
345
if (c == '}')
346             return false;
347         if (!mode.isIdentifierPart(c))
348             return true;
349
350         // Last non-whitespace character is a valid identifier character.
351
FastStringBuffer sb = new FastStringBuffer(c);
352         for (int j = s.length()-2; j >= 0; j--) {
353             c = s.charAt(j);
354             if (mode.isIdentifierPart(c))
355                 sb.append(c);
356             else
357                 break;
358         }
359         String JavaDoc token = sb.reverse().toString();
360         String JavaDoc[] ok = { "and", "or", "not", "if", "unless", "when" };
361         if (Utilities.isOneOf(token, ok))
362             return true;
363
364         return false;
365     }
366
367     public LineSegmentList formatLine(Line line)
368     {
369         clearSegmentList();
370         parseLine(line);
371         return segmentList;
372     }
373
374     public boolean parseBuffer()
375     {
376         int state = RUBY_STATE_NEUTRAL;
377         Line line = buffer.getFirstLine();
378         boolean changed = false;
379         while (line != null) {
380             int oldflags = line.flags();
381             if (state == RUBY_STATE_HERE_DOCUMENT) {
382                 if (line.getText().equals(endOfText))
383                     state = RUBY_STATE_NEUTRAL;
384             }
385             if (state == RUBY_STATE_POD) {
386                 if (line.getText().startsWith("=end")) {
387                     if (state != oldflags) {
388                         line.setFlags(state);
389                         changed = true;
390                     }
391                     state = RUBY_STATE_NEUTRAL;
392                     line = line.next();
393                     continue;
394                 }
395             }
396             if (state == RUBY_STATE_NEUTRAL)
397                 if (line.getText().startsWith("=begin"))
398                     state = RUBY_STATE_POD;
399             if (state != oldflags) {
400                 line.setFlags(state);
401                 changed = true;
402             }
403             if (state == RUBY_STATE_HERE_DOCUMENT || state == RUBY_STATE_POD) {
404                 line = line.next();
405                 continue;
406             }
407             final String JavaDoc text = line.getText();
408             final int limit = line.length();
409             int i = 0;
410             while (i < limit) {
411                 char c = text.charAt(i);
412                 if (c == '\\') {
413                     // Escape.
414
i += 2;
415                     continue;
416                 }
417                 if (state == RUBY_STATE_SINGLE_QUOTE) {
418                     if (c == '\'')
419                         state = RUBY_STATE_NEUTRAL;
420                     ++i;
421                     continue;
422                 }
423                 if (state == RUBY_STATE_DOUBLE_QUOTE) {
424                     if (c == '"')
425                         state = RUBY_STATE_NEUTRAL;
426                     ++i;
427                     continue;
428                 }
429                 if (state == RUBY_STATE_REGEXP) {
430                     if (c == '/')
431                         state = RUBY_STATE_NEUTRAL;
432                     ++i;
433                     continue;
434                 }
435                 // Not in quoted string or regexp.
436
if (c == '/') {
437                     if (isRegExp(text, i))
438                         state = RUBY_STATE_REGEXP;
439                     ++i;
440                     continue;
441                 }
442                 if (c == '<' && i < limit-3 && line.charAt(i+1) == '<') {
443                     // There must be no space between "<<" and the terminator.
444
if (!Character.isWhitespace(line.charAt(i+2))) {
445                         endOfText = line.substring(i+2).trim();
446                         int length = endOfText.length();
447                         // Remove ';' at end of line.
448
if (length > 0 && endOfText.charAt(length-1) == ';')
449                             endOfText = endOfText.substring(0, --length);
450                         // Remove leading '-'.
451
if (length > 0 && endOfText.charAt(0) == '-') {
452                             endOfText = endOfText.substring(1);
453                             --length;
454                         }
455                         // Remove enclosing quotes.
456
if (length > 2) {
457                             char firstChar = endOfText.charAt(0);
458                             if ("\"'`".indexOf(firstChar) >= 0)
459                                 if (endOfText.charAt(length-1) == firstChar)
460                                     endOfText = endOfText.substring(1, length-1);
461                         }
462                         if (endOfText.length() > 0) {
463                             // Make sure "<<" is not shift operator.
464
if (Character.isLetter(endOfText.charAt(0))) {
465                                 state = RUBY_STATE_HERE_DOCUMENT;
466                                 break;
467                             }
468                         }
469                     }
470                     ++i;
471                     continue;
472                 }
473                 if (c == '\'') {
474                     if (i == 0 || line.charAt(i-1) != '$')
475                         state = RUBY_STATE_SINGLE_QUOTE;
476                     ++i;
477                     continue;
478                 }
479                 if (c == '"') {
480                     if (i == 0 || line.charAt(i-1) != '$')
481                         state = RUBY_STATE_DOUBLE_QUOTE;
482                     ++i;
483                     continue;
484                 }
485                 if (c == '#')
486                     break;
487                 ++i;
488             }
489             line = line.next();
490         }
491         buffer.setNeedsParsing(false);
492         return changed;
493     }
494
495     private static final boolean isOperatorChar(char c)
496     {
497         return "!&|<>=+/*-".indexOf(c) >= 0;
498     }
499
500     public FormatTable getFormatTable()
501     {
502         if (formatTable == null) {
503             formatTable = new FormatTable(null);
504             formatTable.addEntryFromPrefs(RUBY_FORMAT_TEXT, "text");
505             formatTable.addEntryFromPrefs(RUBY_FORMAT_COMMENT, "comment");
506             formatTable.addEntryFromPrefs(RUBY_FORMAT_STRING, "string");
507             formatTable.addEntryFromPrefs(RUBY_FORMAT_IDENTIFIER, "identifier", "text");
508             formatTable.addEntryFromPrefs(RUBY_FORMAT_KEYWORD, "keyword");
509             formatTable.addEntryFromPrefs(RUBY_FORMAT_FUNCTION, "function");
510             formatTable.addEntryFromPrefs(RUBY_FORMAT_OPERATOR, "operator");
511             formatTable.addEntryFromPrefs(RUBY_FORMAT_BRACE, "brace");
512             formatTable.addEntryFromPrefs(RUBY_FORMAT_NUMBER, "number");
513         }
514         return formatTable;
515     }
516 }
517
Popular Tags