KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > armedbear > j > PerlFormatter


1 /*
2  * PerlFormatter.java
3  *
4  * Copyright (C) 1998-2003 Peter Graves
5  * $Id: PerlFormatter.java,v 1.2 2003/04/25 14:20:12 piso Exp $
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */

21
22 package org.armedbear.j;
23
24 import gnu.regexp.RE;
25 import gnu.regexp.REMatch;
26 import gnu.regexp.UncheckedRE;
27
28 public final class PerlFormatter extends Formatter
29 {
30     private static final int STATE_VARIABLE = STATE_LAST + 1;
31     private static final int STATE_HERE_DOCUMENT = STATE_LAST + 2;
32     private static final int STATE_POD = STATE_LAST + 3;
33     private static final int STATE_REGEXP_DELIMITER = STATE_LAST + 4;
34     private static final int STATE_REGEXP = STATE_LAST + 5;
35     private static final int STATE_SUBST = STATE_LAST + 6;
36
37     private static final String JavaDoc punctuation = "&`^:+#-%'\"/~_";
38
39     // Formats.
40
private static final int PERL_FORMAT_TEXT = 0;
41     private static final int PERL_FORMAT_COMMENT = 1;
42     private static final int PERL_FORMAT_STRING = 2;
43     private static final int PERL_FORMAT_KEYWORD = 3;
44     private static final int PERL_FORMAT_FUNCTION = 4;
45     private static final int PERL_FORMAT_BRACE = 5;
46     private static final int PERL_FORMAT_NUMBER = 6;
47     private static final int PERL_FORMAT_SCALAR = 7;
48     private static final int PERL_FORMAT_LIST = 8;
49
50     private static StringSet functions;
51
52     private FastStringBuffer sb = new FastStringBuffer();
53
54     private String JavaDoc endOfText;
55
56     private static RE matchRE = new UncheckedRE("(=~|!~)[ \t]+m[^a-zA-Z0-9]");
57
/**
 * Creates a formatter for the given buffer.
 *
 * The shared set of Perl function names is built from perlFunctions the
 * first time any instance is constructed.
 *
 * @param buffer the buffer this formatter works on
 */
public PerlFormatter(Buffer buffer)
{
    if (functions == null)
        functions = new StringSet(perlFunctions);
    this.buffer = buffer;
}
64
65     private void endToken(int state)
66     {
67         if (sb.length() > 0) {
68             int format = -1;
69             switch (state) {
70                 case STATE_NEUTRAL:
71                     break;
72                 case STATE_QUOTE:
73                 case STATE_SINGLEQUOTE:
74                 case STATE_HERE_DOCUMENT:
75                 case STATE_REGEXP:
76                 case STATE_SUBST:
77                     format = PERL_FORMAT_STRING;
78                     break;
79                 case STATE_REGEXP_DELIMITER:
80                     format = PERL_FORMAT_FUNCTION;
81                     break;
82                 case STATE_IDENTIFIER:
83                     break;
84                 case STATE_COMMENT:
85                 case STATE_POD:
86                     format = PERL_FORMAT_COMMENT;
87                     break;
88                 case STATE_BRACE:
89                     format = PERL_FORMAT_BRACE;
90                     break;
91                 case STATE_NUMBER:
92                 case STATE_HEXNUMBER:
93                     format = PERL_FORMAT_NUMBER;
94                     break;
95             }
96             addSegment(sb.toString(), format);
97             sb.setLength(0);
98         }
99     }
100
101     private void parseLine(String JavaDoc text, int state)
102     {
103         if (Editor.tabsAreVisible())
104             text = Utilities.makeTabsVisible(text, buffer.getTabWidth());
105         else
106             text = Utilities.detab(text, buffer.getTabWidth());
107         clearSegmentList();
108         sb.setLength(0);
109         int i = 0;
110         if (state == STATE_HERE_DOCUMENT) {
111             if (text.startsWith(endOfText))
112                 state = STATE_NEUTRAL;
113             else {
114                 sb.append(text);
115                 endToken(state);
116                 return;
117             }
118         }
119         if (state == STATE_POD) {
120             sb.append(text);
121             endToken(state);
122             return;
123         }
124         final int limit = text.length();
125         char c;
126         // Skip whitespace at start of line.
127
while (i < limit) {
128             c = text.charAt(i);
129             if (Character.isWhitespace(c)) {
130                 sb.append(c);
131                 ++i;
132             } else {
133                 endToken(state);
134                 break;
135             }
136         }
137         char delimiter = 0;
138         while (i < limit) {
139             c = text.charAt(i);
140             if (c == '\\') {
141                 // Escape.
142
sb.append(c);
143                 if (i < limit-1)
144                     sb.append(text.charAt(++i));
145                 ++i;
146                 continue;
147             }
148             if (state == STATE_QUOTE) {
149                 sb.append(c);
150                 if (c == '"') {
151                     endToken(state);
152                     state = STATE_NEUTRAL;
153                 }
154                 ++i;
155                 continue;
156             }
157             if (state == STATE_SINGLEQUOTE) {
158                 sb.append(c);
159                 if (c == '\'') {
160                     endToken(state);
161                     state = STATE_NEUTRAL;
162                 }
163                 ++i;
164                 continue;
165             }
166             if (state == STATE_REGEXP) {
167                 if (c == delimiter) {
168                     endToken(state);
169                     sb.append(c);
170                     endToken(STATE_REGEXP_DELIMITER);
171                     state = STATE_NEUTRAL;
172                 } else
173                     sb.append(c);
174                 ++i;
175                 continue;
176             }
177             if (state == STATE_SUBST) {
178                 if (c == delimiter) {
179                     endToken(state);
180                     sb.append(c);
181                     endToken(STATE_REGEXP_DELIMITER);
182                     state = STATE_REGEXP;
183                 } else
184                     sb.append(c);
185                 ++i;
186                 continue;
187             }
188             // Reaching here, we're not in a quoted string or regexp.
189
if (c == '{' || c == '}') {
190                 endToken(state);
191                 sb.append(c);
192                 endToken(STATE_BRACE);
193                 state = STATE_NEUTRAL;
194                 ++i;
195                 continue;
196             }
197             if (state == STATE_VARIABLE) {
198                 boolean ok = false;
199                 if (PerlMode.isIdentifierChar(c))
200                     ok = true;
201                 else if (sb.length() == 1 && punctuation.indexOf(c) >= 0)
202                     ok = true;
203                 if (ok)
204                     sb.append(c);
205                 else {
206                     endToken(state);
207                     sb.append(c);
208                     state = STATE_NEUTRAL;
209                 }
210                 ++i;
211                 continue;
212             }
213             if (c == '"') {
214                 endToken(state);
215                 sb.append(c);
216                 state = STATE_QUOTE;
217                 ++i;
218                 continue;
219             }
220             if (c == '\'') {
221                 endToken(state);
222                 sb.append(c);
223                 state = STATE_SINGLEQUOTE;
224                 ++i;
225                 continue;
226             }
227             if (c == '=' || c == '!') {
228                 REMatch match = matchRE.getMatch(text.substring(i));
229                 if (match != null) {
230                     final String JavaDoc s = match.toString();
231                     final int length = s.length();
232                     // End the previous token.
233
endToken(state);
234                     sb.append(s.substring(0, 2));
235                     endToken(STATE_NEUTRAL);
236                     i += 2;
237                     sb.append(s.substring(2));
238                     endToken(STATE_REGEXP_DELIMITER);
239                     i += length - 2;
240                     delimiter = s.charAt(length - 1);
241                     if (delimiter == '{')
242                         delimiter = '}';
243                     state = STATE_REGEXP;
244                 } else {
245                     sb.append(c);
246                     ++i;
247                 }
248                 continue;
249             }
250             if (c == '/') {
251                 if (isSubst(text, i)) {
252                     delimiter = '/';
253                     sb.append(c);
254                     endToken(STATE_REGEXP_DELIMITER);
255                     state = STATE_SUBST;
256                 } else if (isRegExp(text, i)) {
257                     delimiter = '/';
258                     // End the previous token unless we've got "m/".
259
if (i > 0 && text.charAt(i-1) != 'm')
260                         endToken(state);
261                     sb.append(c);
262                     endToken(STATE_REGEXP_DELIMITER);
263                     state = STATE_REGEXP;
264                 } else {
265                     // It's the division operator.
266
sb.append(c);
267                 }
268                 ++i;
269                 continue;
270             }
271             if (c == '#') {
272                 endToken(state);
273                 state = STATE_COMMENT;
274                 sb.append(text.substring(i));
275                 endToken(state);
276                 return;
277             }
278             if (state == STATE_IDENTIFIER) {
279                 if (PerlMode.isIdentifierChar(c))
280                     sb.append(c);
281                 else {
282                     endToken(state);
283                     sb.append(c);
284                     state = STATE_NEUTRAL;
285                 }
286                 ++i;
287                 continue;
288             }
289             if (state == STATE_NUMBER) {
290                 if (Character.isDigit(c))
291                     sb.append(c);
292                 else if (sb.length() == 1 && c == 'x' || c == 'X') {
293                     sb.append(c);
294                     state = STATE_HEXNUMBER;
295                 } else {
296                     endToken(state);
297                     sb.append(c);
298                     if (PerlMode.isIdentifierChar(c))
299                         state = STATE_IDENTIFIER;
300                     else
301                         state = STATE_NEUTRAL;
302                 }
303                 ++i;
304                 continue;
305             }
306             if (state == STATE_HEXNUMBER) {
307                 if (Character.isDigit(c))
308                     sb.append(c);
309                 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
310                     sb.append(c);
311                 else {
312                     endToken(state);
313                     sb.append(c);
314                     if (PerlMode.isIdentifierChar(c))
315                         state = STATE_IDENTIFIER;
316                     else
317                         state = STATE_NEUTRAL;
318                 }
319                 ++i;
320                 continue;
321             }
322             if (state == STATE_NEUTRAL) {
323                 if (c == '$') {
324                     endToken(state);
325                     sb.append(c);
326                     state = STATE_VARIABLE;
327                 } else if (PerlMode.isIdentifierChar(c)) {
328                     endToken(state);
329                     sb.append(c);
330                     state = STATE_IDENTIFIER;
331                 } else if (Character.isDigit(c)) {
332                     endToken(state);
333                     sb.append(c);
334                     state = STATE_NUMBER;
335                 } else // Still neutral...
336
sb.append(c);
337             }
338             ++i;
339         }
340         endToken(state);
341     }
342
343     // i is the index of '/'.
344
public static boolean isSubst(String JavaDoc text, int i)
345     {
346         Debug.assertTrue(text.charAt(i) == '/');
347         if (text.regionMatches(i-2, "tr/", 0, 3)) {
348             if (i < 3)
349                 return true;
350             char c = text.charAt(i-3);
351             if (PerlMode.getMode().isIdentifierPart(c))
352                 return false;
353             else
354                 return true;
355         }
356         if (text.regionMatches(i-1, "s/", 0, 2)) {
357             if (i < 2)
358                 return true;
359             char c = text.charAt(i-2);
360             if (PerlMode.getMode().isIdentifierPart(c))
361                 return false;
362             else
363                 return true;
364         }
365         if (text.regionMatches(i-1, "y/", 0, 2)) {
366             if (i < 2)
367                 return true;
368             char c = text.charAt(i-2);
369             if (PerlMode.getMode().isIdentifierPart(c))
370                 return false;
371             else
372                 return true;
373         }
374         return false;
375     }
376
377     // Make sure the '/' at i is not the division operator.
378
public static boolean isRegExp(String JavaDoc text, int i)
379     {
380         Debug.assertTrue(text.charAt(i) == '/');
381         if (i == 0) {
382             // It's the first character on the line.
383
return true;
384         }
385         // Consider the previous character.
386
char c = text.charAt(i-1);
387         if (c == '(')
388             return true;
389         if (c == 'm') {
390             if (i-2 < 0)
391                 return true;
392             c = text.charAt(i-2);
393             if (c == '(' || Character.isWhitespace(c))
394                 return true;
395              return false;
396         }
397         // If it's an identifier character, we're not looking at a regexp,
398
// since we've already tested for substitution and translation
399
// patterns and "m/".
400
if (PerlMode.isIdentifierChar(c))
401             return false;
402
403         if (!Character.isWhitespace(c))
404             return false;
405
406         // The immediately previous character is whitespace.
407
final String JavaDoc s = text.substring(0, i-1).trim();
408         final int length = s.length();
409         if (length == 0) {
410             // The '/' is the first non-whitespace character on the line.
411
return true;
412         }
413         c = s.charAt(length-1);
414         if (c == ')')
415             return false; // "(a + b) / c"
416
if (c == '}')
417             return false;
418         if (!PerlMode.isIdentifierChar(c))
419             return true;
420
421         // Last non-whitespace character is an identifier character.
422
if (s.endsWith("and")) {
423             if (length == 3 || Character.isWhitespace(s.charAt(length-4)))
424                 return true;
425         } else if (s.endsWith("or")) {
426             if (length == 2 || Character.isWhitespace(s.charAt(length-3)))
427                 return true;
428         } else if (s.endsWith("not")) {
429             if (length == 3 || Character.isWhitespace(s.charAt(length-4)))
430                 return true;
431         }
432
433         return false;
434     }
435
436     public LineSegmentList formatLine(Line line)
437     {
438         if (line == null) {
439             clearSegmentList();
440             addSegment("", PERL_FORMAT_TEXT);
441             return segmentList;
442         }
443         parseLine(line.getText(), line.flags());
444         final int tokenCount = segmentList.size();
445         for (int i = 0; i < tokenCount; i++) {
446             LineSegment segment = segmentList.getSegment(i);
447             if (segment.getFormat() >= 0)
448                 continue;
449             String JavaDoc s = segment.getText();
450             if (isKeyword(s)) {
451                 segment.setFormat(PERL_FORMAT_KEYWORD);
452                 continue;
453             }
454             char c = s.charAt(0);
455             if (c == '$') {
456                 segment.setFormat(PERL_FORMAT_SCALAR);
457                 continue;
458             }
459             if (c == '%' || c == '@') {
460                 segment.setFormat(PERL_FORMAT_LIST);
461                 continue;
462             }
463             boolean isFunction = false;
464             if (PerlMode.isIdentifierChar(c)) {
465                 boolean maybeFunction = true;
466                 final int length = s.length();
467                 for (int j = 1; j < length; j++) {
468                     if (!PerlMode.isIdentifierChar(s.charAt(j))) {
469                         maybeFunction = false;
470                         break;
471                     }
472                 }
473                 if (maybeFunction) {
474                     if (isFunction(s))
475                         isFunction = true;
476                     else if (i > 1) {
477                         // See if "sub" is two segments back (one segment back
478
// would be intervening whitespace).
479
LineSegment prevSegment = segmentList.getSegment(i-2);
480                         if (prevSegment.getText().trim().equals("sub"))
481                             isFunction = true;
482                     }
483                     if (!isFunction && i < segmentList.size()-1) {
484                         LineSegment nextSegment = segmentList.getSegment(i+1);
485                         if (nextSegment.getText().trim().startsWith("("))
486                             isFunction = true;
487                     }
488                 }
489             }
490             segment.setFormat(isFunction ? PERL_FORMAT_FUNCTION : PERL_FORMAT_TEXT);
491         }
492         return segmentList;
493     }
494
495     public boolean parseBuffer()
496     {
497         int state = STATE_NEUTRAL;
498         Line line = buffer.getFirstLine();
499         boolean changed = false;
500         while (line != null) {
501             int oldflags = line.flags();
502             if (state == STATE_HERE_DOCUMENT) {
503                 if (line.getText().equals(endOfText))
504                     state = STATE_NEUTRAL;
505             }
506             if (state == STATE_POD) {
507                 if (line.getText().startsWith("=cut")) {
508                     if (state != oldflags) {
509                         line.setFlags(state);
510                         changed = true;
511                     }
512                     state = STATE_NEUTRAL;
513                     line = line.next();
514                     continue;
515                 }
516             }
517             // Assume no multiline quotes.
518
if (state == STATE_QUOTE || state == STATE_SINGLEQUOTE)
519                 state = STATE_NEUTRAL;
520             if (state == STATE_NEUTRAL)
521                 if (line.getText().startsWith("="))
522                     state = STATE_POD;
523             if (state != oldflags) {
524                 line.setFlags(state);
525                 changed = true;
526             }
527             if (state == STATE_HERE_DOCUMENT || state == STATE_POD) {
528                 line = line.next();
529                 continue;
530             }
531             final int limit = line.length();
532             for (int i = 0; i < limit; i++) {
533                 char c = line.charAt(i);
534                 if (c == '\\' && i < limit-1) {
535                     // Escape.
536
++i;
537                     continue;
538                 }
539                 if (state == STATE_QUOTE) {
540                     if (c == '"')
541                         state = STATE_NEUTRAL;
542                     continue;
543                 }
544                 if (state == STATE_SINGLEQUOTE) {
545                     if (c == '\'')
546                         state = STATE_NEUTRAL;
547                     continue;
548                 }
549                 // Not in comment or quoted string.
550
if (c == '$' && i < limit-1) {
551                     // In effect, another kind of escape.
552
// Next char can be quote or single quote but should be ignored.
553
++i;
554                     continue;
555                 }
556                 if (c == '<' && i < limit-2) {
557                     if (line.charAt(i+1) == '<') {
558                         // Line must have semicolon at end.
559
if (line.trim().endsWith(";")) {
560                             endOfText = line.substring(i+2).trim();
561                             int length = endOfText.length();
562                             // Remove ';' at end of line.
563
if (length > 0 && endOfText.charAt(length-1) == ';')
564                                 endOfText = endOfText.substring(0, --length);
565                             // Remove ')' if any.
566
if (length > 0 && endOfText.charAt(length-1) == ')')
567                                 endOfText = endOfText.substring(0, --length);
568                             if (length > 2) {
569                                 if (endOfText.charAt(0) == '"' && endOfText.charAt(length-1) == '"')
570                                     // Removed enclosing double quotes.
571
endOfText = endOfText.substring(1, length - 1);
572                                 else if (endOfText.charAt(0) == '\'' && endOfText.charAt(length-1) == '\'')
573                                     // Removed enclosing single quotes.
574
endOfText = endOfText.substring(1, length - 1);
575                             }
576                             if (endOfText.length() > 0) {
577                                 // Make sure "<<" is not shift operator.
578
if (Character.isLetter(endOfText.charAt(0))) {
579                                     state = STATE_HERE_DOCUMENT;
580                                     break;
581                                 }
582                             }
583                         }
584                     }
585                     continue;
586                 }
587                 if (c == '#')
588                     // Single-line comment beginning. Ignore rest of line.
589
break;
590                 else if (c == '"')
591                     state = STATE_QUOTE;
592                 else if (c == '\'')
593                     state = STATE_SINGLEQUOTE;
594             }
595             line = line.next();
596         }
597         buffer.setNeedsParsing(false);
598         return changed;
599     }
600
601     private static final String JavaDoc[] perlFunctions =
602     {
603         "abs",
604         "accept",
605         "alarm",
606         "atan2",
607         "bind",
608         "binmode",
609         "bless",
610         "caller",
611         "chdir",
612         "chmod",
613         "chomp",
614         "chop",
615         "chown",
616         "chr",
617         "chroot",
618         "close",
619         "closedir",
620         "connect",
621         "cos",
622         "crypt",
623         "dbmclose",
624         "dbmopen",
625         "defined",
626         "delete",
627         "die",
628         "dump",
629         "each",
630         "eof",
631         "eval",
632         "exec",
633         "exists",
634         "exit",
635         "exp",
636         "fcntl",
637         "fileno",
638         "flock",
639         "fork",
640         "format",
641         "formline",
642         "getc",
643         "getgrent",
644         "getgrgid",
645         "getgrnam",
646         "gethostbyaddr",
647         "gethostbyname",
648         "gethostent",
649         "getlogin",
650         "getnetbyaddr",
651         "getnetbyname",
652         "getnetent",
653         "getpeername",
654         "getpgrp",
655         "getppid",
656         "getpriority",
657         "getprotobyname",
658         "getprotobynumber",
659         "getprotoent",
660         "getpwent",
661         "getpwnam",
662         "getpwuid",
663         "getservbyname",
664         "getservbyport",
665         "getservent",
666         "getsockname",
667         "getsockopt",
668         "glob",
669         "gmtime",
670         "grep",
671         "hex",
672         "import",
673         "index",
674         "int",
675         "ioctl",
676         "join",
677         "keys",
678         "kill",
679         "lc",
680         "lcfirst",
681         "length",
682         "link",
683         "listen",
684         "localtime",
685         "log",
686         "lstat",
687         "map",
688         "mkdir",
689         "msgctl",
690         "msgget",
691         "msgrcv",
692         "msgsnd",
693         "oct",
694         "open",
695         "opendir",
696         "ord",
697         "pack",
698         "pipe",
699         "pop",
700         "pos",
701         "print",
702         "printf",
703         "push",
704         "quotemeta",
705         "rand",
706         "read",
707         "readdir",
708         "readlink",
709         "recv",
710         "rename",
711         "reset",
712         "reverse",
713         "rewinddir",
714         "rindex",
715         "rmdir",
716         "scalar",
717         "seek",
718         "seekdir",
719         "select",
720         "semctl",
721         "semget",
722         "semop",
723         "send",
724         "setpgrp",
725         "setpriority",
726         "setsockopt",
727         "shift",
728         "shmctl",
729         "shmget",
730         "shmread",
731         "shmwrite",
732         "shutdown",
733         "sin",
734         "sleep",
735         "socket",
736         "socketpair",
737         "sort",
738         "splice",
739         "split",
740         "sprintf",
741         "sqrt",
742         "srand",
743         "stat",
744         "study",
745         "substr",
746         "symlink",
747         "syscall",
748         "sysopen",
749         "sysread",
750         "system",
751         "syswrite",
752         "tell",
753         "telldir",
754         "time",
755         "times",
756         "truncate",
757         "uc",
758         "ucfirst",
759         "umask",
760         "unlink",
761         "unpack",
762         "unshift",
763         "utime",
764         "values",
765         "vec",
766         "wait",
767         "waitpid",
768         "wantarray",
769         "warn",
770         "write"
771     };
772
773     private final boolean isFunction(String JavaDoc s)
774     {
775         if (functions == null)
776             return false;
777         return functions.contains(s);
778     }
779
780     public FormatTable getFormatTable()
781     {
782         if (formatTable == null) {
783             formatTable = new FormatTable("PerlMode");
784             formatTable.addEntryFromPrefs(PERL_FORMAT_TEXT, "text");
785             formatTable.addEntryFromPrefs(PERL_FORMAT_COMMENT, "comment");
786             formatTable.addEntryFromPrefs(PERL_FORMAT_STRING, "string");
787             formatTable.addEntryFromPrefs(PERL_FORMAT_KEYWORD, "keyword");
788             formatTable.addEntryFromPrefs(PERL_FORMAT_FUNCTION, "function");
789             formatTable.addEntryFromPrefs(PERL_FORMAT_BRACE, "brace");
790             formatTable.addEntryFromPrefs(PERL_FORMAT_NUMBER, "number");
791             formatTable.addEntryFromPrefs(PERL_FORMAT_SCALAR, "scalar");
792             formatTable.addEntryFromPrefs(PERL_FORMAT_LIST, "list");
793         }
794         return formatTable;
795     }
796 }
797
Popular Tags