1 21 22 package org.armedbear.j; 23 24 import gnu.regexp.RE; 25 import gnu.regexp.REMatch; 26 import gnu.regexp.UncheckedRE; 27 28 public final class PerlFormatter extends Formatter 29 { 30 private static final int STATE_VARIABLE = STATE_LAST + 1; 31 private static final int STATE_HERE_DOCUMENT = STATE_LAST + 2; 32 private static final int STATE_POD = STATE_LAST + 3; 33 private static final int STATE_REGEXP_DELIMITER = STATE_LAST + 4; 34 private static final int STATE_REGEXP = STATE_LAST + 5; 35 private static final int STATE_SUBST = STATE_LAST + 6; 36 37 private static final String punctuation = "&`^:+#-%'\"/~_"; 38 39 private static final int PERL_FORMAT_TEXT = 0; 41 private static final int PERL_FORMAT_COMMENT = 1; 42 private static final int PERL_FORMAT_STRING = 2; 43 private static final int PERL_FORMAT_KEYWORD = 3; 44 private static final int PERL_FORMAT_FUNCTION = 4; 45 private static final int PERL_FORMAT_BRACE = 5; 46 private static final int PERL_FORMAT_NUMBER = 6; 47 private static final int PERL_FORMAT_SCALAR = 7; 48 private static final int PERL_FORMAT_LIST = 8; 49 50 private static StringSet functions; 51 52 private FastStringBuffer sb = new FastStringBuffer(); 53 54 private String endOfText; 55 56 private static RE matchRE = new UncheckedRE("(=~|!~)[ \t]+m[^a-zA-Z0-9]"); 57 58 public PerlFormatter(Buffer buffer) 59 { 60 this.buffer = buffer; 61 if (functions == null) 62 functions = new StringSet(perlFunctions); 63 } 64 65 private void endToken(int state) 66 { 67 if (sb.length() > 0) { 68 int format = -1; 69 switch (state) { 70 case STATE_NEUTRAL: 71 break; 72 case STATE_QUOTE: 73 case STATE_SINGLEQUOTE: 74 case STATE_HERE_DOCUMENT: 75 case STATE_REGEXP: 76 case STATE_SUBST: 77 format = PERL_FORMAT_STRING; 78 break; 79 case STATE_REGEXP_DELIMITER: 80 format = PERL_FORMAT_FUNCTION; 81 break; 82 case STATE_IDENTIFIER: 83 break; 84 case STATE_COMMENT: 85 case STATE_POD: 86 format = PERL_FORMAT_COMMENT; 87 break; 88 case STATE_BRACE: 89 format = PERL_FORMAT_BRACE; 90 break; 91 case STATE_NUMBER: 92 case STATE_HEXNUMBER: 93 format = PERL_FORMAT_NUMBER; 94 break; 95 } 96 addSegment(sb.toString(), format); 97 sb.setLength(0); 98 } 99 } 100 101 private void parseLine(String text, int state) 102 { 103 if (Editor.tabsAreVisible()) 104 text = Utilities.makeTabsVisible(text, buffer.getTabWidth()); 105 else 106 text = Utilities.detab(text, buffer.getTabWidth()); 107 clearSegmentList(); 108 sb.setLength(0); 109 int i = 0; 110 if (state == STATE_HERE_DOCUMENT) { 111 if (text.startsWith(endOfText)) 112 state = STATE_NEUTRAL; 113 else { 114 sb.append(text); 115 endToken(state); 116 return; 117 } 118 } 119 if (state == STATE_POD) { 120 sb.append(text); 121 endToken(state); 122 return; 123 } 124 final int limit = text.length(); 125 char c; 126 while (i < limit) { 128 c = text.charAt(i); 129 if (Character.isWhitespace(c)) { 130 sb.append(c); 131 ++i; 132 } else { 133 endToken(state); 134 break; 135 } 136 } 137 char delimiter = 0; 138 while (i < limit) { 139 c = text.charAt(i); 140 if (c == '\\') { 141 sb.append(c); 143 if (i < limit-1) 144 sb.append(text.charAt(++i)); 145 ++i; 146 continue; 147 } 148 if (state == STATE_QUOTE) { 149 sb.append(c); 150 if (c == '"') { 151 endToken(state); 152 state = STATE_NEUTRAL; 153 } 154 ++i; 155 continue; 156 } 157 if (state == STATE_SINGLEQUOTE) { 158 sb.append(c); 159 if (c == '\'') { 160 endToken(state); 161 state = STATE_NEUTRAL; 162 } 163 ++i; 164 continue; 165 } 166 if (state == STATE_REGEXP) { 167 if (c == delimiter) { 168 endToken(state); 169 sb.append(c); 170 endToken(STATE_REGEXP_DELIMITER); 171 state = STATE_NEUTRAL; 172 } else 173 sb.append(c); 174 ++i; 175 continue; 176 } 177 if (state == STATE_SUBST) { 178 if (c == delimiter) { 179 endToken(state); 180 sb.append(c); 181 endToken(STATE_REGEXP_DELIMITER); 182 state = STATE_REGEXP; 183 } else 184 sb.append(c); 185 ++i; 186 continue; 187 } 188 if (c == '{' || c == '}') { 190 endToken(state); 191 sb.append(c); 192 endToken(STATE_BRACE); 193 state = STATE_NEUTRAL; 194 ++i; 195 continue; 196 } 197 if (state == STATE_VARIABLE) { 198 boolean ok = false; 199 if (PerlMode.isIdentifierChar(c)) 200 ok = true; 201 else if (sb.length() == 1 && punctuation.indexOf(c) >= 0) 202 ok = true; 203 if (ok) 204 sb.append(c); 205 else { 206 endToken(state); 207 sb.append(c); 208 state = STATE_NEUTRAL; 209 } 210 ++i; 211 continue; 212 } 213 if (c == '"') { 214 endToken(state); 215 sb.append(c); 216 state = STATE_QUOTE; 217 ++i; 218 continue; 219 } 220 if (c == '\'') { 221 endToken(state); 222 sb.append(c); 223 state = STATE_SINGLEQUOTE; 224 ++i; 225 continue; 226 } 227 if (c == '=' || c == '!') { 228 REMatch match = matchRE.getMatch(text.substring(i)); 229 if (match != null) { 230 final String s = match.toString(); 231 final int length = s.length(); 232 endToken(state); 234 sb.append(s.substring(0, 2)); 235 endToken(STATE_NEUTRAL); 236 i += 2; 237 sb.append(s.substring(2)); 238 endToken(STATE_REGEXP_DELIMITER); 239 i += length - 2; 240 delimiter = s.charAt(length - 1); 241 if (delimiter == '{') 242 delimiter = '}'; 243 state = STATE_REGEXP; 244 } else { 245 sb.append(c); 246 ++i; 247 } 248 continue; 249 } 250 if (c == '/') { 251 if (isSubst(text, i)) { 252 delimiter = '/'; 253 sb.append(c); 254 endToken(STATE_REGEXP_DELIMITER); 255 state = STATE_SUBST; 256 } else if (isRegExp(text, i)) { 257 delimiter = '/'; 258 if (i > 0 && text.charAt(i-1) != 'm') 260 endToken(state); 261 sb.append(c); 262 endToken(STATE_REGEXP_DELIMITER); 263 state = STATE_REGEXP; 264 } else { 265 sb.append(c); 267 } 268 ++i; 269 continue; 270 } 271 if (c == '#') { 272 endToken(state); 273 state = STATE_COMMENT; 274 sb.append(text.substring(i)); 275 endToken(state); 276 return; 277 } 278 if (state == STATE_IDENTIFIER) { 279 if (PerlMode.isIdentifierChar(c)) 280 sb.append(c); 281 else { 282 endToken(state); 283 sb.append(c); 284 state = STATE_NEUTRAL; 285 } 286 ++i; 287 continue; 288 } 289 if (state == STATE_NUMBER) { 290 if (Character.isDigit(c)) 291 sb.append(c); 292 else if (sb.length() == 1 && c == 'x' || c == 'X') { 293 sb.append(c); 294 state = STATE_HEXNUMBER; 295 } else { 296 endToken(state); 297 sb.append(c); 298 if (PerlMode.isIdentifierChar(c)) 299 state = STATE_IDENTIFIER; 300 else 301 state = STATE_NEUTRAL; 302 } 303 ++i; 304 continue; 305 } 306 if (state == STATE_HEXNUMBER) { 307 if (Character.isDigit(c)) 308 sb.append(c); 309 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) 310 sb.append(c); 311 else { 312 endToken(state); 313 sb.append(c); 314 if (PerlMode.isIdentifierChar(c)) 315 state = STATE_IDENTIFIER; 316 else 317 state = STATE_NEUTRAL; 318 } 319 ++i; 320 continue; 321 } 322 if (state == STATE_NEUTRAL) { 323 if (c == '$') { 324 endToken(state); 325 sb.append(c); 326 state = STATE_VARIABLE; 327 } else if (PerlMode.isIdentifierChar(c)) { 328 endToken(state); 329 sb.append(c); 330 state = STATE_IDENTIFIER; 331 } else if (Character.isDigit(c)) { 332 endToken(state); 333 sb.append(c); 334 state = STATE_NUMBER; 335 } else sb.append(c); 337 } 338 ++i; 339 } 340 endToken(state); 341 } 342 343 public static boolean isSubst(String text, int i) 345 { 346 Debug.assertTrue(text.charAt(i) == '/'); 347 if (text.regionMatches(i-2, "tr/", 0, 3)) { 348 if (i < 3) 349 return true; 350 char c = text.charAt(i-3); 351 if (PerlMode.getMode().isIdentifierPart(c)) 352 return false; 353 else 354 return true; 355 } 356 if (text.regionMatches(i-1, "s/", 0, 2)) { 357 if (i < 2) 358 return true; 359 char c = text.charAt(i-2); 360 if (PerlMode.getMode().isIdentifierPart(c)) 361 return false; 362 else 363 return true; 364 } 365 if (text.regionMatches(i-1, "y/", 0, 2)) { 366 if (i < 2) 367 return true; 368 char c = text.charAt(i-2); 369 if (PerlMode.getMode().isIdentifierPart(c)) 370 return false; 371 else 372 return true; 373 } 374 return false; 375 } 376 377 public static boolean isRegExp(String text, int i) 379 { 380 Debug.assertTrue(text.charAt(i) == '/'); 381 if (i == 0) { 382 return true; 384 } 385 char c = text.charAt(i-1); 387 if (c == '(') 388 return true; 389 if (c == 'm') { 390 if (i-2 < 0) 391 return true; 392 c = text.charAt(i-2); 393 if (c == '(' || Character.isWhitespace(c)) 394 return true; 395 return false; 396 } 397 if (PerlMode.isIdentifierChar(c)) 401 return false; 402 403 if (!Character.isWhitespace(c)) 404 return false; 405 406 final String s = text.substring(0, i-1).trim(); 408 final int length = s.length(); 409 if (length == 0) { 410 return true; 412 } 413 c = s.charAt(length-1); 414 if (c == ')') 415 return false; if (c == '}') 417 return false; 418 if (!PerlMode.isIdentifierChar(c)) 419 return true; 420 421 if (s.endsWith("and")) { 423 if (length == 3 || Character.isWhitespace(s.charAt(length-4))) 424 return true; 425 } else if (s.endsWith("or")) { 426 if (length == 2 || Character.isWhitespace(s.charAt(length-3))) 427 return true; 428 } else if (s.endsWith("not")) { 429 if (length == 3 || Character.isWhitespace(s.charAt(length-4))) 430 return true; 431 } 432 433 return false; 434 } 435 436 public LineSegmentList formatLine(Line line) 437 { 438 if (line == null) { 439 clearSegmentList(); 440 addSegment("", PERL_FORMAT_TEXT); 441 return segmentList; 442 } 443 parseLine(line.getText(), line.flags()); 444 final int tokenCount = segmentList.size(); 445 for (int i = 0; i < tokenCount; i++) { 446 LineSegment segment = segmentList.getSegment(i); 447 if (segment.getFormat() >= 0) 448 continue; 449 String s = segment.getText(); 450 if (isKeyword(s)) { 451 segment.setFormat(PERL_FORMAT_KEYWORD); 452 continue; 453 } 454 char c = s.charAt(0); 455 if (c == '$') { 456 segment.setFormat(PERL_FORMAT_SCALAR); 457 continue; 458 } 459 if (c == '%' || c == '@') { 460 segment.setFormat(PERL_FORMAT_LIST); 461 continue; 462 } 463 boolean isFunction = false; 464 if (PerlMode.isIdentifierChar(c)) { 465 boolean maybeFunction = true; 466 final int length = s.length(); 467 for (int j = 1; j < length; j++) { 468 if (!PerlMode.isIdentifierChar(s.charAt(j))) { 469 maybeFunction = false; 470 break; 471 } 472 } 473 if (maybeFunction) { 474 if (isFunction(s)) 475 isFunction = true; 476 else if (i > 1) { 477 LineSegment prevSegment = segmentList.getSegment(i-2); 480 if (prevSegment.getText().trim().equals("sub")) 481 isFunction = true; 482 } 483 if (!isFunction && i < segmentList.size()-1) { 484 LineSegment nextSegment = segmentList.getSegment(i+1); 485 if (nextSegment.getText().trim().startsWith("(")) 486 isFunction = true; 487 } 488 } 489 } 490 segment.setFormat(isFunction ? PERL_FORMAT_FUNCTION : PERL_FORMAT_TEXT); 491 } 492 return segmentList; 493 } 494 495 public boolean parseBuffer() 496 { 497 int state = STATE_NEUTRAL; 498 Line line = buffer.getFirstLine(); 499 boolean changed = false; 500 while (line != null) { 501 int oldflags = line.flags(); 502 if (state == STATE_HERE_DOCUMENT) { 503 if (line.getText().equals(endOfText)) 504 state = STATE_NEUTRAL; 505 } 506 if (state == STATE_POD) { 507 if (line.getText().startsWith("=cut")) { 508 if (state != oldflags) { 509 line.setFlags(state); 510 changed = true; 511 } 512 state = STATE_NEUTRAL; 513 line = line.next(); 514 continue; 515 } 516 } 517 if (state == STATE_QUOTE || state == STATE_SINGLEQUOTE) 519 state = STATE_NEUTRAL; 520 if (state == STATE_NEUTRAL) 521 if (line.getText().startsWith("=")) 522 state = STATE_POD; 523 if (state != oldflags) { 524 line.setFlags(state); 525 changed = true; 526 } 527 if (state == STATE_HERE_DOCUMENT || state == STATE_POD) { 528 line = line.next(); 529 continue; 530 } 531 final int limit = line.length(); 532 for (int i = 0; i < limit; i++) { 533 char c = line.charAt(i); 534 if (c == '\\' && i < limit-1) { 535 ++i; 537 continue; 538 } 539 if (state == STATE_QUOTE) { 540 if (c == '"') 541 state = STATE_NEUTRAL; 542 continue; 543 } 544 if (state == STATE_SINGLEQUOTE) { 545 if (c == '\'') 546 state = STATE_NEUTRAL; 547 continue; 548 } 549 if (c == '$' && i < limit-1) { 551 ++i; 554 continue; 555 } 556 if (c == '<' && i < limit-2) { 557 if (line.charAt(i+1) == '<') { 558 if (line.trim().endsWith(";")) { 560 endOfText = line.substring(i+2).trim(); 561 int length = endOfText.length(); 562 if (length > 0 && endOfText.charAt(length-1) == ';') 564 endOfText = endOfText.substring(0, --length); 565 if (length > 0 && endOfText.charAt(length-1) == ')') 567 endOfText = endOfText.substring(0, --length); 568 if (length > 2) { 569 if (endOfText.charAt(0) == '"' && endOfText.charAt(length-1) == '"') 570 endOfText = endOfText.substring(1, length - 1); 572 else if (endOfText.charAt(0) == '\'' && endOfText.charAt(length-1) == '\'') 573 endOfText = endOfText.substring(1, length - 1); 575 } 576 if (endOfText.length() > 0) { 577 if (Character.isLetter(endOfText.charAt(0))) { 579 state = STATE_HERE_DOCUMENT; 580 break; 581 } 582 } 583 } 584 } 585 continue; 586 } 587 if (c == '#') 588 break; 590 else if (c == '"') 591 state = STATE_QUOTE; 592 else if (c == '\'') 593 state = STATE_SINGLEQUOTE; 594 } 595 line = line.next(); 596 } 597 buffer.setNeedsParsing(false); 598 return changed; 599 } 600 601 private static final String [] perlFunctions = 602 { 603 "abs", 604 "accept", 605 "alarm", 606 "atan2", 607 "bind", 608 "binmode", 609 "bless", 610 "caller", 611 "chdir", 612 "chmod", 613 "chomp", 614 "chop", 615 "chown", 616 "chr", 617 "chroot", 618 "close", 619 "closedir", 620 "connect", 621 "cos", 622 "crypt", 623 "dbmclose", 624 "dbmopen", 625 "defined", 626 "delete", 627 "die", 628 "dump", 629 "each", 630 "eof", 631 "eval", 632 "exec", 633 "exists", 634 "exit", 635 "exp", 636 "fcntl", 637 "fileno", 638 "flock", 639 "fork", 640 "format", 641 "formline", 642 "getc", 643 "getgrent", 644 "getgrgid", 645 "getgrnam", 646 "gethostbyaddr", 647 "gethostbyname", 648 "gethostent", 649 "getlogin", 650 "getnetbyaddr", 651 "getnetbyname", 652 "getnetent", 653 "getpeername", 654 "getpgrp", 655 "getppid", 656 "getpriority", 657 "getprotobyname", 658 "getprotobynumber", 659 "getprotoent", 660 "getpwent", 661 "getpwnam", 662 "getpwuid", 663 "getservbyname", 664 "getservbyport", 665 "getservent", 666 "getsockname", 667 "getsockopt", 668 "glob", 669 "gmtime", 670 "grep", 671 "hex", 672 "import", 673 "index", 674 "int", 675 "ioctl", 676 "join", 677 "keys", 678 "kill", 679 "lc", 680 "lcfirst", 681 "length", 682 "link", 683 "listen", 684 "localtime", 685 "log", 686 "lstat", 687 "map", 688 "mkdir", 689 "msgctl", 690 "msgget", 691 "msgrcv", 692 "msgsnd", 693 "oct", 694 "open", 695 "opendir", 696 "ord", 697 "pack", 698 "pipe", 699 "pop", 700 "pos", 701 "print", 702 "printf", 703 "push", 704 "quotemeta", 705 "rand", 706 "read", 707 "readdir", 708 "readlink", 709 "recv", 710 "rename", 711 "reset", 712 "reverse", 713 "rewinddir", 714 "rindex", 715 "rmdir", 716 "scalar", 717 "seek", 718 "seekdir", 719 "select", 720 "semctl", 721 "semget", 722 "semop", 723 "send", 724 "setpgrp", 725 "setpriority", 726 "setsockopt", 727 "shift", 728 "shmctl", 729 "shmget", 730 "shmread", 731 "shmwrite", 732 "shutdown", 733 "sin", 734 "sleep", 735 "socket", 736 "socketpair", 737 "sort", 738 "splice", 739 "split", 740 "sprintf", 741 "sqrt", 742 "srand", 743 "stat", 744 "study", 745 "substr", 746 "symlink", 747 "syscall", 748 "sysopen", 749 "sysread", 750 "system", 751 "syswrite", 752 "tell", 753 "telldir", 754 "time", 755 "times", 756 "truncate", 757 "uc", 758 "ucfirst", 759 "umask", 760 "unlink", 761 "unpack", 762 "unshift", 763 "utime", 764 "values", 765 "vec", 766 "wait", 767 "waitpid", 768 "wantarray", 769 "warn", 770 "write" 771 }; 772 773 private final boolean isFunction(String s) 774 { 775 if (functions == null) 776 return false; 777 return functions.contains(s); 778 } 779 780 public FormatTable getFormatTable() 781 { 782 if (formatTable == null) { 783 formatTable = new FormatTable("PerlMode"); 784 formatTable.addEntryFromPrefs(PERL_FORMAT_TEXT, "text"); 785 formatTable.addEntryFromPrefs(PERL_FORMAT_COMMENT, "comment"); 786 formatTable.addEntryFromPrefs(PERL_FORMAT_STRING, "string"); 787 formatTable.addEntryFromPrefs(PERL_FORMAT_KEYWORD, "keyword"); 788 formatTable.addEntryFromPrefs(PERL_FORMAT_FUNCTION, "function"); 789 formatTable.addEntryFromPrefs(PERL_FORMAT_BRACE, "brace"); 790 formatTable.addEntryFromPrefs(PERL_FORMAT_NUMBER, "number"); 791 formatTable.addEntryFromPrefs(PERL_FORMAT_SCALAR, "scalar"); 792 formatTable.addEntryFromPrefs(PERL_FORMAT_LIST, "list"); 793 } 794 return formatTable; 795 } 796 } 797 | Popular Tags |