package net.sourceforge.pmd.cpd;

import java.util.List;

/**
 * Line-oriented tokenizer for Ruby source used by copy/paste detection.
 *
 * <p>Each line supplied by the {@link SourceCode} is split into tokens:
 * <ul>
 *   <li>runs of characters delimited by whitespace or by one of the ignored
 *       punctuation characters <code>{ } ( ) ; ,</code>;</li>
 *   <li>string literals delimited by <code>'</code> or <code>"</code>, quotes
 *       included, emitted as a single token;</li>
 *   <li>comments, from <code>#</code> to the end of the line, emitted as a
 *       single token.</li>
 * </ul>
 * The words <code>do</code> and <code>end</code> are dropped, and the remaining
 * tokens are lower-cased before being recorded.
 *
 * <p>Strings are parsed one line at a time; a literal that is not closed on its
 * own line simply ends at the end of that line.
 */
public class RubyTokenizer implements Tokenizer {

    /** When true, every token image is lower-cased before it is recorded. */
    private boolean downcaseString = true;

    /**
     * Tokenizes every line of the given source and appends the resulting
     * entries, followed by the EOF marker, to <code>tokenEntries</code>.
     *
     * @param tokens       the source to read lines and the file name from
     * @param tokenEntries the collector that receives the token entries
     */
    public void tokenize(SourceCode tokens, Tokens tokenEntries) {
        List code = tokens.getCode();
        for (int i = 0; i < code.size(); i++) {
            String currentLine = (String) code.get(i);
            int loc = 0;
            while (loc < currentLine.length()) {
                StringBuffer token = new StringBuffer();
                loc = getTokenFromLine(currentLine, token, loc);
                if (token.length() == 0) {
                    continue;
                }
                String image = token.toString();
                // The ignorable check is done on the original spelling, before
                // lower-casing, so only the exact words "do"/"end" are dropped.
                if (isIgnorableString(image)) {
                    continue;
                }
                if (downcaseString) {
                    image = image.toLowerCase();
                }
                // i is a zero-based index into the line list; i + 1 is passed as
                // the entry's line argument (presumably a 1-based line number).
                tokenEntries.add(new TokenEntry(image, tokens.getFileName(), i + 1));
            }
        }
        tokenEntries.add(TokenEntry.getEOF());
    }

    /**
     * Extracts the next token of <code>line</code>, starting the scan at
     * <code>loc</code>, and appends its text to <code>token</code>.
     *
     * @param line  the line being scanned
     * @param token receives the token text; left empty when only whitespace or
     *              ignored characters remain
     * @param loc   index at which to start scanning
     * @return the index at which the next scan should start
     */
    private int getTokenFromLine(String line, StringBuffer token, int loc) {
        for (int j = loc; j < line.length(); j++) {
            char tok = line.charAt(j);
            if (Character.isWhitespace(tok) || ignoreCharacter(tok)) {
                // A separator ends the token in progress; leading separators
                // are simply skipped.
                if (token.length() > 0) {
                    return j;
                }
            } else if (isComment(tok)) {
                if (token.length() > 0) {
                    // Finish the current token first; the comment is picked up
                    // by the next call.
                    return j;
                }
                // Start at j (the '#'), not at loc: loc may still point at a
                // skipped separator, which must not leak into the comment token.
                return getCommentToken(line, token, j);
            } else if (isString(tok)) {
                if (token.length() > 0) {
                    // The string literal is parsed as a separate token by the
                    // next call.
                    return j;
                }
                return parseString(line, token, j, tok);
            } else {
                token.append(tok);
            }
        }
        // The scan ran off the end of the line.
        return line.length();
    }

    /**
     * Reads a string literal that starts at <code>loc</code> and appends it,
     * including both quotes, to <code>token</code>. A quote or backslash that
     * is preceded by an unescaped backslash does not terminate the literal.
     *
     * @param line       the line being scanned
     * @param token      receives the literal; expected to be empty on entry so
     *                   that the opening quote is not mistaken for the closing one
     * @param loc        index of the opening quote
     * @param stringType the quote character that opened the literal
     * @return the index of the first character after the literal, or
     *         <code>line.length()</code> if the literal is not closed on this line
     */
    private int parseString(String line, StringBuffer token, int loc, char stringType) {
        boolean escaped = false;
        boolean done = false;
        while ((loc < line.length()) && !done) {
            char tok = line.charAt(loc);
            if (escaped) {
                // Whatever follows a backslash is taken literally, including a
                // second backslash or the quote character.
                escaped = false;
            } else if (tok == '\\') {
                escaped = true;
            } else if (tok == stringType && token.length() > 0) {
                // An unescaped quote after the opening one closes the literal.
                done = true;
            }
            token.append(tok);
            loc++;
        }
        // loc already points past the last consumed character; returning
        // loc + 1 would drop the character that follows the literal.
        return loc;
    }

    /**
     * Tells whether <code>tok</code> is punctuation that separates tokens but
     * is never part of one.
     */
    private boolean ignoreCharacter(char tok) {
        switch (tok) {
            case '{':
            case '}':
            case '(':
            case ')':
            case ';':
            case ',':
                return true;
            default:
                return false;
        }
    }

    /** Tells whether <code>tok</code> opens a string literal. */
    private boolean isString(char tok) {
        switch (tok) {
            case '\'':
            case '"':
                return true;
            default:
                return false;
        }
    }

    /** Tells whether <code>tok</code> starts a comment. */
    private boolean isComment(char tok) {
        return tok == '#';
    }

    /**
     * Appends everything from <code>loc</code> to the end of the line to
     * <code>token</code>.
     *
     * @return the index just past the end of the line
     */
    private int getCommentToken(String line, StringBuffer token, int loc) {
        while (loc < line.length()) {
            token.append(line.charAt(loc));
            loc++;
        }
        return loc;
    }

    /** Tells whether the token is one of the block keywords that are not recorded. */
    private boolean isIgnorableString(String token) {
        return "do".equals(token) || "end".equals(token);
    }
}