KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > ruby > lexer > RubyCommentLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19 package org.netbeans.modules.ruby.lexer;
20
21 import java.util.ArrayList JavaDoc;
22 import java.util.List JavaDoc;
23 import java.util.prefs.BackingStoreException JavaDoc;
24 import java.util.prefs.Preferences JavaDoc;
25 import org.netbeans.modules.ruby.*;
26 import org.netbeans.api.lexer.Token;
27 import org.netbeans.spi.lexer.Lexer;
28 import org.netbeans.spi.lexer.LexerInput;
29 import org.netbeans.spi.lexer.LexerRestartInfo;
30 import org.netbeans.spi.lexer.TokenFactory;
31 import org.openide.ErrorManager;
32 import org.openide.util.NbPreferences;
33
34
35 /**
36  * Lexical analyzer for Ruby comments which identifies TODO markers
37  * and highlights them specially.
38  *
39  * @todo Handle rdoc on/off directives (#++,#--). Since these occur on separate
40  * lines I can't handle it now.
41  * @todo Highlight only RDoc reserved words, or all that fit the pattern? For
42  * now I'm highlighting :\w+: sequences. Possibly I should only highlight
43  *
44  * @author Tor Norbye
45  */

46 public final class RubyCommentLexer implements Lexer<RubyCommentTokenId> {
47     private static final int EOF = LexerInput.EOF;
48     private static final String JavaDoc[] RDOC_DIRECTIVES =
49         {
50             "arg", "args", "yield", "yields", "notnew", "not-new", "not_new", "doc", "nodoc",
51             "stopdoc", "startdoc", "enddoc", "main", "title", "section", "include"
52         };
53     private LexerInput input;
54     private TokenFactory<RubyCommentTokenId> tokenFactory;
55     private boolean inWord;
56     private String JavaDoc[] markers;
57
58     public RubyCommentLexer(LexerRestartInfo<RubyCommentTokenId> info) {
59         this.input = info.input();
60         this.tokenFactory = info.tokenFactory();
61         assert (info.state() == null); // passed argument always null
62
}
63
64     public Object JavaDoc state() {
65         return null;
66     }
67
68     /**
69      * Compute the set of markers to scan for in the user source code.
70      * The code tries to look for the same markers used by the TODO module
71      * in case the user has customized the set. (However, it is doing this
72      * by peeking at the Preferences possibly left by the docscan module,
73      * rather than having a contract API with it, based on
74      * tasklist/docscan/src/org/netbeans/modules/tasklist/docscan/Settings.java)
75      */

76     private String JavaDoc[] getTodoMarkers() {
77         if (markers == null) {
78             final String JavaDoc MARKER_PREFIX = "Tag"; // NOI18N
79
final int MARKER_PREFIX_LENGTH = MARKER_PREFIX.length();
80             List JavaDoc<String JavaDoc> markerList = new ArrayList JavaDoc<String JavaDoc>();
81
82             try {
83                 Preferences JavaDoc preferences =
84                     NbPreferences.root().node("org/netbeans/modules/tasklist/docscan"); // NOI18N
85
String JavaDoc[] keys = preferences.keys();
86
87                 for (int i = 0; i < keys.length; i++) {
88                     String JavaDoc key = keys[i];
89
90                     if ((key != null) && key.startsWith(MARKER_PREFIX)) {
91                         markerList.add(key.substring(MARKER_PREFIX_LENGTH));
92                     }
93                 }
94             } catch (BackingStoreException JavaDoc bse) {
95                 ErrorManager.getDefault().notify(bse);
96             }
97
98             if (markerList.size() > 0) {
99                 markerList.remove("@todo"); // Applies to javadoc, and these tags are now colorized separately
100
markers = markerList.toArray(new String JavaDoc[markerList.size()]);
101             } else {
102                 // Additional candidates: HACK, WORKAROUND, REMOVE, OLD
103
markers = new String JavaDoc[] { "TODO", "FIXME", "XXX", "PENDING" }; // NOI18N
104
}
105         }
106
107         return markers;
108     }
109
110     public Preferences JavaDoc getDocscanPreferences() {
111         return NbPreferences.root().node("org/netbeans/modules/tasklist/docscan");
112     }
113
114     public Token<RubyCommentTokenId> nextToken() {
115         inWord = false;
116
117         while (true) {
118             int ch = input.read();
119
120             switch (ch) {
121             case EOF: {
122                 if (input.readLength() > 0) {
123                     return token(RubyCommentTokenId.COMMENT_TEXT);
124                 } else {
125                     return null;
126                 }
127             }
128
129             case '\\':
130                 // The next character is escaped...
131
input.read();
132
133                 continue;
134
135             case '\n':
136                 return token(RubyCommentTokenId.COMMENT_TEXT);
137
138             case '#': { // Linked method
139

140                 if (inWord) {
141                     break;
142                 }
143
144                 int originalLength = input.readLength();
145
146                 // See if we have what looks like a method name:
147
// method-only characters followed by whitespace, newlines or EOF:
148
while (ch != EOF) {
149                     ch = input.read();
150
151                     if ((ch == '$') || !Character.isJavaIdentifierPart(ch)) {
152                         input.backup(1);
153
154                         break;
155                     }
156                 }
157
158                 if (Character.isWhitespace(ch) || (ch == EOF) || (ch == '.') || (ch == ',') ||
159                         (ch == ')') || (ch == '}')) {
160                     if (originalLength > 1) {
161                         input.backup(input.readLengthEOF() - originalLength + 1);
162
163                         return token(RubyCommentTokenId.COMMENT_TEXT);
164                     }
165
166                     if (input.readLength() > 2) {
167                         return token(RubyCommentTokenId.COMMENT_LINK);
168                     }
169                 }
170
171                 break;
172             }
173
174             case 'f': // ftp:
175
case 'm': // mailto:
176
case 'w': // www.
177
case 'h': { // http links. TODO: link:, ftp:, mailto:, and www.
178

179                 if (inWord) {
180                     break;
181                 }
182
183                 int originalLength = input.readLength();
184                 boolean foundLinkBegin = false;
185
186                 if (ch == 'h') { // http:
187

188                     if (input.read() == 't') {
189                         if (input.read() == 't') {
190                             if (input.read() == 'p') {
191                                 if (input.read() == ':') {
192                                     foundLinkBegin = true;
193                                 } else {
194                                     input.backup(4);
195                                 }
196                             } else {
197                                 input.backup(3);
198                             }
199                         } else {
200                             input.backup(2);
201                         }
202                     } else {
203                         input.backup(1);
204                     }
205                 } else if (ch == 'f') { // ftp:
206

207                     if (input.read() == 't') {
208                         if (input.read() == 'p') {
209                             if (input.read() == ':') {
210                                 foundLinkBegin = true;
211                             } else {
212                                 input.backup(3);
213                             }
214                         } else {
215                             input.backup(2);
216                         }
217                     } else {
218                         input.backup(1);
219                     }
220                 } else if (ch == 'm') { // mailto:
221

222                     if (input.read() == 'a') {
223                         if (input.read() == 'i') {
224                             if (input.read() == 'l') {
225                                 if (input.read() == 't') {
226                                     if (input.read() == 'o') {
227                                         if (input.read() == ':') {
228                                             foundLinkBegin = true;
229                                         } else {
230                                             input.backup(6);
231                                         }
232                                     } else {
233                                         input.backup(5);
234                                     }
235                                 } else {
236                                     input.backup(4);
237                                 }
238                             } else {
239                                 input.backup(3);
240                             }
241                         } else {
242                             input.backup(2);
243                         }
244                     } else {
245                         input.backup(1);
246                     }
247                 } else if (ch == 'w') { // www.
248

249                     if (input.read() == 'w') {
250                         if (input.read() == 'w') {
251                             if (input.read() == '.') {
252                                 foundLinkBegin = true;
253                             } else {
254                                 input.backup(3);
255                             }
256                         } else {
257                             input.backup(2);
258                         }
259                     } else {
260                         input.backup(1);
261                     }
262                 }
263
264                 if (foundLinkBegin) {
265                     while (ch != EOF) {
266                         ch = input.read();
267
268                         if ((ch == ']') || (ch == ')') || Character.isWhitespace(ch) ||
269                                 (ch == '\'') || (ch == '"')) {
270                             input.backup(1);
271
272                             break;
273                         }
274                     }
275
276                     if (originalLength > 1) {
277                         input.backup(input.readLengthEOF() - originalLength + 1);
278
279                         return token(RubyCommentTokenId.COMMENT_TEXT);
280                     }
281
282                     if (input.readLength() > 2) {
283                         return token(RubyCommentTokenId.COMMENT_LINK);
284                     }
285                 }
286             }
287
288             case '_': // Italic text
289

290                 if (inWord) {
291                     break;
292                 }
293
294                 if (input.readLength() > 1) {
295                     input.backup(1);
296
297                     return token(RubyCommentTokenId.COMMENT_TEXT);
298                 }
299
300                 while (ch != EOF) {
301                     ch = input.read();
302
303                     if (ch == '_') {
304                         int next = input.read();
305                         input.backup(1);
306
307                         if (Character.isLetter(next) || (next == '_')) {
308                             continue;
309                         }
310
311                         if (input.readLength() > 2) {
312                             return token(RubyCommentTokenId.COMMENT_ITALIC);
313                         }
314                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
315                         break;
316                     }
317                 }
318
319                 break;
320
321             case '*': // Bold text
322

323                 if (inWord) {
324                     break;
325                 }
326
327                 if (input.readLength() > 1) {
328                     input.backup(1);
329
330                     return token(RubyCommentTokenId.COMMENT_TEXT);
331                 }
332
333                 while (ch != EOF) {
334                     ch = input.read();
335
336                     if ((ch == '*') && (input.readLength() > 2)) {
337                         return token(RubyCommentTokenId.COMMENT_BOLD);
338                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
339                         break;
340                     }
341                 }
342
343                 break;
344
345             case '+': // Typewriter text
346

347                 if (inWord) {
348                     break;
349                 }
350
351                 if (input.readLength() > 1) {
352                     input.backup(1);
353
354                     return token(RubyCommentTokenId.COMMENT_TEXT);
355                 }
356
357                 while (ch != EOF) {
358                     ch = input.read();
359
360                     if ((ch == '+') && (input.readLength() > 2)) {
361                         return token(RubyCommentTokenId.COMMENT_HTMLTAG);
362                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
363                         break;
364                     }
365                 }
366
367                 break;
368
369             case '<': { // Html tag - rdoc
370

371                 // Only accept things that look like tags: <foo> or </foo>, not
372
// <<, < >, etc.
373
int next = input.read();
374                 input.backup(1);
375
376                 if (!((next == '/') || Character.isLetter(next))) {
377                     break;
378                 }
379
380                 if (input.readLength() > 1) {
381                     input.backup(1);
382
383                     return token(RubyCommentTokenId.COMMENT_TEXT);
384                 }
385
386                 while (ch != EOF) {
387                     ch = input.read();
388
389                     if (ch == '\n') {
390                         break;
391                     } else if (ch == '>') {
392                         return token(RubyCommentTokenId.COMMENT_HTMLTAG);
393                     }
394                 }
395
396                 break;
397             }
398
399             case ':': { // Possible rdoc tag, like :nodoc:
400

401                 if (input.readLength() > 1) {
402                     input.backup(1);
403
404                     return token(RubyCommentTokenId.COMMENT_TEXT);
405                 }
406
407                 int backup = 0;
408
409                 while (ch != EOF) {
410                     ch = input.read();
411                     backup++;
412
413                     if ((ch == '\n') || (!Character.isLetter(ch) && ch != '_' && ch != '-')) {
414                         if ((ch == ':') && (input.readLength() > 2)) { // Don't recognize "::" since it's used a lot when mentioning modules
415
// I should be able to use input.readText(1, ...) here but it doesn't work right
416

417                             String JavaDoc seen = input.readText().toString();
418                             String JavaDoc directive = seen.substring(1, seen.length() - 1);
419
420                             for (String JavaDoc keyword : RDOC_DIRECTIVES) {
421                                 if (keyword.equals(directive)) {
422                                     return token(RubyCommentTokenId.COMMENT_RDOC);
423                                 }
424                             }
425                         }
426
427                         input.backup(backup);
428                         
429                         break;
430                     }
431                 }
432
433                 continue;
434             }
435
436             default: {
437                 if (!inWord) {
438                     // See if we have a match from here on for any of the markers
439
String JavaDoc[] markers = getTodoMarkers();
440
441                     for (int i = 0; i < markers.length; i++) {
442                         if (markers[i].charAt(0) == ch) {
443                             if (input.readLength() > 1) {
444                                 input.backup(1);
445
446                                 return token(RubyCommentTokenId.COMMENT_TEXT);
447                             }
448
449                             // Possible match!
450
// Read ahead while matching further characters, but if they
451
// stop matching, back up and try another
452
int backup = 0;
453                             String JavaDoc marker = markers[i];
454
455                             for (int c = 1, n = marker.length(); c < n; c++) {
456                                 backup++;
457
458                                 if (input.read() != marker.charAt(c)) {
459                                     input.backup(backup);
460
461                                     break;
462                                 }
463                             }
464
465                             if (backup == (marker.length() - 1)) { // Found it
466
// Peek ahead and make sure this match is a whole word
467

468                                 boolean separate = !Character.isJavaIdentifierPart(input.read());
469                                 input.backup(1);
470
471                                 if (separate) {
472                                     return tokenFactory.createToken(RubyCommentTokenId.COMMENT_TODO,
473                                         input.readLength());
474                                 }
475                             }
476                         }
477                     }
478                 }
479             }
480             }
481
482             inWord = Character.isJavaIdentifierPart(ch);
483         }
484     }
485
486     private Token<RubyCommentTokenId> token(RubyCommentTokenId id) {
487         return tokenFactory.createToken(id);
488     }
489
490     public void release() {
491     }
492 }
493
Popular Tags