RubyCommentLexer


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  package org.netbeans.modules.ruby.lexer;
20  
21  import java.util.ArrayList  ;
22  import java.util.List  ;
23  import java.util.prefs.BackingStoreException  ;
24  import java.util.prefs.Preferences  ;
25  import org.netbeans.modules.ruby.*;
26  import org.netbeans.api.lexer.Token;
27  import org.netbeans.spi.lexer.Lexer;
28  import org.netbeans.spi.lexer.LexerInput;
29  import org.netbeans.spi.lexer.LexerRestartInfo;
30  import org.netbeans.spi.lexer.TokenFactory;
31  import org.openide.ErrorManager;
32  import org.openide.util.NbPreferences;
33  
34  
35  /**
36   * Lexical analyzer for Ruby comments which identifies TODO markers
37   * and highlights them specially.
38   *
39   * @todo Handle rdoc on/off directives (#++,#--). Since these occur on separate
40   *   lines I can't handle it now.
41   * @todo Highlight only RDoc reserved words, or all that fit the pattern? For
42   *   now I'm highlighting :\w+: sequences. Possibly I should only highlight
43   *
44   * @author Tor Norbye
45   */
46  public final class RubyCommentLexer implements Lexer<RubyCommentTokenId> {
47      private static final int EOF = LexerInput.EOF;
48      private static final String  [] RDOC_DIRECTIVES =
49          {
50              "arg", "args", "yield", "yields", "notnew", "not-new", "not_new", "doc", "nodoc",
51              "stopdoc", "startdoc", "enddoc", "main", "title", "section", "include"
52          };
53      private LexerInput input;
54      private TokenFactory<RubyCommentTokenId> tokenFactory;
55      private boolean inWord;
56      private String  [] markers;
57  
58      public RubyCommentLexer(LexerRestartInfo<RubyCommentTokenId> info) {
59          this.input = info.input();
60          this.tokenFactory = info.tokenFactory();
61          assert (info.state() == null); // passed argument always null
62      }
63  
64      public Object   state() {
65          return null;
66      }
67  
68      /**
69       * Compute the set of markers to scan for in the user source code.
70       * The code tries to look for the same markers used by the TODO module
71       * in case the user has customized the set. (However, it is doing this
72       * by peeking at the Preferences possibly left by the docscan module,
73       * rather than having a contract API with it, based on
74       * tasklist/docscan/src/org/netbeans/modules/tasklist/docscan/Settings.java)
75       */
76      private String  [] getTodoMarkers() {
77          if (markers == null) {
78              final String   MARKER_PREFIX = "Tag"; // NOI18N
79              final int MARKER_PREFIX_LENGTH = MARKER_PREFIX.length();
80              List  <String  > markerList = new ArrayList  <String  >();
81  
82              try {
83                  Preferences   preferences =
84                      NbPreferences.root().node("org/netbeans/modules/tasklist/docscan"); // NOI18N
85                  String  [] keys = preferences.keys();
86  
87                  for (int i = 0; i < keys.length; i++) {
88                      String   key = keys[i];
89  
90                      if ((key != null) && key.startsWith(MARKER_PREFIX)) {
91                          markerList.add(key.substring(MARKER_PREFIX_LENGTH));
92                      }
93                  }
94              } catch (BackingStoreException   bse) {
95                  ErrorManager.getDefault().notify(bse);
96              }
97  
98              if (markerList.size() > 0) {
99                  markerList.remove("@todo"); // Applies to javadoc, and these tags are now colorized separately
100                 markers = markerList.toArray(new String  [markerList.size()]);
101             } else {
102                 // Additional candidates: HACK, WORKAROUND, REMOVE, OLD
103                 markers = new String  [] { "TODO", "FIXME", "XXX", "PENDING" }; // NOI18N
104             }
105         }
106 
107         return markers;
108     }
109 
110     public Preferences   getDocscanPreferences() {
111         return NbPreferences.root().node("org/netbeans/modules/tasklist/docscan");
112     }
113 
114     public Token<RubyCommentTokenId> nextToken() {
115         inWord = false;
116 
117         while (true) {
118             int ch = input.read();
119 
120             switch (ch) {
121             case EOF: {
122                 if (input.readLength() > 0) {
123                     return token(RubyCommentTokenId.COMMENT_TEXT);
124                 } else {
125                     return null;
126                 }
127             }
128 
129             case '\\':
130                 // The next character is escaped...
131                 input.read();
132 
133                 continue;
134 
135             case '\n':
136                 return token(RubyCommentTokenId.COMMENT_TEXT);
137 
138             case '#': { // Linked method
139 
140                 if (inWord) {
141                     break;
142                 }
143 
144                 int originalLength = input.readLength();
145 
146                 // See if we have what looks like a method name:
147                 // method-only characters followed by whitespace, newlines or EOF:
148                 while (ch != EOF) {
149                     ch = input.read();
150 
151                     if ((ch == '$') || !Character.isJavaIdentifierPart(ch)) {
152                         input.backup(1);
153 
154                         break;
155                     }
156                 }
157 
158                 if (Character.isWhitespace(ch) || (ch == EOF) || (ch == '.') || (ch == ',') ||
159                         (ch == ')') || (ch == '}')) {
160                     if (originalLength > 1) {
161                         input.backup(input.readLengthEOF() - originalLength + 1);
162 
163                         return token(RubyCommentTokenId.COMMENT_TEXT);
164                     }
165 
166                     if (input.readLength() > 2) {
167                         return token(RubyCommentTokenId.COMMENT_LINK);
168                     }
169                 }
170 
171                 break;
172             }
173 
174             case 'f': // ftp:
175             case 'm': // mailto:
176             case 'w': // www.
177             case 'h': { // http links. TODO: link:, ftp:, mailto:, and www.
178 
179                 if (inWord) {
180                     break;
181                 }
182 
183                 int originalLength = input.readLength();
184                 boolean foundLinkBegin = false;
185 
186                 if (ch == 'h') { // http:
187 
188                     if (input.read() == 't') {
189                         if (input.read() == 't') {
190                             if (input.read() == 'p') {
191                                 if (input.read() == ':') {
192                                     foundLinkBegin = true;
193                                 } else {
194                                     input.backup(4);
195                                 }
196                             } else {
197                                 input.backup(3);
198                             }
199                         } else {
200                             input.backup(2);
201                         }
202                     } else {
203                         input.backup(1);
204                     }
205                 } else if (ch == 'f') { // ftp:
206 
207                     if (input.read() == 't') {
208                         if (input.read() == 'p') {
209                             if (input.read() == ':') {
210                                 foundLinkBegin = true;
211                             } else {
212                                 input.backup(3);
213                             }
214                         } else {
215                             input.backup(2);
216                         }
217                     } else {
218                         input.backup(1);
219                     }
220                 } else if (ch == 'm') { // mailto:
221 
222                     if (input.read() == 'a') {
223                         if (input.read() == 'i') {
224                             if (input.read() == 'l') {
225                                 if (input.read() == 't') {
226                                     if (input.read() == 'o') {
227                                         if (input.read() == ':') {
228                                             foundLinkBegin = true;
229                                         } else {
230                                             input.backup(6);
231                                         }
232                                     } else {
233                                         input.backup(5);
234                                     }
235                                 } else {
236                                     input.backup(4);
237                                 }
238                             } else {
239                                 input.backup(3);
240                             }
241                         } else {
242                             input.backup(2);
243                         }
244                     } else {
245                         input.backup(1);
246                     }
247                 } else if (ch == 'w') { // www.
248 
249                     if (input.read() == 'w') {
250                         if (input.read() == 'w') {
251                             if (input.read() == '.') {
252                                 foundLinkBegin = true;
253                             } else {
254                                 input.backup(3);
255                             }
256                         } else {
257                             input.backup(2);
258                         }
259                     } else {
260                         input.backup(1);
261                     }
262                 }
263 
264                 if (foundLinkBegin) {
265                     while (ch != EOF) {
266                         ch = input.read();
267 
268                         if ((ch == ']') || (ch == ')') || Character.isWhitespace(ch) ||
269                                 (ch == '\'') || (ch == '"')) {
270                             input.backup(1);
271 
272                             break;
273                         }
274                     }
275 
276                     if (originalLength > 1) {
277                         input.backup(input.readLengthEOF() - originalLength + 1);
278 
279                         return token(RubyCommentTokenId.COMMENT_TEXT);
280                     }
281 
282                     if (input.readLength() > 2) {
283                         return token(RubyCommentTokenId.COMMENT_LINK);
284                     }
285                 }
286             }
287 
288             case '_': // Italic text
289 
290                 if (inWord) {
291                     break;
292                 }
293 
294                 if (input.readLength() > 1) {
295                     input.backup(1);
296 
297                     return token(RubyCommentTokenId.COMMENT_TEXT);
298                 }
299 
300                 while (ch != EOF) {
301                     ch = input.read();
302 
303                     if (ch == '_') {
304                         int next = input.read();
305                         input.backup(1);
306 
307                         if (Character.isLetter(next) || (next == '_')) {
308                             continue;
309                         }
310 
311                         if (input.readLength() > 2) {
312                             return token(RubyCommentTokenId.COMMENT_ITALIC);
313                         }
314                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
315                         break;
316                     }
317                 }
318 
319                 break;
320 
321             case '*': // Bold text
322 
323                 if (inWord) {
324                     break;
325                 }
326 
327                 if (input.readLength() > 1) {
328                     input.backup(1);
329 
330                     return token(RubyCommentTokenId.COMMENT_TEXT);
331                 }
332 
333                 while (ch != EOF) {
334                     ch = input.read();
335 
336                     if ((ch == '*') && (input.readLength() > 2)) {
337                         return token(RubyCommentTokenId.COMMENT_BOLD);
338                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
339                         break;
340                     }
341                 }
342 
343                 break;
344 
345             case '+': // Typewriter text
346 
347                 if (inWord) {
348                     break;
349                 }
350 
351                 if (input.readLength() > 1) {
352                     input.backup(1);
353 
354                     return token(RubyCommentTokenId.COMMENT_TEXT);
355                 }
356 
357                 while (ch != EOF) {
358                     ch = input.read();
359 
360                     if ((ch == '+') && (input.readLength() > 2)) {
361                         return token(RubyCommentTokenId.COMMENT_HTMLTAG);
362                     } else if (!(Character.isLetter(ch) || (ch == '_'))) {
363                         break;
364                     }
365                 }
366 
367                 break;
368 
369             case '<': { // Html tag - rdoc
370 
371                 // Only accept things that look like tags: <foo> or </foo>, not
372                 // <<, < >, etc.
373                 int next = input.read();
374                 input.backup(1);
375 
376                 if (!((next == '/') || Character.isLetter(next))) {
377                     break;
378                 }
379 
380                 if (input.readLength() > 1) {
381                     input.backup(1);
382 
383                     return token(RubyCommentTokenId.COMMENT_TEXT);
384                 }
385 
386                 while (ch != EOF) {
387                     ch = input.read();
388 
389                     if (ch == '\n') {
390                         break;
391                     } else if (ch == '>') {
392                         return token(RubyCommentTokenId.COMMENT_HTMLTAG);
393                     }
394                 }
395 
396                 break;
397             }
398 
399             case ':': { // Possible rdoc tag, like :nodoc:
400 
401                 if (input.readLength() > 1) {
402                     input.backup(1);
403 
404                     return token(RubyCommentTokenId.COMMENT_TEXT);
405                 }
406 
407                 int backup = 0;
408 
409                 while (ch != EOF) {
410                     ch = input.read();
411                     backup++;
412 
413                     if ((ch == '\n') || (!Character.isLetter(ch) && ch != '_' && ch != '-')) {
414                         if ((ch == ':') && (input.readLength() > 2)) { // Don't recognize "::" since it's used a lot when mentioning modules
415                                                                        // I should be able to use input.readText(1, ...) here but it doesn't work right
416 
417                             String   seen = input.readText().toString();
418                             String   directive = seen.substring(1, seen.length() - 1);
419 
420                             for (String   keyword : RDOC_DIRECTIVES) {
421                                 if (keyword.equals(directive)) {
422                                     return token(RubyCommentTokenId.COMMENT_RDOC);
423                                 }
424                             }
425                         }
426 
427                         input.backup(backup);
428                         
429                         break;
430                     }
431                 }
432 
433                 continue;
434             }
435 
436             default: {
437                 if (!inWord) {
438                     // See if we have a match from here on for any of the markers
439                     String  [] markers = getTodoMarkers();
440 
441                     for (int i = 0; i < markers.length; i++) {
442                         if (markers[i].charAt(0) == ch) {
443                             if (input.readLength() > 1) {
444                                 input.backup(1);
445 
446                                 return token(RubyCommentTokenId.COMMENT_TEXT);
447                             }
448 
449                             // Possible match!
450                             // Read ahead while matching further characters, but if they
451                             // stop matching, back up and try another
452                             int backup = 0;
453                             String   marker = markers[i];
454 
455                             for (int c = 1, n = marker.length(); c < n; c++) {
456                                 backup++;
457 
458                                 if (input.read() != marker.charAt(c)) {
459                                     input.backup(backup);
460 
461                                     break;
462                                 }
463                             }
464 
465                             if (backup == (marker.length() - 1)) { // Found it
466                                                                    // Peek ahead and make sure this match is a whole word
467 
468                                 boolean separate = !Character.isJavaIdentifierPart(input.read());
469                                 input.backup(1);
470 
471                                 if (separate) {
472                                     return tokenFactory.createToken(RubyCommentTokenId.COMMENT_TODO,
473                                         input.readLength());
474                                 }
475                             }
476                         }
477                     }
478                 }
479             }
480             }
481 
482             inWord = Character.isJavaIdentifierPart(ch);
483         }
484     }
485 
486     private Token<RubyCommentTokenId> token(RubyCommentTokenId id) {
487         return tokenFactory.createToken(id);
488     }
489 
490     public void release() {
491     }
492 }
493
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags