LinkLexer


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.modules.lexer.demo.handcoded.link;
21  
22  import java.util.HashMap  ;
23  import java.util.Map  ;
24  import org.netbeans.api.lexer.Language;
25  import org.netbeans.api.lexer.Lexer;
26  import org.netbeans.api.lexer.LexerInput;
27  import org.netbeans.api.lexer.TokenId;
28  import org.netbeans.api.lexer.Token;
29  import org.netbeans.spi.lexer.util.Compatibility;
30  
31  /**
32   * Lexer that recognizes LinkLanguage.
33   *
34   * @author Miloslav Metelka
35   * @version 1.00
36   */
37  
38  final class LinkLexer implements Lexer {
39  
40      private static final LinkLanguage language = LinkLanguage.get();
41      
42      private static final int INIT = 0;
43      private static final int IN_SCHEME = 1;
44      private static final int AFTER_COLON = 2;
45      private static final int AFTER_SLASH = 3;
46      
47      /** Map for mapping scheme to uri type */
48      private static final Map   scheme2uri = new HashMap  ();
49      
50      static {
51          scheme2uri.put("http", LinkLanguage.HTTP_URI);
52          scheme2uri.put("ftp", LinkLanguage.FTP_URI);
53      }
54      
55      private LexerInput lexerInput;
56      
57      /** Index of first char after scheme name e.g. "http" or "ftp" */
58      private int schemeEnd;
59      
60      /** Reused text buffer of the uri scheme */
61      private Object   uriSchemeReusedText;
62      
63      public LinkLexer() {
64      }
65      
66      public Object   getState() {
67          return null;
68      }
69  
70      public void restart(LexerInput input, Object   state) {
71          this.lexerInput = input;
72          if (input == null) { // this input is no longer being used by this lexer
73              uriSchemeReusedText = null; // free the reused text
74          }
75      }
76  
77      public Token nextToken() {
78          Token token = null;
79          int uriStart = findURIStart();
80          switch (uriStart) {
81              case -1: // no link found
82                  if (lexerInput.getReadLength() > 0) { // at least one char read
83                      token = lexerInput.createToken(LinkLanguage.TEXT);
84                  }
85                  break;
86                  
87              case 0: // link at the begining of token
88                  // Reading is positioned after "scheme://"
89                  findURIEnd();
90                  // Now read is positioned at the first non-matching char
91   
92                  // Get the scheme in compatible way - replacement of LexerInput.getReadText()
93                  uriSchemeReusedText = Compatibility.getCompatibleReadText(
94                      lexerInput, 0, schemeEnd, uriSchemeReusedText);
95  
96                  TokenId uriType = (TokenId)scheme2uri.get(uriSchemeReusedText);
97                  if (uriType == null) {
98                      uriType = LinkLanguage.URI;
99                  }
100                 
101                 token = lexerInput.createToken(uriType);
102                 break;
103                 
104             default: // link occurs on the line but not at the begining
105                 token = lexerInput.createToken(LinkLanguage.TEXT, uriStart);
106                 lexerInput.backup(lexerInput.getReadLength()); // backup the extra read chars
107                 break;
108         }
109         
110         return token;
111     }
112     
113     private int findURIStart() {
114         int state = INIT;
115         int uriStart = -1;
116 
117         schemeEnd = 0;
118 
119         int ch = lexerInput.read();
120         while (ch != LexerInput.EOF && ch != '\n') {
121             switch (ch) {
122                 case ':':
123                     switch (state) {
124                         case IN_SCHEME:
125                             state = AFTER_COLON;
126                             schemeEnd = lexerInput.getReadLength() - 1; // exclude ':'
127                             break;
128 
129                         default:
130                             uriStart = -1;
131                             state = INIT;
132                             break;
133                     }
134                     break;
135 
136                 case '/':
137                     switch (state) {
138                         case AFTER_COLON:
139                             state = AFTER_SLASH;
140                             break;
141 
142                         case AFTER_SLASH: // found "scheme://" => return success
143                             return uriStart;
144 
145                         default:
146                             uriStart = -1;
147                             state = INIT;
148                             break;
149                     }
150                     break;
151 
152                 case '.': // can be part of URI scheme
153                 case '+': // can be part of URI scheme
154                 case '-': // can be part of URI scheme
155                     switch (state) {
156                         // case IN_SCHEME: // stay in scheme
157                         default:
158                             uriStart = -1;
159                             state = INIT;
160                             break;
161                     }
162                     break;
163 
164                 default:
165                     if (isAlpha(ch)) { // alpha char
166                         switch (state) {
167                             case INIT:
168                                 // mark begining of possible uri
169                                 uriStart = lexerInput.getReadLength() - 1;
170                                 state = IN_SCHEME;
171                                 break;
172 
173                             case IN_SCHEME: // stay in scheme
174                                 break;
175 
176                             default:
177                                 uriStart = -1;
178                                 state = INIT;
179                                 break;
180                         }
181                         
182                     } else if (isDigit(ch)) {
183                         switch (state) {
184                             case IN_SCHEME: // stay in scheme
185                                 break;
186                                 
187                             default:
188                                 uriStart = -1;
189                                 state = INIT;
190                                 break;
191                         }
192                         
193                     } else {
194                         uriStart = -1;
195                         state = INIT;
196                     }
197             }
198          
199             ch = lexerInput.read();
200         }
201         
202         // EOF or '\n' reached
203         return -1;
204     }
205     
206     private int findURIEnd() {
207         int ch = lexerInput.read();
208         while (ch != LexerInput.EOF && ch != '\n') {
209             boolean stop = false;
210 
211             switch (ch) {
212                 // Allowed chars after "scheme://" follow - there is no particular
213                 // syntax observed although normally it should be
214                 case '#':
215                 case ':':
216                 case '?':
217                 case ';':
218                 case '&':
219                 case '@':
220                 case '=':
221                 case '+':
222                 case '-':
223                 case '$':
224                 case ',':
225                 case '/':
226                 case '.':
227                 case '_':
228                 case '!':
229                 case '~':
230                 case '\'':
231                 case ')':
232                 case '(':
233                 case '%':
234                     break;
235                     
236                 default:
237                     if (!isAlpha(ch) && !isDigit(ch)) {
238                         stop = true;
239                     }
240                     break;
241                     
242             }
243             
244             if (stop) {
245                 break;
246             }
247             
248             ch = lexerInput.read();
249         }
250         
251         if (ch != LexerInput.EOF) { // rollback the last char
252             lexerInput.backup(1);
253         }
254         
255         // EOF or '\n' reached
256         return -1;
257     }
258     
259     private static boolean isAlpha(int ch) {
260         return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
261     }
262     
263     private static boolean isDigit(int ch) {
264         return ('0' <= ch && ch <= '9');
265     }
266     
267 
268 }
269
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags