KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > lexer > demo > handcoded > link > LinkLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.modules.lexer.demo.handcoded.link;
21
22 import java.util.HashMap JavaDoc;
23 import java.util.Map JavaDoc;
24 import org.netbeans.api.lexer.Language;
25 import org.netbeans.api.lexer.Lexer;
26 import org.netbeans.api.lexer.LexerInput;
27 import org.netbeans.api.lexer.TokenId;
28 import org.netbeans.api.lexer.Token;
29 import org.netbeans.spi.lexer.util.Compatibility;
30
31 /**
32  * Lexer that recognizes LinkLanguage.
33  *
34  * @author Miloslav Metelka
35  * @version 1.00
36  */

37
38 final class LinkLexer implements Lexer {
39
40     private static final LinkLanguage language = LinkLanguage.get();
41     
42     private static final int INIT = 0;
43     private static final int IN_SCHEME = 1;
44     private static final int AFTER_COLON = 2;
45     private static final int AFTER_SLASH = 3;
46     
47     /** Map for mapping scheme to uri type */
48     private static final Map JavaDoc scheme2uri = new HashMap JavaDoc();
49     
50     static {
51         scheme2uri.put("http", LinkLanguage.HTTP_URI);
52         scheme2uri.put("ftp", LinkLanguage.FTP_URI);
53     }
54     
55     private LexerInput lexerInput;
56     
57     /** Index of first char after scheme name e.g. "http" or "ftp" */
58     private int schemeEnd;
59     
60     /** Reused text buffer of the uri scheme */
61     private Object JavaDoc uriSchemeReusedText;
62     
63     public LinkLexer() {
64     }
65     
66     public Object JavaDoc getState() {
67         return null;
68     }
69
70     public void restart(LexerInput input, Object JavaDoc state) {
71         this.lexerInput = input;
72         if (input == null) { // this input is no longer being used by this lexer
73
uriSchemeReusedText = null; // free the reused text
74
}
75     }
76
77     public Token nextToken() {
78         Token token = null;
79         int uriStart = findURIStart();
80         switch (uriStart) {
81             case -1: // no link found
82
if (lexerInput.getReadLength() > 0) { // at least one char read
83
token = lexerInput.createToken(LinkLanguage.TEXT);
84                 }
85                 break;
86                 
87             case 0: // link at the begining of token
88
// Reading is positioned after "scheme://"
89
findURIEnd();
90                 // Now read is positioned at the first non-matching char
91

92                 // Get the scheme in compatible way - replacement of LexerInput.getReadText()
93
uriSchemeReusedText = Compatibility.getCompatibleReadText(
94                     lexerInput, 0, schemeEnd, uriSchemeReusedText);
95
96                 TokenId uriType = (TokenId)scheme2uri.get(uriSchemeReusedText);
97                 if (uriType == null) {
98                     uriType = LinkLanguage.URI;
99                 }
100                 
101                 token = lexerInput.createToken(uriType);
102                 break;
103                 
104             default: // link occurs on the line but not at the begining
105
token = lexerInput.createToken(LinkLanguage.TEXT, uriStart);
106                 lexerInput.backup(lexerInput.getReadLength()); // backup the extra read chars
107
break;
108         }
109         
110         return token;
111     }
112     
113     private int findURIStart() {
114         int state = INIT;
115         int uriStart = -1;
116
117         schemeEnd = 0;
118
119         int ch = lexerInput.read();
120         while (ch != LexerInput.EOF && ch != '\n') {
121             switch (ch) {
122                 case ':':
123                     switch (state) {
124                         case IN_SCHEME:
125                             state = AFTER_COLON;
126                             schemeEnd = lexerInput.getReadLength() - 1; // exclude ':'
127
break;
128
129                         default:
130                             uriStart = -1;
131                             state = INIT;
132                             break;
133                     }
134                     break;
135
136                 case '/':
137                     switch (state) {
138                         case AFTER_COLON:
139                             state = AFTER_SLASH;
140                             break;
141
142                         case AFTER_SLASH: // found "scheme://" => return success
143
return uriStart;
144
145                         default:
146                             uriStart = -1;
147                             state = INIT;
148                             break;
149                     }
150                     break;
151
152                 case '.': // can be part of URI scheme
153
case '+': // can be part of URI scheme
154
case '-': // can be part of URI scheme
155
switch (state) {
156                         // case IN_SCHEME: // stay in scheme
157
default:
158                             uriStart = -1;
159                             state = INIT;
160                             break;
161                     }
162                     break;
163
164                 default:
165                     if (isAlpha(ch)) { // alpha char
166
switch (state) {
167                             case INIT:
168                                 // mark begining of possible uri
169
uriStart = lexerInput.getReadLength() - 1;
170                                 state = IN_SCHEME;
171                                 break;
172
173                             case IN_SCHEME: // stay in scheme
174
break;
175
176                             default:
177                                 uriStart = -1;
178                                 state = INIT;
179                                 break;
180                         }
181                         
182                     } else if (isDigit(ch)) {
183                         switch (state) {
184                             case IN_SCHEME: // stay in scheme
185
break;
186                                 
187                             default:
188                                 uriStart = -1;
189                                 state = INIT;
190                                 break;
191                         }
192                         
193                     } else {
194                         uriStart = -1;
195                         state = INIT;
196                     }
197             }
198          
199             ch = lexerInput.read();
200         }
201         
202         // EOF or '\n' reached
203
return -1;
204     }
205     
206     private int findURIEnd() {
207         int ch = lexerInput.read();
208         while (ch != LexerInput.EOF && ch != '\n') {
209             boolean stop = false;
210
211             switch (ch) {
212                 // Allowed chars after "scheme://" follow - there is no particular
213
// syntax observed although normally it should be
214
case '#':
215                 case ':':
216                 case '?':
217                 case ';':
218                 case '&':
219                 case '@':
220                 case '=':
221                 case '+':
222                 case '-':
223                 case '$':
224                 case ',':
225                 case '/':
226                 case '.':
227                 case '_':
228                 case '!':
229                 case '~':
230                 case '\'':
231                 case ')':
232                 case '(':
233                 case '%':
234                     break;
235                     
236                 default:
237                     if (!isAlpha(ch) && !isDigit(ch)) {
238                         stop = true;
239                     }
240                     break;
241                     
242             }
243             
244             if (stop) {
245                 break;
246             }
247             
248             ch = lexerInput.read();
249         }
250         
251         if (ch != LexerInput.EOF) { // rollback the last char
252
lexerInput.backup(1);
253         }
254         
255         // EOF or '\n' reached
256
return -1;
257     }
258     
259     private static boolean isAlpha(int ch) {
260         return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
261     }
262     
263     private static boolean isDigit(int ch) {
264         return ('0' <= ch && ch <= '9');
265     }
266     
267
268 }
269
Popular Tags