KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > spi > lexer > LexerInput


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

19
20 package org.netbeans.spi.lexer;
21
22 import org.netbeans.lib.editor.util.AbstractCharSequence;
23 import org.netbeans.lib.lexer.CharProvider;
24 import org.netbeans.lib.lexer.IntegerCache;
25 import org.netbeans.lib.lexer.LexerUtilsConstants;
26
27 /**
28  * Provides characters to feed the {@link Lexer}.
29  * It logically corresponds to <CODE>java.io.Reader</CODE> but its {@link #read()} method
30  * does not throw any checked exception.
31  * <br>
32  * It allows to backup one or more characters that were already read
33  * by {@link #read()} so that they can be re-read again later.
34  * <br>
35  * It supports viewing of the previously read characters as <CODE>java.lang.CharSequence</CODE>
36  * by {@link #readText(int, int)}.
37  *
38  * <p>
39  * The <code>LexerInput</code> can only be used safely by a single thread.
40  *
41  * <p>The following picture shows an example of java identifier recognition:
42  *
43  * <p><IMG SRC="doc-files/lexer-input.gif">.
44  *
45  * @author Miloslav Metelka
46  * @version 1.00
47  */

48
49 public final class LexerInput {
50     
51     /**
52      * Integer constant -1 returned by {@link #read()} to signal
53      * that there are no more characters available on input.
54      * <br/>
55      * It cannot be a part of any token's text but it is counted
56      * as a single character in {@link #backup(int)} operations.
57      * <br/>
58      * Translates to <code>0xFFFF</code> when casted to <code>char</code>.
59      */

60     public static final int EOF = -1;
61     
62     /**
63      * Character provider to which this lexer input delegates
64      * its operation.
65      */

66     private CharProvider charProvider;
67     
68     /**
69      * Character sequence that corresponds
70      * to the text that was read after past the end
71      * of the last returned token.
72      */

73     private ReadText readText;
74
75     /**
76      * 1 if after EOF was just read or 0 otherwise.
77      */

78     private int eof;
79     
80     /**
81      * Construct instance of the lexer input.
82      *
83      * @param charProvider non-null character provider for this lexer input.
84      */

85     LexerInput(CharProvider charProvider) {
86         this.charProvider = charProvider;
87     }
88     
89     /**
90      * Read a single character from input or return {@link #EOF}.
91      *
92      * @return valid character from input
93      * or {@link #EOF} when there are no more characters available
94      * on input. It's allowed to repeat the reads once EOF was returned
95      * - all of them will return EOF.
96      */

97     public int read() {
98         int c = charProvider.read();
99         if (c == EOF) {
100             eof = 1;
101         }
102         return c;
103     }
104     
105     /**
106      * Undo last <code>count</code> of {@link #read()} operations.
107      * <br>
108      * The operation moves back read-offset (from which {@link #read()}
109      * reads characters) so that subsequent read operations
110      * will re-read the characters that were backed up.
111      * <br/>
112      * If {@link LexerInput#EOF} was returned by {@link #read()} then
113      * it will count as a single character in the backup operation
114      * (even if returned multiple times)
115      * i.e backup(1) will undo reading of (previously read) EOF.
116      *
117      * <p/>
118      * <i>Example:</i><pre>
119      * // backup last character that was read - either regular char or EOF
120      * lexerInput.backup(1);
121      *
122      * // Backup all characters read during recognition of current token
123      * lexerInput.backup(readLengthEOF());
124      * </pre>
125      *
126      * @param count >=0 amount of characters to return back to the input.
127      * @throws IndexOutOfBoundsException in case
128      * the <code>count > readLengthEOF()</code>.
129      */

130     public void backup(int count) {
131         if (count < 0) {
132             throw new IndexOutOfBoundsException JavaDoc("count=" + count + " <0"); // NOI18N
133
}
134         // count >= 0
135
LexerUtilsConstants.checkValidBackup(count, readLengthEOF());
136         if (eof != 0) {
137             eof = 0; // backup EOF
138
count--;
139         }
140         charProvider.backup(count);
141     }
142     
143     /**
144      * Get distance between the current reading point and the begining of a token
145      * being currently recognized (excluding possibly read EOF).
146      *
147      * @return &gt;=0 number of characters obtained from the input
148      * by subsequent {@link #read()} operations since
149      * the last token was returned. The {@link #backup(int)}
150      * operations with positive argument decrease that value
151      * while those with negative argument increase it.
152      * <p>
153      * Once a token gets created by
154      * {@link TokenFactory#createToken(TokenId)}
155      * the value returned by <CODE>readLength()</CODE> becomes zero.
156      * <br>
157      * If {@link LexerInput#EOF} was read then it is not counted into read length.
158      */

159     public int readLength() {
160         return charProvider.readIndex();
161     }
162     
163     /**
164      * Read length that includes EOF as a single character
165      * if it was just read from this input.
166      */

167     public int readLengthEOF() {
168         return readLength() + eof;
169     }
170     
171     /**
172      * Get character sequence that corresponds to characters
173      * that were read by previous {@link #read()} operations in the current token.
174      * <br><i>Example:</i><pre>
175      *
176      * private static final Map kwdStr2id = new HashMap();
177      *
178      * static {
179      * String[] keywords = new String[] { "private", "protected", ... };
180      * TokenId[] ids = new TokenId[] { JavaLanguage.PRIVATE, JavaLanguage.PROTECTED, ... };
181      * for (int i = keywords.length - 1; i >= 0; i--) {
182      * kwdStr2id.put(keywords[i], ids[i]);
183      * }
184      * }
185      *
186      * public Token nextToken() {
187      * ... read characters of identifier/keyword by lexerInput.read() ...
188      *
189      * // Now decide between keyword or identifier
190      * CharSequence text = lexerInput.readText(0, lexerInput.readLength());
191      * TokenId id = (TokenId)kwdStr2id.get(text);
192      * return (id != null) ? id : JavaLanguage.IDENTIFIER;
193      * }
194      *
195      * </pre>
196      *
197      * <p>
198      * If {@link LexerInput#EOF} was previously returned by {@link #read()}
199      * then it will not be a part of the returned charcter sequence
200      * (it also does not count into {@link #readLength()}.
201      *
202      * <p>
203      * Subsequent invocations of this method are cheap as the returned
204      * CharSequence instance is reused and just reinitialized.
205      *
206      * @param start &gt;=0 and =&lt;{@link #readLength()}
207      * is the starting index of the character sequence in the previously read characters.
208      * @param end &gt;=start and =&lt;{@link #readLength()}
209      * is the starting index of the character sequence in the previously read characters.
210      * @return character sequence corresponding to read characters.
211      * <P>The returned character sequence is only valid
212      * until any of <CODE>read()</CODE>, <CODE>backup()</CODE>,
213      * <CODE>createToken()</CODE> or another <CODE>readText()</CODE> is called.
214      * <P>The <CODE>length()</CODE> of the returned
215      * character sequence will be equal
216      * to the <CODE>end - start</CODE>.
217      * <BR>The <CODE>hashCode()</CODE> method of the returned
218      * character sequence works in the same way like
219      * {@link String#hashCode()}.
220      * <BR>The <CODE>equals()</CODE> method
221      * attempts to cast the compared object to {@link CharSequence}
222      * and compare the lengths and if they match
223      * then compare every character of the given
224      * character sequence i.e. the same way like <CODE>String.equals()</CODE> works.
225      * @throws IndexOutOfBoundsException in case the parameters are not in the
226      * required bounds.
227      */

228     public CharSequence JavaDoc readText(int start, int end) {
229         assert (start >= 0 && end >= start && end <= readLength())
230             : "start=" + start + ", end=" + end + ", readLength()=" + readLength(); // NOI18N
231

232         if (readText == null) {
233             readText = new ReadText();
234         }
235         readText.reinit(start, end);
236         return readText;
237     }
238
239     /**
240      * Return the read text for all the characters consumed from the input
241      * for the current token recognition.
242      */

243     public CharSequence JavaDoc readText() {
244         return readText(0, readLength());
245     }
246     
247     /**
248      * Read the next character and check whether it's '\n'
249      * and if not backup it (otherwise leave it consumed).
250      *
251      * <p>
252      * This method is useful in the following scenario:
253      * <pre>
254      * switch (ch) {
255      * case 'x':
256      * ...
257      * break;
258      * case 'y':
259      * ...
260      * break;
261      * case '\r': input.consumeNewline();
262      * case '\n':
263      * // Line separator recognized
264      * }
265      * </pre>
266      *
267      * @return true if newline was consumed or false otherwise.
268      */

269     public boolean consumeNewline() {
270         if (read() == '\n') {
271             return true;
272         } else {
273             backup(1);
274             return false;
275         }
276     }
277     
278     /**
279      * Lexer may call this method to get cached <code>java.lang.Integer</code> instance.
280      * <br/>
281      * The caching is only guaranteed if the given int value is below or equal to certain value
282      * - the present implementation uses 127.
283      * <br/>
284      * If the value is above this constant a new value will be constructed
285      * during each call. In such case the clients could possibly
286      * implement their own caching.
287      */

288     public static Integer JavaDoc integerState(int state) {
289         return IntegerCache.integer(state);
290     }
291     
292     /**
293      * Helper character sequence being returned from <code>readText()</code>.
294      */

295     private final class ReadText extends AbstractCharSequence.StringLike {
296         
297         private int start;
298         
299         private int length;
300         
301         private void reinit(int start, int end) {
302             this.start = start;
303             this.length = (end - start);
304         }
305         
306         public int length() {
307             return length;
308         }
309
310         public char charAt(int index) {
311             if (index < 0 || index >= length) {
312                 throw new IndexOutOfBoundsException JavaDoc("index=" + index + ", length=" + length); // NOI18N
313
}
314             return charProvider.readExisting(index);
315         }
316         
317     }
318     
319 }
320
Popular Tags