LexerInput


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.spi.lexer;
21  
22  import org.netbeans.lib.editor.util.AbstractCharSequence;
23  import org.netbeans.lib.lexer.CharProvider;
24  import org.netbeans.lib.lexer.IntegerCache;
25  import org.netbeans.lib.lexer.LexerUtilsConstants;
26  
27  /**
28   * Provides characters to feed the {@link Lexer}.
29   * It logically corresponds to <CODE>java.io.Reader</CODE> but its {@link #read()} method
30   * does not throw any checked exception.
31   * <br>
32   * It allows to backup one or more characters that were already read
33   * by {@link #read()} so that they can be re-read again later.
34   * <br>
35   * It supports viewing of the previously read characters as <CODE>java.lang.CharSequence</CODE>
36   * by {@link #readText(int, int)}.
37   *
38   * <p>
39   * The <code>LexerInput</code> can only be used safely by a single thread.
40   *
41   * <p>The following picture shows an example of java identifier recognition:
42   *
43   * <p><IMG SRC="doc-files/lexer-input.gif">.
44   *
45   * @author Miloslav Metelka
46   * @version 1.00
47   */
48  
49  public final class LexerInput {
50      
51      /**
52       * Integer constant -1 returned by {@link #read()} to signal
53       * that there are no more characters available on input.
54       * <br/>
55       * It cannot be a part of any token's text but it is counted
56       * as a single character in {@link #backup(int)} operations.
57       * <br/>
58       * Translates to <code>0xFFFF</code> when casted to <code>char</code>.
59       */
60      public static final int EOF = -1;
61      
62      /**
63       * Character provider to which this lexer input delegates
64       * its operation.
65       */
66      private CharProvider charProvider;
67      
68      /**
69       * Character sequence that corresponds
70       * to the text that was read after past the end
71       * of the last returned token.
72       */
73      private ReadText readText;
74  
75      /**
76       * 1 if after EOF was just read or 0 otherwise.
77       */
78      private int eof;
79      
80      /**
81       * Construct instance of the lexer input.
82       *
83       * @param charProvider non-null character provider for this lexer input.
84       */
85      LexerInput(CharProvider charProvider) {
86          this.charProvider = charProvider;
87      }
88      
89      /**
90       * Read a single character from input or return {@link #EOF}.
91       *
92       * @return valid character from input
93       *   or {@link #EOF} when there are no more characters available
94       *   on input. It's allowed to repeat the reads once EOF was returned
95       *   - all of them will return EOF.
96       */
97      public int read() {
98          int c = charProvider.read();
99          if (c == EOF) {
100             eof = 1;
101         }
102         return c;
103     }
104     
105     /**
106      * Undo last <code>count</code> of {@link #read()} operations.
107      * <br>
108      * The operation moves back read-offset (from which {@link #read()}
109      * reads characters) so that subsequent read operations
110      * will re-read the characters that were backed up.
111      * <br/>
112      * If {@link LexerInput#EOF} was returned by {@link #read()} then
113      * it will count as a single character in the backup operation
114      * (even if returned multiple times)
115      * i.e backup(1) will undo reading of (previously read) EOF.
116      *
117      * <p/>
118      * <i>Example:</i><pre>
119      *   // backup last character that was read - either regular char or EOF
120      *   lexerInput.backup(1);
121      *
122      *   // Backup all characters read during recognition of current token
123      *   lexerInput.backup(readLengthEOF());
124      * </pre>
125      *
126      * @param count >=0 amount of characters to return back to the input.
127      * @throws IndexOutOfBoundsException in case
128      *  the <code>count > readLengthEOF()</code>.
129      */
130     public void backup(int count) {
131         if (count < 0) {
132             throw new IndexOutOfBoundsException  ("count=" + count + " <0"); // NOI18N
133         }
134         // count >= 0
135         LexerUtilsConstants.checkValidBackup(count, readLengthEOF());
136         if (eof != 0) {
137             eof = 0; // backup EOF
138             count--;
139         }
140         charProvider.backup(count);
141     }
142     
143     /**
144      * Get distance between the current reading point and the begining of a token
145      * being currently recognized (excluding possibly read EOF).
146      *
147      * @return &gt;=0 number of characters obtained from the input
148      *   by subsequent {@link #read()} operations since
149      *   the last token was returned. The {@link #backup(int)}
150      *   operations with positive argument decrease that value
151      *   while those with negative argument increase it.
152      *   <p>
153      *   Once a token gets created by
154      *   {@link TokenFactory#createToken(TokenId)}
155      *   the value returned by <CODE>readLength()</CODE> becomes zero.
156      *   <br>
157      *   If {@link LexerInput#EOF} was read then it is not counted into read length.
158      */
159     public int readLength() {
160         return charProvider.readIndex();
161     }
162     
163     /**
164      * Read length that includes EOF as a single character
165      * if it was just read from this input.
166      */
167     public int readLengthEOF() {
168         return readLength() + eof;
169     }
170     
171     /**
172      * Get character sequence that corresponds to characters
173      * that were read by previous {@link #read()} operations in the current token.
174      * <br><i>Example:</i><pre>
175      *
176      *   private static final Map kwdStr2id = new HashMap();
177      *
178      *   static {
179      *       String[] keywords = new String[] { "private", "protected", ... };
180      *       TokenId[] ids = new TokenId[] { JavaLanguage.PRIVATE, JavaLanguage.PROTECTED, ... };
181      *       for (int i = keywords.length - 1; i >= 0; i--) {
182      *           kwdStr2id.put(keywords[i], ids[i]);
183      *       }
184      *   }
185      *   
186      *   public Token nextToken() {
187      *       ... read characters of identifier/keyword by lexerInput.read() ...
188      *
189      *       // Now decide between keyword or identifier
190      *       CharSequence text = lexerInput.readText(0, lexerInput.readLength());
191      *       TokenId id = (TokenId)kwdStr2id.get(text);
192      *       return (id != null) ? id : JavaLanguage.IDENTIFIER;
193      *   }
194      *
195      * </pre>
196      *
197      * <p>
198      * If {@link LexerInput#EOF} was previously returned by {@link #read()}
199      * then it will not be a part of the returned charcter sequence
200      * (it also does not count into {@link #readLength()}.
201      *
202      * <p>
203      * Subsequent invocations of this method are cheap as the returned
204      * CharSequence instance is reused and just reinitialized.
205      *
206      * @param start &gt;=0 and =&lt;{@link #readLength()}
207      *  is the starting index of the character sequence in the previously read characters.
208      * @param end &gt;=start and =&lt;{@link #readLength()}
209      *  is the starting index of the character sequence in the previously read characters.
210      * @return character sequence corresponding to read characters.
211      *   <P>The returned character sequence is only valid
212      *   until any of <CODE>read()</CODE>, <CODE>backup()</CODE>,
213      *   <CODE>createToken()</CODE> or another <CODE>readText()</CODE> is called.
214      *   <P>The <CODE>length()</CODE> of the returned
215      *   character sequence will be equal
216      *   to the <CODE>end - start</CODE>.
217      *   <BR>The <CODE>hashCode()</CODE> method of the returned
218      *   character sequence works in the same way like
219      *   {@link String#hashCode()}.
220      *   <BR>The <CODE>equals()</CODE> method
221      *   attempts to cast the compared object to {@link CharSequence}
222      *   and compare the lengths and if they match
223      *   then compare every character of the given
224      *   character sequence i.e. the same way like <CODE>String.equals()</CODE> works.
225      * @throws IndexOutOfBoundsException in case the parameters are not in the
226      *   required bounds.
227      */
228     public CharSequence   readText(int start, int end) {
229         assert (start >= 0 && end >= start && end <= readLength())
230             : "start=" + start + ", end=" + end + ", readLength()=" + readLength(); // NOI18N
231 
232         if (readText == null) {
233             readText = new ReadText();
234         }
235         readText.reinit(start, end);
236         return readText;
237     }
238 
239     /**
240      * Return the read text for all the characters consumed from the input
241      * for the current token recognition.
242      */
243     public CharSequence   readText() {
244         return readText(0, readLength());
245     }
246     
247     /**
248      * Read the next character and check whether it's '\n'
249      * and if not backup it (otherwise leave it consumed).
250      *
251      * <p>
252      * This method is useful in the following scenario:
253      * <pre>
254      *  switch (ch) {
255      *      case 'x':
256      *          ...
257      *          break;
258      *      case 'y':
259      *          ...
260      *          break;
261      *      case '\r': input.consumeNewline();
262      *      case '\n':
263      *          // Line separator recognized
264      *  }
265      * </pre>
266      *
267      * @return true if newline was consumed or false otherwise.
268      */
269     public boolean consumeNewline() {
270         if (read() == '\n') {
271             return true;
272         } else {
273             backup(1);
274             return false;
275         }
276     }
277     
278     /**
279      * Lexer may call this method to get cached <code>java.lang.Integer</code> instance.
280      * <br/>
281      * The caching is only guaranteed if the given int value is below or equal to certain value
282      * - the present implementation uses 127.
283      * <br/>
284      * If the value is above this constant a new value will be constructed
285      * during each call. In such case the clients could possibly
286      * implement their own caching.
287      */
288     public static Integer   integerState(int state) {
289         return IntegerCache.integer(state);
290     }
291     
292     /**
293      * Helper character sequence being returned from <code>readText()</code>.
294      */
295     private final class ReadText extends AbstractCharSequence.StringLike {
296         
297         private int start;
298         
299         private int length;
300         
301         private void reinit(int start, int end) {
302             this.start = start;
303             this.length = (end - start);
304         }
305         
306         public int length() {
307             return length;
308         }
309 
310         public char charAt(int index) {
311             if (index < 0 || index >= length) {
312                 throw new IndexOutOfBoundsException  ("index=" + index + ", length=" + length); // NOI18N
313             }
314             return charProvider.readExisting(index);
315         }
316         
317     }
318     
319 }
320
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags