Lexer


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.spi.lexer;
21  
22  import org.netbeans.api.lexer.TokenId;
23  import org.netbeans.api.lexer.Token;
24  
25  /**
26   * Lexer reads input characters from {@link LexerInput} and groups
27   * them into tokens.
28   * <br/>
29   * The lexer delegates token creation
30   * to {@link TokenFactory#createToken(TokenId)}.
31   * Token factory instance should be given to the lexer in its constructor.
32   *
33   * <p>
34   * The lexer must be able to express its internal lexing
35   * state at token boundaries and it must be able
36   * to restart lexing from such state.
37   * <br>
38   * It is expected that if the input characters following the restart point
39   * do not change then the lexer will return the same tokens
40   * regardless of whether it was restarted at the restart point
41   * or run from the input beginning as a batch lexer.
42   * </p>
43   *
44   * <p>
45   * <b>Testing of the lexers</b>:
46   * <br/>
47   * Testing of newly written lexers can be performed in several ways.
48   * The simplest way is to test batch lexing first
49   * (see e.g.
50   * <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerBatchTest.java">
51   * org.netbeans.lib.lexer.test.simple.SimpleLexerBatchTest</a> in lexer module tests).
52   * <br/>
53   * Then an "incremental" behavior of the new lexer can be tested
54   * (see e.g. <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerIncTest.java">
55   * org.netbeans.lib.lexer.test.simple.SimpleLexerIncTest</a>).
56   * <br/>
57   * Finally the lexer can be tested by random tests that randomly insert and remove
58   * characters from the document
59   * (see e.g. <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerRandomTest.java">
60   * org.netbeans.lib.lexer.test.simple.SimpleLexerRandomTest</a>).
61   * <br/>
62   * Once these tests pass the lexer can be considered stable.
63   * </p>
64   *
65   * @author Miloslav Metelka
66   * @version 1.00
67   */
68  
69  public interface Lexer<T extends TokenId> {
70  
71      /**
72       * Return a token based on characters of the input
73       * and possibly additional input properties.
74       * <br>
75       * Characters can be read by using
76       * {@link LexerInput#read()} method. Once the lexer
77       * knows that it has read enough characters to recognize
78       * a token it calls
79       * {@link TokenFactory#createToken(TokenId)}
80       * to obtain an instance of a {@link Token} and then returns it.
81       *
82       * <p>
83       * <b>Note:</b>&nbsp;Lexer must <em>not</em> return any other <code>Token</code> instances than
84       * those obtained from the TokenFactory.
85       * </p>
86       *
87       * <p>
88       * The lexer is required to tokenize all the characters (except EOF)
89       * provided by the {@link LexerInput} prior to returning null
90       * from this method. Not doing so is treated
91       * as malfunctioning of the lexer.
92       * </p>
93       *
94       * @return token recognized by the lexer
95       *  or null if there are no more characters (available in the input) to be tokenized.
96       *  <br/>
97       *  Return {@link TokenFactory#SKIP_TOKEN}
98       *  if the token should be skipped because of a token filter.
99       *
100      * @throws IllegalStateException if the token instance created by the lexer
101      *  was not created by the methods of TokenFactory (there is a common superclass
102      *  for those token implementations).
103      * @throws IllegalStateException if this method returns null but not all
104      *  the characters of the lexer input were tokenized.
105      */
106     Token<T> nextToken();
107     
108     /**
109      * This method is called by lexer's infrastructure
110      * to return the lexer's present state
111      * once the lexer has recognized and returned a token.
112      * <br/>
113      * In a mutable environment this method is called after each recognized token
114      * and its result is paired (together with token's lookahead) with the token
115      * for later use - when lexer needs to be restarted at the token boundary.
116      *
117      * <p>
118      * If the lexer is in no extra state (it is in a default state)
119      * it should return <code>null</code>. Most lexers are in the default state
120      * all the time.
121      * <br/>
122      * If possible the non-default lexer states should be expressed
123      * as small non-negative integers.
124      * The {@link LexerInput#integerState(int)} can be used for convenience.
125      * <br/>
126      * There is an optimization that shrinks the storage costs for small
127      * <code>java.lang.Integer</code>s to single bytes.
128      * </p>
129      *
130      * <p>
131      * The returned value should not be tied to this particular lexer instance in any way.
132      * Another lexer instance may be restarted from this state later.
133      * </p>
134      *
135      * @return valid state object or null if the lexer is in a default state.
136      */
137     Object   state();
138 
139     /**
140      * Infrastructure calls this method when it no longer needs this lexer for lexing
141      * so it becomes unused.
142      * <br/>
143      * If lexer instances are cached and reused later
144      * then this method should first release all the references that might cause
145      * memory leaks and then add this unused lexer to the cache.
146      */
147     void release();
148     
149 }
150
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags