KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > spi > lexer > Lexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.spi.lexer;
21
22 import org.netbeans.api.lexer.TokenId;
23 import org.netbeans.api.lexer.Token;
24
25 /**
26  * Lexer reads input characters from {@link LexerInput} and groups
27  * them into tokens.
28  * <br/>
29  * The lexer delegates token creation
30  * to {@link TokenFactory#createToken(TokenId)}.
31  * Token factory instance should be given to the lexer in its constructor.
32  *
33  * <p>
34  * The lexer must be able to express its internal lexing
35  * state at token boundaries and it must be able
36  * to restart lexing from such state.
37  * <br>
38  * It is expected that if the input characters following the restart point
39  * do not change then the lexer will return the same tokens
40  * regardless of whether it was restarted at the restart point
41  * or run from the beginning of the input as a batch lexer.
42  * </p>
43  *
44  * <p>
45  * <b>Testing of the lexers</b>:
46  * <br/>
47  * Testing of newly written lexers can be performed in several ways.
48  * The simplest way is to test batch lexing first
49  * (see e.g.
50  * <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerBatchTest.java">
51  * org.netbeans.lib.lexer.test.simple.SimpleLexerBatchTest</a> in lexer module tests).
52  * <br/>
53  * Then an "incremental" behavior of the new lexer can be tested
54  * (see e.g. <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerIncTest.java">
55  * org.netbeans.lib.lexer.test.simple.SimpleLexerIncTest</a>).
56  * <br/>
57  * Finally the lexer can be tested by random tests that randomly insert and remove
58  * characters from the document
59  * (see e.g. <a HREF="http://www.netbeans.org/source/browse/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/Attic/SimpleLexerRandomTest.java">
60  * org.netbeans.lib.lexer.test.simple.SimpleLexerRandomTest</a>).
61  * <br/>
62  * Once these tests pass the lexer can be considered stable.
63  * </p>
64  *
65  * @author Miloslav Metelka
66  * @version 1.00
67  */

68
69 public interface Lexer<T extends TokenId> {
70
71     /**
72      * Return the next token based on characters of the input
73      * and possibly additional input properties.
74      * <br>
75      * Characters can be read by using the
76      * {@link LexerInput#read()} method. Once the lexer
77      * knows that it has read enough characters to recognize
78      * a token it calls
79      * {@link TokenFactory#createToken(TokenId)}
80      * to obtain an instance of a {@link Token} and then returns it.
81      *
82      * <p>
83      * <b>Note:</b>&nbsp;Lexer must <em>not</em> return any other <code>Token</code> instances than
84      * those obtained from the TokenFactory.
85      * </p>
86      *
87      * <p>
88      * The lexer is required to tokenize all the characters (except EOF)
89      * provided by the {@link LexerInput} prior to returning <code>null</code>
90      * from this method. Not doing so is treated
91      * as a malfunction of the lexer.
92      * </p>
93      *
94      * @return token recognized by the lexer
95      * or <code>null</code> if there are no more characters (available in the input) to be tokenized.
96      * <br>
97      * Return {@link TokenFactory#SKIP_TOKEN}
98      * if the token should be skipped because of a token filter.
99      *
100      * @throws IllegalStateException if the token instance created by the lexer
101      * was not created by the methods of TokenFactory (there is a common superclass
102      * for those token implementations).
103      * @throws IllegalStateException if this method returns <code>null</code> but not all
104      * the characters of the lexer input were tokenized.
105      */

106     Token<T> nextToken();
107
108     /**
109      * This method is called by the lexer's infrastructure
110      * to obtain the lexer's present state
111      * once the lexer has recognized and returned a token.
112      * <br>
113      * In a mutable environment this method is called after each recognized token
114      * and its result is paired (together with the token's lookahead) with the token
115      * for later use - when the lexer needs to be restarted at the token boundary.
116      *
117      * <p>
118      * If the lexer is in no extra state (it is in a default state)
119      * it should return <code>null</code>. Most lexers are in the default state
120      * all the time.
121      * <br>
122      * If possible the non-default lexer states should be expressed
123      * as small non-negative integers.
124      * The {@link LexerInput#integerState(int)} can be used for convenience.
125      * <br>
126      * There is an optimization that shrinks the storage costs for small
127      * <code>java.lang.Integer</code>s to single bytes.
128      * </p>
129      *
130      * <p>
131      * The returned value should not be tied to this particular lexer instance in any way.
132      * Another lexer instance may be restarted from this state later.
133      * </p>
134      *
135      * @return valid state object or <code>null</code> if the lexer is in a default state.
136      */

137     Object state();
138
139     /**
140      * Infrastructure calls this method when it no longer needs this lexer for lexing
141      * so it becomes unused.
142      * <br>
143      * If lexer instances are cached and reused later
144      * then this method should first release all the references that might cause
145      * memory leaks and then add this unused lexer to the cache.
146      */

147     void release();
148
149 }
150
Popular Tags