Token


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.api.lexer;
21  
22  /**
23   * Token describes a lexical element of input text.
24   * <br/>
25   * It mainly provides an identification by {@link #id()}
26   * and a textual body (aka token's image) by {@link #text()}.
27   * <br/>
28   * Only lexers should produce token instances and they should do it
29   * solely by using methods of {@link org.netbeans.spi.lexer.TokenFactory}.
30   *
31   * <p>
32   * <b>Note:</b><font color="red">
33   * Do not create custom extensions of this class - lexers may only return
34   * implementations produced by <code>TokenFactory</code>.
35   * Creation of any other token implementations will be refused.
36   * </font>
37   * </p>
38   *
39   * <p>
40   * Token guarantees stability of the {@link #id()} and {@link #length()} methods.
41   * The {@link #hashCode()} and {@link #equals(Object)} methods
42   * use the default implementations from <code>java.lang.Object</code>.
43   * <br/>
44   * The two tokens are only equal if they are the same object.
45   * </p>
46   *
47   * @author Miloslav Metelka
48   * @version 1.00
49   */
50  
51  public abstract class Token<T extends TokenId> {
52      
53      /**
54       * Create token instance.
55       * @throws IllegalStateException if a non-lexer-module-implementation token
56       *  is attempted to be created.
57       */
58      protected Token() {
59          if (!(this instanceof org.netbeans.lib.lexer.token.AbstractToken)) {
60              throw new IllegalStateException  ("Custom token implementations prohibited."); // NOI18N
61          }
62      }
63  
64      /**
65       * Get identification of this token.
66       *
67       * @return non-null identification of this token.
68       */
69      public abstract T id();
70      
71      /**
72       * Get text of this token (aka token's image) as a character sequence.
73       * <br/>
74       * This text usually corresponds to the characters present in the lexed text input
75       * unless {@link #isCustomText()} returns true.
76       *
77       * <p>
78       * <b>Note for mutable input sources:</b>
79       * <br/>
80       * This method should only be called
81       * within a readonly (or read-write) transaction
82       * over the underlying input source
83       * (such as <code>javax.swing.text.Document.render()</code>
84       * for Swing documents).
85       * <br/>
86       * The result returned by this method
87       * is only valid within a readonly (or read-write) transaction
88       * over the input source (method must be re-called
89       * during the next readonly transaction).
90       * </p>
91       *
92       * @return non-null, non-empty text of this token.
93       *  It may be <code>null</code> in case the token was used
94       *  for a mutable input and it was removed
95       *  from the token list for the given input (but even in such case
96       *  the text can be retained in certain cases).
97       *
98       *  <p>
99       *  The behavior of <code>equals()</code> and <code>hashCode()</code>
100      *  of the returned character sequence is generally undefined.
101      *  <br/>
102      *  The returned character sequence can NOT be compared to another
103      *  character sequence by using its <code>equals()</code> method.
104      *  <br/>
105      *  {@link org.netbeans.api.lexer.TokenUtilities} contains
106      *  utility methods related to token text comparing.
107      *  </p>
108      *
109      *  <p>
110      *  The returned text is just a pointer to the primary source of the data
111      *  e.g. a swing document. The character data are not duplicated in the tokens.
112      *  </p>
113      *
114      * @see #preprocessedText()
115      */
116     public abstract CharSequence   text();
117     
118     /**
119      * Check whether {@link #text()} returns a custom value that may differ
120      * from the original content of the text input.
121      * <br/>
122      * Using custom text may be useful in case when only certain part of the token
123      * is useful for the particular use and the token's text can be shrinked
124      * and possibly a flyweight text can be used.
125      * <br/>
126      * Also this is useful when using lexers generated by various lexer generators
127      * that generally allow to use a custom text in the produced tokens.
128      *
129      * @return true if the text of the token does not correspond
130      *  to the original characters present in the text input being lexed.
131      */
132     public abstract boolean isCustomText();
133     
134     /**
135      * Get number of characters in the original text input
136      * that the token spans.
137      * <br/>
138      * Usually this is the same value like {@link #text()}</code>.length()</code>
139      * unless {@link #isCustomText()} returns true.
140      * <br/>
141      * Also this method will return valid length in all cases even
142      * when the text of the token could become <code>null</code>.
143      *
144      * @return >=0 length of the token.
145      */
146     public abstract int length();
147     
148     /**
149      * Get the offset at which this token is present in the input
150      * or <code>-1</code> if this token is flyweight (and therefore does not store offset).
151      * <br/>
152      * <b>Note:</b> Use of {@link TokenSequence#offset()} is usually preferred over
153      * this method.
154      * <br/>
155      * For flyweight tokens the real offset of the token may only be determined
156      * by doing {@link TokenSequence#offset()} when positioned on the particular
157      * flyweight token.
158      * <br/>
159      * If necessary the flyweight token may be replaced by regular token
160      * by using {@link TokenSequence#offsetToken()}.
161      *
162      * <p>
163      * The complexity of the method should generally be constant
164      * regardless of the level of the language embedding.
165      * </p>
166      *
167      * @param tokenHierarchy token hierarchy to which the offset computation
168      *  will be related. It may either be the live token hierarchy
169      *  (which is equivalent to passing <code>null</code>)
170      *  or a snapshot of the original token hierarchy. For other values
171      *  the result is generally undefined.
172      *
173      * @return >=0 offset of the token in the input or <code>-1</code>
174      *  if this token is flyweight.
175      */
176     public abstract int offset(TokenHierarchy<?> tokenHierarchy);
177     
178     /**
179      * Checks whether this token instance is used for multiple occurrences
180      * of this token in this or other inputs.
181      * <br/>
182      * For example keywords or operators are typically flyweight tokens
183      * while e.g. identifiers are not flyweight as their text generally varies.
184      * <br/>
185      * Flyweight tokens may decrease the memory consumption for the tokens
186      * considerably for frequently used tokens. For example a single space ' '
187      * may be a useful flyweight token as it's used very often throughout a source.
188      * The decision of what tokens are made flyweight is upon the implementor
189      * of the particular language.
190      *
191      * <p>
192      * If the token is flyweight its {@link #offset(TokenHierarchy)} returns -1.
193      *
194      * @return true if the token is flyweight or false otherwise.
195      */
196     public abstract boolean isFlyweight();
197     
198     /**
199      * Check whether this token represents a complete token
200      * or whether it's a part of a complete token.
201      */
202     public abstract PartType partType();
203 
204     /**
205      * Check whether this token has preprocessed text
206      * (e.g. Unicode escapes in the token's text were translated).
207      *
208      * @return true if this token contains preprocessed text and its
209      *  {@link #preprocessedText()} returns a valid result.
210      */
211     public abstract boolean isPreprocessedText();
212 
213     /**
214      * Get a text of this token as it was preprocessed by a character preprocessor
215      * (e.g. with Unicode escapes translated).
216      */
217     public abstract CharSequence   preprocessedText();
218     
219     /**
220      * Get an description of the error that occurred during preprocessing
221      * of the token's characters.
222      */
223     public abstract String   preprocessError();
224     
225     /**
226      * Get the index relative to the token's begining (in the original input text)
227      * of where the preprocessor error has occurred.
228      *
229      * @return >=0 index where the character preprocessing error has occurred.
230      *  Returns -1 if there was no preprocessing error.
231      */
232     public abstract int preprocessErrorIndex();
233     
234     /**
235      * Quickly determine whether this token has any extra properties.
236      */
237     public abstract boolean hasProperties();
238     
239     /**
240      * Get extra property of this token.
241      * <br/>
242      * The token properties are defined by the lexer upon token creation.
243      * The clients of the API cannot set any property of the token.
244      *
245      * @param key non-null key of the property to get.
246      * @return non-null value of the property or null if the property does not
247      *  have any value.
248      *
249      * @see #hasProperties()
250      */
251     public abstract Object   getProperty(Object   key);
252 
253     /**
254      * Make sure the default implementation of <code>hashCode()</code> is used
255      * and the token can safely be used in maps.
256      */
257     public final int hashCode() {
258         return super.hashCode();
259     }
260     
261     /**
262      * Make sure the default implementation of <code>equals()</code> is used
263      * and the token can safely be used in maps.
264      */
265     public final boolean equals(Object   o) {
266         return super.equals(o);
267     }
268 
269 }
270
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags