LanguageHierarchy


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.spi.lexer;
21  
22  import java.util.Collection  ;
23  import java.util.Map  ;
24  import org.netbeans.api.lexer.InputAttributes;
25  import org.netbeans.api.lexer.Language;
26  import org.netbeans.api.lexer.LanguagePath;
27  import org.netbeans.api.lexer.Token;
28  import org.netbeans.api.lexer.TokenId;
29  import org.netbeans.lib.lexer.CharPreprocessorOperation;
30  import org.netbeans.lib.lexer.CharProvider;
31  import org.netbeans.lib.lexer.LanguageOperation;
32  import org.netbeans.lib.lexer.LexerInputOperation;
33  import org.netbeans.lib.lexer.LexerSpiPackageAccessor;
34  import org.netbeans.lib.lexer.TokenIdImpl;
35  
36  /**
37   * Definition of a language, its lexer and its embedded languages.
38   * <br/>
39   * It's a mirror of {@link Language} on SPI level containing
40   * additional information necessary for the lexer infrastructure operation.
41   * <br/>
42   * The language hierarchies should be implemented by SPI providers
43   * and their languages should be given for public use
44   * (language hierarchy classes do not need to be public though).
45   * <br/>
46   * A typical situation may look like this:<pre>
47   *
48   * public enum MyTokenId implements TokenId {
49   *
50   *     ERROR(null, "error"),
51   *     IDENTIFIER(null, "identifier"),
52   *     ABSTRACT("abstract", "keyword"),
53   *     ...
54   *     SEMICOLON(";", "separator"),
55   *     ...
56   *
57   *
58   *     private final String fixedText; // Used by lexer for production of flyweight tokens
59   *
60   *     private final String primaryCategory;
61   *
62   *     MyTokenId(String fixedText, String primaryCategory) {
63   *         this.fixedText = fixedText;
64   *         this.primaryCategory = primaryCategory;
65   *     }
66   *
67   *     public String fixedText() {
68   *         return fixedText;
69   *     }
70   *
71   *     public String primaryCategory() {
72   *         return primaryCategory;
73   *     }
74   *
75   *
76   *     private static final Language&lt;MyTokenId&gt; language = new LanguageHierarchy&lt;MyTokenId&gt;() {
77   *         <code>@Override</code>
78   *         protected String mimeType() {
79   *             return "text/x-my";
80   *         }
81   *
82   *         <code>@Override</code>
83   *         protected Collection&lt;MyTokenId&gt; createTokenIds() {
84   *             return EnumSet.allOf(MyTokenId.class);
85   *         }
86   *
87   *         <code>@Override</code>
88   *         protected Lexer&lt;MyTokenId&gt; createLexer(LexerRestartInfo&lt;MyTokenId&gt; info) {
89   *             return new MyLexer(info);
90   *         }
91   *
92   *     }.language();
93   *
94   *     public static Language&lt;MyTokenId&gt; language() {
95   *         return language;
96   *     }
97   *
98   * }
99   * </pre>
100  *
101  * @author Miloslav Metelka
102  * @version 1.00
103  */
104 
105 public abstract class LanguageHierarchy<T extends TokenId> {
106     
107     static {
108         LexerSpiPackageAccessor.register(new Accessor());
109     }
110 
111     /**
112      * Create a default token id instance in case the token ids
113      * are generated (not created by enum class).
114      */
115     public static TokenId newId(String   name, int ordinal) {
116         return newId(name, ordinal, null);
117     }
118 
119     /**
120      * Create a default token id instance in case the token ids
121      * are generated (not created by enum class).
122      */
123     public static TokenId newId(String   name, int ordinal, String   primaryCategory) {
124         return new TokenIdImpl(name, ordinal, primaryCategory);
125     }
126 
127     
128     /** Operation containing impls of additional services for this language hierarchy. */
129     LanguageOperation<T> operation = new LanguageOperation<T>(this);
130 
131     /**
132      * Provide a collection of token ids that comprise the language.
133      * <br>
134      * If token ids are defined as enums then this method
135      * should simply return <code>EnumSet.allOf(MyTokenId.class)</code>.
136      *
137      * <p>
138      * This method is only called once by the infrastructure
139      * (when constructing language) so it does
140      * not need to cache its result.
141      * <br>
142      * This method is called in synchronized section.
143      * If its implementation would use any synchronization
144      * a care must be taken to prevent deadlocks.
145      * </p>
146      *
147      * @return non-null collection of {@link TokenId} instances.
148      */
149     protected abstract Collection  <T> createTokenIds();
150 
151     /**
152      * Provide map of token category names to collection of its members.
153      * <br/>
154      * The results of this method will be merged with the primary-category
155      * information found in token ids.
156      * <br>
157      * This method is only called once by the infrastructure
158      * (when constructing language) so it does
159      * not need to cache its result.
160      * <br>
161      * This method is called in synchronized section.
162      * If its implementation would use any synchronization
163      * a care must be taken to prevent deadlocks.
164      *
165      *  <p>
166      *  There is a convention that the category names should only consist
167      *  of lowercase letters, numbers and hyphens.
168      *
169      * @return mapping of category name to collection of its ids.
170      *  It may return null to signal no mappings.
171      */
172     protected Map  <String  ,Collection  <T>> createTokenCategories() {
173         return null; // no extra categories
174     }
175 
176     /**
177      * Create lexer prepared for returning tokens
178      * from subsequent calls to {@link Lexer#nextToken()}.
179      *
180      * @param info non-null lexer restart info containing the information
181      *  necessary for lexer restarting.
182      */
183     protected abstract Lexer<T> createLexer(LexerRestartInfo<T> info);
184     
185     /**
186      * Gets the mime type of the language constructed from this language hierarchy.
187      *
188      * @return non-null language's mime type.
189      * @see org.netbeans.api.lexer.LanguagePath#mimePath()
190      */
191     protected abstract String   mimeType();
192     
193     /**
194      * Get language embedding (if exists) for a particular token
195      * of the language at this level of language hierarchy.
196      * <br>
197      * This method will only be called if the given token instance
198      * will not be flyweight token or token with custom text:
199      * <code>token.isFlyweight() == false && token.isCustomText() == false</code>
200      * <br>
201      * That restriction exists because the children token list is constructed
202      * lazily and the infrastructure needs to access the token's parent token
203      * list which would not be possible if the token would be flyweight.
204      *
205      * @param token non-null token for which the language embedding will be resolved.
206      *  <br/>
207      *  The token may have a zero length <code>({@link Token#length()} == 0)</code>
208      *  in case the language infrastructure performs a poll for all embedded
209      *  languages for the 
210      *
211      * @param languagePath non-null language path at which the language embedding
212      *  is being created. It may be used for obtaining appropriate information
213      *  from inputAttributes.
214      *
215      * @param inputAttributes input attributes that could affect the embedding creation.
216      *  It may be null if there are no extra attributes.
217      *
218      * @return language embedding instance or null if there is no language embedding
219      *  for this token.
220      */
221     protected LanguageEmbedding<? extends TokenId> embedding(Token<T> token,
222     LanguagePath languagePath, InputAttributes inputAttributes) {
223         return null; // No extra hardcoded embedding by default
224     }
225     
226     /**
227      * Create character preprocessor that translates certain character sequences
228      * into characters (for example Unicode escape sequences).
229      *
230      * @return valid preprocessor or null if there is no extra preprocessor.
231      */
232     protected CharPreprocessor createCharPreprocessor() {
233         return null; // no preprocessor by default
234     }
235     
236     /**
237      * Create token validator for the given token id.
238      *
239      * @param tokenId token id for which the token validator should be returned.
240      * @return valid token validator or null if there is no validator
241      *  for the given token id.
242      */
243     protected TokenValidator<T> createTokenValidator(T tokenId) {
244         return null;
245     }
246 
247     /**
248      * Determine whether the text of the token with the particular id should
249      * be retained after the token has been removed from the token list
250      * because of the underlying mutable input source modification.
251      * <br/>
252      * {@link org.netbeans.api.lexer.Token#text()} will continue
253      * to return the value that it had right before the token's removal.
254      * <br/>
255      * This may be useful if the tokens are held directly in parse trees
256      * and the parser queries the tokens for text.
257      *
258      * <p>
259      * Retaining text in the tokens has performance and memory implications
260      * and should only be done selectively for tokens where it's desired
261      * (such as identifiers).
262      * <br/>
263      * The extra performance and memory penalty only happens during
264      * token's removal from the token list for the given input.
265      * Token creation performance and memory consumption during
266      * token's lifetime stay unaffected.
267      * </p>
268      *
269      * <p>
270      * Retaining will only work if the input source is capable of providing
271      * the removed text right after the modification has been performed.
272      * </p>
273      *
274      * @return true if the text should be retained or false if not.
275      */
276     protected boolean isRetainTokenText(T tokenId) {
277         return false;
278     }
279 
280 
281     /**
282      * Get language constructed for this language hierarchy
283      * based on token ids and token categories provided.
284      *
285      * @return non-null language.
286      */
287     public final Language<T> language() {
288         return operation.language();
289     }
290     
291     /** Enforce default implementation of <code>hashCode()</code>. */
292     public final int hashCode() {
293         return super.hashCode();
294     }
295     
296     /** Enforce default implementation of <code>equals()</code>. */
297     public final boolean equals(Object   o) {
298         return super.equals(o);
299     }
300 
301     public String   toString() {
302         return getClass().getName(); // for debugging purposes only
303     }
304     
305     /** Implementation of lexer spi package accessor. */
306     private static final class Accessor extends LexerSpiPackageAccessor {
307         
308         public <T extends TokenId> Collection  <T> createTokenIds(LanguageHierarchy<T> languageHierarchy) {
309             return languageHierarchy.createTokenIds();
310         }
311 
312         public <T extends TokenId> Map  <String  ,Collection  <T>> createTokenCategories(LanguageHierarchy<T> languageHierarchy) {
313             return languageHierarchy.createTokenCategories();
314         }
315 
316         public String   mimeType(LanguageHierarchy<? extends TokenId> languageHierarchy) {
317             return languageHierarchy.mimeType();
318         }
319 
320         public <T extends TokenId> LanguageOperation<T> operation(LanguageHierarchy<T> languageHierarchy) {
321             return languageHierarchy.operation;
322         }
323         
324         public <T extends TokenId> LanguageEmbedding<? extends TokenId> embedding(LanguageHierarchy<T> languageHierarchy,
325         Token<T> token, LanguagePath languagePath, InputAttributes inputAttributes) {
326             return languageHierarchy.embedding(token, languagePath, inputAttributes);
327         }
328 
329         public <T extends TokenId> Lexer<T> createLexer(
330         LanguageHierarchy<T> languageHierarchy, LexerRestartInfo<T> info) {
331             return languageHierarchy.createLexer(info);
332         }
333 
334         public <T extends TokenId> LexerRestartInfo<T> createLexerRestartInfo(
335         LexerInput input, TokenFactory<T> tokenFactory, Object   state,
336         LanguagePath languagePath, InputAttributes inputAttributes) {
337             return new LexerRestartInfo<T>(input, tokenFactory, state, languagePath, inputAttributes);
338         }
339 
340         public <T extends TokenId> TokenValidator<T> createTokenValidator(
341         LanguageHierarchy<T> languageHierarchy, T id) {
342             return languageHierarchy.createTokenValidator(id);
343         }
344 
345         public <T extends TokenId> boolean isRetainTokenText(
346         LanguageHierarchy<T> languageHierarchy, T id) {
347             return languageHierarchy.isRetainTokenText(id);
348         }
349 
350         public CharPreprocessor createCharPreprocessor(LanguageHierarchy languageHierarchy) {
351             return languageHierarchy.createCharPreprocessor();
352         }
353 
354         public LexerInput createLexerInput(CharProvider charProvider) {
355             return new LexerInput(charProvider);
356         }
357 
358         public void init(CharPreprocessor preprocessor, CharPreprocessorOperation operation) {
359             preprocessor.init(operation);
360         }
361 
362         public void preprocessChar(CharPreprocessor preprocessor) {
363             preprocessor.preprocessChar();
364         }
365 
366         public Language<? extends TokenId> language(MutableTextInput<?> mti) {
367             return mti.language();
368         }
369 
370         public <T extends TokenId> LanguageEmbedding<T> createLanguageEmbedding(
371         Language<T> language, int startSkipLength, int endSkipLength, boolean joinSections) {
372             return new LanguageEmbedding<T>(language, startSkipLength, endSkipLength, joinSections);
373         }
374 
375         public CharSequence   text(MutableTextInput<?> mti) {
376             return mti.text();
377         }
378 
379         public InputAttributes inputAttributes(MutableTextInput<?> mti) {
380             return mti.inputAttributes();
381         }
382 
383         public <I> I inputSource(MutableTextInput<I> mti) {
384             return mti.inputSource();
385         }
386 
387         public <T extends TokenId> TokenFactory<T> createTokenFactory(
388         LexerInputOperation<T> lexerInputOperation) {
389             return new TokenFactory<T>(lexerInputOperation);
390         }
391 
392     }
393 
394 }
395
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags