KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > api > lexer > TokenHierarchy


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

package org.netbeans.api.lexer;

import java.io.Reader;
import java.util.Set;
import javax.swing.text.Document;
import org.netbeans.lib.lexer.TokenHierarchyOperation;
import org.netbeans.lib.lexer.TokenList;
import org.netbeans.lib.lexer.inc.DocumentInput;

29 /**
30  * Token hierarchy represents a given input source as a browsable hierarchy of tokens.
31  * <br>
32  * It's is an entry point into the Lexer API.
33  * <br/>
34  * It allows to create token sequences for hierarchy exploration
35  * and watching for token changes by attaching the token hierarchy listeners.
36  * <br>
37  * The hierarchy may either be flat or it can be a tree if the
38  * corresponding language hierarchy contains language embeddings.
39  *
40  * <p/>
41  * Token hierarchy may also act as a snapshot of another "live" token hierarchy.
42  * <br/>
43  * The snapshot may be created at any time by using {@link #createSnapshot()}
44  * on the live token hierarchy.
45  *
46  * @author Miloslav Metelka
47  * @version 1.00
48  */

49
50 public final class TokenHierarchy<I> { // "I" stands for mutable input source
51

52     /**
53      * Get or create mutable token hierarchy for the given swing document.
54      * <br>
55      * All the operations with the obtained token hierarchy
56      * must be done under document's read lock (or write lock).
57      *
58      * @param doc swing text document for which the token hiearchy should be obtained.
59      * @return token hierarchy or <code>null</code> in case the token hierarchy
60      * does not exist yet and the <code>Language.class</code>
61      * document property was not yet initialized with the valid language
62      * so the hierarchy cannot be created.
63      */

64     public static <D extends Document JavaDoc> TokenHierarchy<D> get(D doc) {
65         DocumentInput<D> di = DocumentInput.get(doc);
66         return di.tokenHierarchyControl().tokenHierarchy();
67     }
68     
69     /**
70      * Create token hierarchy for the given non-mutating input text (for example
71      * java.lang.String).
72      *
73      * @see #create(CharSequence,boolean,Language,Set,InputAttributes)
74      */

75     public static TokenHierarchy<Void JavaDoc> create(CharSequence JavaDoc inputText,
76     Language<? extends TokenId> language) {
77         return create(inputText, false, language, null, null);
78     }
79
80     /**
81      * Create token hierarchy for the given input text.
82      *
83      * @param inputText input text containing the characters to tokenize.
84      * @param copyInputText <code>true</code> in case the content of the input
85      * will not be modified in the future so the created tokens can reference it.
86      * <br>
87      * <code>false</code> means that the text can change in the future
88      * and the tokens should not directly reference it. Instead copy of the necessary text
89      * from the input should be made and the original text should not be referenced.
90      * @param language language defining how the input
91      * will be tokenized.
92      * @param skipTokenIds set containing the token ids for which the tokens
93      * should not be created in the created token hierarchy.
94      * <br/>
95      * <code>null</code> may be passed which means that no tokens will be skipped.
96      * <br/>
97      * This applies to top level of the token hierarchy only (not to embedded tokens).
98      * <br/>
99      * The provided set should be efficient enough - ideally created by e.g.
100      * {@link Language#tokenCategoryMembers(String)}
101      * or {@link Language#merge(Collection,Collection)}.
102      *
103      * @param inputAttributes additional properties related to the input
104      * that may influence token creation or lexer operation
105      * for the particular language (such as version of the language to be used).
106      * @return non-null token hierarchy.
107      */

108     public static <T extends TokenId> TokenHierarchy<Void JavaDoc> create(
109     CharSequence JavaDoc inputText, boolean copyInputText,
110     Language<T> language, Set JavaDoc<T> skipTokenIds, InputAttributes inputAttributes) {
111
112         return new TokenHierarchyOperation<Void JavaDoc,T>(inputText, copyInputText,
113                 language, skipTokenIds, inputAttributes).tokenHierarchy();
114     }
115
116     /**
117      * Create token hierarchy for the given reader.
118      *
119      * @param inputReader input reader containing the characters to tokenize.
120      * @param language language defining how the input
121      * will be tokenized.
122      * @param skipTokenIds set containing the token ids for which the tokens
123      * should not be created in the created token hierarchy.
124      * <br/>
125      * <code>null</code> may be passed which means that no tokens will be skipped.
126      * <br/>
127      * This applies to top level of the token hierarchy only (not to embedded tokens).
128      * <br/>
129      * The provided set should be efficient enough - ideally created by e.g.
130      * {@link Language#tokenCategoryMembers(String)}
131      * or {@link Language#merge(Collection,Collection)}.
132      *
133      * @param inputAttributes additional properties related to the input
134      * that may influence token creation or lexer operation
135      * for the particular language (such as version of the language to be used).
136      * @return non-null token hierarchy.
137      */

138     public static <T extends TokenId> TokenHierarchy<Void JavaDoc> create(
139     Reader JavaDoc inputReader,
140     Language<T> language, Set JavaDoc<T> skipTokenIds, InputAttributes inputAttributes) {
141
142         return new TokenHierarchyOperation<Void JavaDoc,T>(inputReader,
143                 language, skipTokenIds, inputAttributes).tokenHierarchy();
144     }
145     
146
147     private TokenHierarchyOperation<I,?> operation;
148
149     TokenHierarchy(TokenHierarchyOperation<I,?> operation) {
150         this.operation = operation;
151     }
152
153     /**
154      * Get token sequence of the top level language of the token hierarchy.
155      * <br/>
156      * The token sequences for inner levels of the token hierarchy can be
157      * obtained by calling {@link TokenSequence#embedded()}.
158      *
159      * @return non-null token sequence of the top level of the token hierarchy.
160      */

161     public TokenSequence<? extends TokenId> tokenSequence() {
162         @SuppressWarnings JavaDoc("unchecked")
163         TokenSequence<? extends TokenId> ts = new TokenSequence<TokenId>(
164                 (TokenList<TokenId>)operation.checkedTokenList());
165         return ts;
166     }
167
168     /**
169      * Get token sequence of the top level of the language hierarchy
170      * only if it's of the given language.
171      *
172      * @return non-null token sequence or null if the top level token sequence
173      * satisfies the condition <code>(tokenSequence().language() == language)</code>.
174      * Null is returned otherwise.
175      *
176      */

177     public <T extends TokenId> TokenSequence<T> tokenSequence(Language<T> language) {
178         TokenList<? extends TokenId> tokenList = operation.checkedTokenList();
179         @SuppressWarnings JavaDoc("unchecked")
180         TokenSequence<T> ts
181                 = (tokenList.languagePath().topLanguage() == language)
182                     ? new TokenSequence<T>((TokenList<T>)tokenList)
183                     : null;
184         return ts;
185     }
186     
187     /**
188      * Get a set of language paths used by this token hierarchy.
189      * <br/>
190      * The set includes "static" paths that are those reachable by traversing
191      * token ids of the top language and searching for the default embeddings
192      * that could be created by
193      * {@link org.netbeans.spi.lexer.LanguageHierarchy#embedding(Token,LanguagePath,InputAttributes)}.
194      *
195      */

196     public Set JavaDoc<LanguagePath> languagePaths() {
197         return operation.languagePaths();
198     }
199
200     /**
201      * Whether input text of this token hierarchy is mutable or not.
202      *
203      * @return true if the input text is mutable or false otherwise.
204      */

205     public boolean isMutable() {
206         return operation.isMutable();
207     }
208     
209     /**
210      * Get mutable input source providing text over which
211      * this token hierarchy was constructed.
212      * <br/>
213      * For example it may be a swing text document instance
214      * {@link javax.swing.text.Document} in case the token hierarchy
215      * was constructed for its text.
216      * <br/>
217      * Snapshot will return the same input source
218      * as the original mutable token hierarchy.
219      *
220      * @return mutable input source or null in case this token hierarchy
221      * was not created over mutable input source.
222      */

223     public I mutableInputSource() {
224         return operation.mutableInputSource();
225     }
226     
227     /**
228      * Token hierarchy may be set inactive to release resources consumed
229      * by tokens.
230      * <br>
231      * Only token hierarchies over a mutable input can become inactive.
232      *
233      * @return true if valid tokens exist for this hierarchy
234      * or false if the token hierarchy is inactive and there are currently
235      * no active tokens to represent it.
236      */

237     public boolean isActive() {
238         return operation.isActive();
239     }
240     
241     /**
242      * Add listener for token changes inside this hierarchy.
243      *
244      * @param listener token change listener to be added.
245      */

246     public void addTokenHierarchyListener(TokenHierarchyListener listener) {
247         operation.addTokenHierarchyListener(listener);
248     }
249     
250     /**
251      * Remove listener for token changes inside this hierarchy.
252      *
253      * @param listener token change listener to be removed.
254      */

255     public void removeTokenHierarchyListener(TokenHierarchyListener listener) {
256         operation.removeTokenHierarchyListener(listener);
257     }
258     
259     /**
260      * Create a snapshot of the present mutable token hierarchy.
261      * <br/>
262      * Even with subsequent modifications to the "live" token hierarchy
263      * the tokens of the snapshot will retain the original ids, texts and offsets.
264      * <br/>
265      * The snapshot retains the original token instances that were present
266      * in the token hierarchy at time of its creation.
267      *
268      * <p/>
269      * The snapshot creation is cheap. With subsequent modifications
270      * of the mutable input source the snapshot maintenance brings an overhead.
271      * Therefore the snapshot should be released as soon as it's no longer needed.
272      * Ideally the releasing should be performed by using {@link #snapshotRelease()}.
273      * Another way is to forget the reference to the snapshot token hierarchy
274      * but it depends on the garbage collector's releasing of the weak reference.
275      * <br/>
276      * As the snapshot shares information with the live hierarchy
277      * its content must also be accessed under a read lock
278      * in the same way like the live hierarchy.
279      *
280      * <br/>
281      * If a particular token in the snapshot is mutable
282      * then <code>token.offset(snapshotHierarchy)</code> will give the offset
283      * of the token in a snapshot while <code>token.offset(null)</code>
284      * will return the offset of the token in the live hierarchy.
285      *
286      * <p/>
287      * The snapshot attempts to share tokens with the live token hierarchy.
288      * <br/>
289      * Upon a first modification in the live token hierarchy (after the snapshot creation)
290      * an initial and ending areas of tokens shared between the snapshot
291      * and live hierarchy get created. The tokens that were in the live hierarchy prior
292      * to the modification (but which were removed from it because of the modification)
293      * are captured and used by the snapshot as the "middle" area. With subsequent
294      * modifications the initial and ending areas of shared tokens may be reduced
295      * (and the original tokens captured for the snapshot)
296      * if any of the tokens contained in them get modified.
297      *
298      * <p/>
299      * The overhead of the subsequent token modifications
300      * for an existing snapshot in the present implementation are the following:<ul>
301      * <li> Removed token's text must be maintained which creates an overhead
302      * equal to the original token's text characters plus about 24 bytes.
303      * <br/>
304      * This is a single-time overhead per each removed token
305      * referenced by at least one snapshot.
306      * </li>
307      * <li> Token's original offset must be maintained. The overhead
308      * is about 32 bytes per token per snapshot.
309      * </li>
310      *
311      * @return non-null new token hierarchy which is a snapshot
312      * of this token hierarchy. For non-mutable token hierarchies
313      * this method returns null (original token hierarchy may be used
314      * in the same way like the snapshot would be used).
315      */

316     public TokenHierarchy<I> createSnapshot() {
317         return operation.createSnapshot();
318     }
319
320     /**
321      * Check whether this token hierarchy is a snapshot.
322      *
323      * @return true if this is snapshot or false if not.
324      */

325     public boolean isSnapshot() {
326         return operation.isSnapshot();
327     }
328
329     /**
330      * Release snapshot - should only be called if this token hierarchy
331      * is a snapshot.
332      * @throws IllegalStateException if this token hierarchy was already released
333      * or it's not a snapshot.
334      */

335     public void snapshotRelease() {
336         operation.snapshotRelease();
337     }
338
339     /**
340      * Check whether this snapshot is released.
341      *
342      * @return true if this snapshot is already released or false if not.
343      * @throws IllegalStateException if this token hierarchy is not a snapshot.
344      */

345     public boolean isSnapshotReleased() {
346         return operation.isSnapshotReleased();
347     }
348
349     /**
350      * If this token hierarchy is snapshot then return the token hierarchy
351      * for which this snapshot was constructed.
352      *
353      * @return live token hierarchy or null if this is not a snapshot.
354      */

355      public TokenHierarchy<I> snapshotOf() {
356          return operation.snapshotOf();
357      }
358      
359      /**
360       * Get start offset of the area where the tokens in the token hierarchy snapshot
361       * have explicitly shifted offsets.
362       * <br/>
363       * With subsequent modifications the area where the token offsets are shifted
364       * explicitly gets extended (modifications with lowest and highest offsets
365       * define the area boundaries).
366       * <br/>
367       * Below this offset the snapshot uses all the tokens from the live token hierarchy
368       * directly.
369       * <br/>
370       * Above this area (and below {@link #tokenShiftEndOffset()} the tokens
371       * are either removed from the live token hierarchy or still present in it
372       * but all of them have explicitly corrected offsets.
373       * <br/>
374       * The clients may get a token from the snapshot and check its offset
375       * to find out whether it's below token shift start offset.
376       * <br/>
377       * If so then the token is present in the live token hierarchy as well
378       * and it has the same offset there like in the snapshot.
379       *
380       * @see #tokenShiftEndOffset()
381       */

382      public int tokenShiftStartOffset() {
383          return operation.tokenShiftStartOffset();
384      }
385      
386      /**
387       * Get end offset of the area where the tokens in the token hierarchy snapshot
388       * have explicitly shifted offsets.
389       * <br/>
390       * The clients may get a token from the snapshot and check its offset
391       * to find out whether it's above token shift end offset.
392       * <br/>
393       * If so then the token is present in the live token hierarchy as well
394       * and its offset there can be determined by using
395       * <code>Token.offset(null)</code>.
396       *
397       * @see #tokenShiftStartOffset()
398       */

399      public int tokenShiftEndOffset() {
400          return operation.tokenShiftEndOffset();
401      }
402
403     /**
404      * Obtaining of token hierarchy operation is only intended to be done
405      * by package accessor.
406      */

407     TokenHierarchyOperation<I,?> operation() {
408         return operation;
409     }
410
411 }
412
Popular Tags