// Origin: org.netbeans.lib.lexer.inc.TokenListUpdater (NetBeans lexer module, open-source mirror)


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

19
20 package org.netbeans.lib.lexer.inc;
21
22 import org.netbeans.api.lexer.TokenId;
23 import org.netbeans.lib.lexer.EmbeddedTokenList;
24 import org.netbeans.lib.lexer.LanguageOperation;
25 import org.netbeans.lib.lexer.LexerInputOperation;
26 import org.netbeans.lib.lexer.LexerUtilsConstants;
27 import org.netbeans.lib.lexer.token.AbstractToken;
28 import org.netbeans.spi.lexer.TokenValidator;
29
30
/**
 * Token updater fixes a list of tokens constructed for a document
 * after text of the document gets modified.
 * <br>
 * Subclasses need to define all the abstract methods
 * so that the updating method can work on real token sequences.
 *
 * <p>
 * Updater looks similar to a list iterator
 * but there are differences in the semantics
 * of the iterator's modification operations.
 * <p>
 * The algorithm used in {@link #update(MutableTokenList, TokenHierarchyEventInfo, TokenListChange)}
 * is based on "General Incremental Lexical Analysis" written
 * by Tim A. Wagner and Susan L. Graham, University
 * of California, Berkeley. It's available online
 * at <a href="http://www.cs.berkeley.edu/Research/Projects/harmonia/papers/twagner-lexing.pdf">
 * twagner-lexing.pdf</a>.
 * <br>
 * An ending <code>EOF</code> token is not used but the lookahead
 * of the ending token(s) is increased by one (past the end of the input)
 * if they have reached the EOF.
 * <br>
 * Non-startable tokens are not supported.
 * <br>
 * When updating a token with lookback one as a result
 * of modification, the lookahead of the preceding token is inspected
 * to find out whether the modification has really affected it.
 * This can often save the previous token from being relexed.
 * <br>
 * Currently the algorithm computes the lookback values on the fly
 * and does not store the lookback in the tokens. For typical languages
 * the lookback is reasonably small (0, 1 or 2) so it's usually not worth
 * consuming extra space in token instances for storing of the lookback.
 * There would also be an additional overhead of updating the lookback
 * values in the tokens after the modification, and the algorithm code would
 * be somewhat less readable.
 *
 * <p>
 * The algorithm removes the affected tokens in the natural order as they
 * follow in the token stream. That can be used when the removed tokens
 * need to be collected (e.g. in an array).
 * <br>
 * If the offset and state after token recognition match
 * the end offset and state after recognition of the originally present
 * token, then the relexing is stopped because a match was found and the newly
 * produced tokens would match the present ones.
 * <br>
 * Otherwise the token(s) in the list are removed and replaced
 * by the relexed token and the relexing continues until a match is reached.
 *
 * @author Miloslav Metelka
 * @version 1.00
 */

85
86 public final class TokenListUpdater {
87
88     /**
89      * Use incremental algorithm to update the list of tokens
90      * after a modification done in the underlying storage.
91      */

92     public static <T extends TokenId> void update(MutableTokenList<T> tokenList,
93     TokenHierarchyEventInfo eventInfo, TokenListChange<T> change) {
94         // Ensure the offsets in token list are up-to-date
95
if (tokenList.getClass() == EmbeddedTokenList.class) {
96             ((EmbeddedTokenList<? extends TokenId>)tokenList).updateStartOffset();
97         }
98
99         // Fetch offset where the modification occurred
100
int modOffset = eventInfo.modificationOffset();
101         LanguageOperation<T> languageOperation = LexerUtilsConstants.mostEmbeddedLanguageOperation(
102                 tokenList.languagePath());
103
104         int tokenCount = tokenList.tokenCountCurrent(); // presently created token count
105
// Now determine which token is the first to be relexed.
106
// If it would be either modified token or previous-of-modified token
107
// (for modification right at the begining of modified token)
108
// then the token will be attempted to be validated (without running
109
// a lexer).
110
AbstractToken<T> modToken;
111         // modTokenOffset holds begining of the token in which the modification occurred.
112
int modTokenOffset;
113         // index points to the modified token
114
int index;
115
116         if (tokenCount == 0) { // no tokens yet or all removed
117
if (!tokenList.isFullyLexed()) {
118                 // No tokens created yet (they get created lazily).
119
return; // Do nothing in this case
120
}
121             // If fully lexed the tokens must mirror the input source -> offset == 0
122
assert (modOffset == 0);
123             modToken = null;
124             modTokenOffset = 0;
125             index = 0;
126
127         } else { // at least one token exists
128
// Check whether the modification at modOffset might affect existing tokens
129
// Get index of the token in which the modification occurred
130
// Get the offset of the last token into modTokenOffset variable
131
index = tokenCount - 1;
132             modTokenOffset = tokenList.tokenOffset(index);
133             if (modOffset >= modTokenOffset) { // inside or above the last token?
134
modToken = token(tokenList, index);
135                 int modTokenEndOffset = modTokenOffset + modToken.length();
136                 if (modOffset >= modTokenEndOffset) { // above last token
137
// Modification was right at the end boundary of the last token
138
// or above it (token list can be created lazily so that is valid case).
139
// Check whether the last token could be affected at all
140
// by checking the last token's lookahead.
141
// For fully lexed inputs the characters added to the end
142
// must be properly lexed and notified (even if the last present
143
// token has zero lookahead).
144
if (!tokenList.isFullyLexed()
145                         && modOffset >= modTokenEndOffset + tokenList.lookahead(index)
146                     )
147                         return; // not affected at all
148

149                     index++;
150                     modToken = null;
151                     modTokenOffset = modTokenEndOffset;
152                 } // else -> modification inside the last token
153

154             } else { // modification in non-last token
155
// Find modified token by binary search
156
int low = 0; // use index as 'high'
157
while (low < index) {
158                     int mid = (low + index) / 2;
159                     int midStartOffset = tokenList.tokenOffset(mid);
160
161                     if (midStartOffset < modOffset) {
162                         low = mid + 1;
163                     } else if (midStartOffset > modOffset) {
164                         index = mid - 1 < 0 ? 0 : mid - 1;
165                     } else {
166                         // Token starting exactly at modOffset found
167
index = mid;
168                         modTokenOffset = midStartOffset;
169                         break;
170                     }
171                 }
172                 if (index <= low) { // no token starting right at 'modOffset'
173
modTokenOffset = tokenList.tokenOffset(index);
174                 }
175                 modToken = token(tokenList, index);
176             }
177         }
178
179         // Store the index that points to the modified token
180
// i.e. modification at its begining or inside.
181
// Index variable can later be modified but present value is important
182
// for moving of the offset gap later.
183
change.setOffsetGapIndex(index);
184
185         // Index and offset from which the relexing will start.
186
int relexIndex;
187         int relexOffset;
188         // Whether the token validation should be attempted or not.
189
boolean attemptValidation = false;
190
191         if (index == 0) { // modToken is first in the list
192
relexIndex = index;
193             relexOffset = modTokenOffset;
194             // Can validate modToken if removal does not span whole token
195
if (modToken != null && eventInfo.removedLength() < modToken.length()) {
196                 attemptValidation = true;
197             }
198
199         } else { // Previous token exists
200
// Check for insert-only right at the end of the previous token
201
if (modOffset == modTokenOffset && eventInfo.removedLength() == 0) {
202                 index--; // move to previous token
203
modToken = token(tokenList, index);
204                 modTokenOffset -= modToken.length();
205             }
206
207             // Check whether modification affected previous token
208
if (index == 0 || modTokenOffset + tokenList.lookahead(index - 1) <= modOffset) {
209                 // Modification did not affect previous token
210
relexIndex = index;
211                 relexOffset = modTokenOffset;
212                 // Check whether modification was localized to modToken only
213
if (modOffset + eventInfo.removedLength() < modTokenOffset + modToken.length()) {
214                     attemptValidation = true;
215                 }
216
217             } else { // at least previous token affected
218
relexOffset = modTokenOffset - token(tokenList, index - 1).length();
219                 relexIndex = index - 2; // Start with token below previous token
220

221                 // Go back and mark all affected tokens for removals
222
while (relexIndex >= 0) {
223                     AbstractToken<T> token = token(tokenList, relexIndex);
224                     // Check if token was not affected by modification
225
if (relexOffset + tokenList.lookahead(relexIndex) <= modOffset) {
226                         break;
227                     }
228                     relexIndex--;
229                     relexOffset -= token.length();
230                 }
231                 relexIndex++; // Next token will be relexed
232
}
233         }
234         
235         // The lowest offset at which the relexing can end
236
// (the relexing may end at higher offset if the relexed
237
// tokens will end at different boundaries than the original
238
// tokens or if the states after the tokens' recognition
239
// will differ from the original states in the original tokens.
240
int matchOffset;
241
242         // Perform token validation of modToken if possible.
243
// The index variable will hold the token index right before the matching point.
244
if (attemptValidation) {
245             matchOffset = modTokenOffset + modToken.length();
246             TokenValidator tokenValidator = languageOperation.tokenValidator(modToken.id());
247             if (tokenValidator != null
248                 && (tokenList.getClass() != IncTokenList.class
249                     || eventInfo.tokenHierarchyOperation().canModifyToken(index, modToken))
250             ) {
251                     
252 // if (tokenValidator.validateToken(modToken, modOffset - modTokenOffset, modRelOffset,
253
// removedLength, insertedLength)
254
// ) {
255
// // Update positions
256
// change.initRemovedAddedOffsets()
257

258 // return; // validated successfully
259
// }
260
}
261
262         } else { // Validation cannot be attempted
263
// Need to compute matchOffset and matchIndex
264
// by iterating forward
265
if (index < tokenCount) {
266                 matchOffset = modTokenOffset + modToken.length();
267                 int removeEndOffset = modOffset + eventInfo.removedLength();
268                 while (matchOffset < removeEndOffset && index + 1 < tokenCount) {
269                     index++;
270                     matchOffset += token(tokenList, index).length();
271                 }
272
273             } else // After last token
274
matchOffset = modTokenOffset;
275         }
276
277         // State from which the lexer can be started
278
Object JavaDoc relexState = (relexIndex > 0) ? tokenList.state(relexIndex - 1) : null;
279         // Update the matchOffset so that it corresponds to the state
280
// after the modification
281
matchOffset += eventInfo.insertedLength() - eventInfo.removedLength();
282         change.setOffset(relexOffset);
283         
284         // Variables' values:
285
// 'index' - points to modified token. Or index == tokenCount for modification
286
// past the last token.
287
// 'tokenCount' - token count in the original token list.
288
// 'relexIndex' - points to the token that will be relexed as first.
289
// 'relexOffset' - points to begining of the token that will be relexed as first.
290
// 'matchOffset' - points to end of token after which the fixed token list could
291
// possibly match the original token list. Points to end of token at 'index'
292
// variable if 'index < tokenCount' and to the end of the last token
293
// if 'index == tokenCount'.
294

295         // Check whether relexing is necessary.
296
// Necessary condition for no-relexing is that the matchToken
297
// has zero lookahead (if lookahead would be >0
298
// then the matchToken would be affected and relexOffset != matchOffset).
299
// The states before relex token must match the state after the modified token
300
// In case of removal starting and ending at token boundaries
301
// the relexing might not be necessary.
302
boolean relex = (relexOffset != matchOffset)
303                 || index >= tokenCount
304                 || !LexerUtilsConstants.statesEqual(relexState, tokenList.state(index));
305
306         // There is an extra condition that the lookahead of the matchToken
307
// must not span the next (retained) token. This condition helps to ensure
308
// that the lookaheads will be the same like during regular batch lexing.
309
// As the empty tokens are not allowed the situation may only occur
310
// for lookahead > 1.
311
int lookahead;
312         if (!relex && (lookahead = tokenList.lookahead(index)) > 1 && index + 1 < tokenCount) {
313             relex = (lookahead > token(tokenList, index + 1).length()); // check next token
314
}
315
316         if (relex) { // Start relexing
317
LexerInputOperation<T> lexerInputOperation
318                     = tokenList.createLexerInputOperation(relexIndex, relexOffset, relexState);
319
320             do { // Fetch new tokens from lexer as necessary
321
AbstractToken<T> token = lexerInputOperation.nextToken();
322                 if (token == null) {
323                     attemptValidation = false;
324                     break;
325                 }
326
327                 lookahead = lexerInputOperation.lookahead();
328                 Object JavaDoc state = lexerInputOperation.lexerState();
329                 change.addToken(token, lookahead, state);
330
331                 relexOffset += token.length();
332                 // Remove obsolete tokens that would cover the area of just lexed token
333
if (relexOffset > matchOffset && index < tokenCount) {
334                     attemptValidation = false;
335                     do {
336                         index++;
337                         if (index == tokenCount) {
338                             // Make sure the number of removed tokens will be computed properly later
339
modToken = null;
340                             // Check whether it should lex till the end
341
// or whether 'Match at anything' should be done
342
if (tokenList.isFullyLexed()) {
343                                 // Will lex till the end of input
344
matchOffset = Integer.MAX_VALUE;
345                             } else {
346                                 // Force stop lexing
347
relex = false;
348                             }
349                             break;
350                         }
351                         matchOffset += token(tokenList, index).length();
352                     } while (relexOffset > matchOffset);
353                 }
354
355                 // Check whether the new token ends at matchOffset with the same state
356
// like the original which typically means end of relexing
357
if (relexOffset == matchOffset
358                     && (index < tokenCount)
359                     && LexerUtilsConstants.statesEqual(state, tokenList.state(index))
360                 ) {
361                     // Check whether lookaheads stored in the subsequent tokens in the chain
362
// reflect the lookahead of the just relexed token. Otherwise the algorithm
363
// would not work properly.
364
// See initial part of SimpleRandomTest.test() verifies this.
365
//
366
// The algorithm also attempts to have the same lookaheads in tokens
367
// like the regular batch scanning would produce.
368
// Although this is probably not strictly necessary requirement
369
// it helps to simplify the debugging in case the lexer does not work
370
// well in the incremental setup.
371
// The following code checks that the lookahead of the original match token
372
// (i.e. the token right before matchOffset) does "end" inside
373
// the next token - if not then relexing the next token is done.
374
// The second part of SimpleRandomTest.test() verifies this.
375
int matchTokenLookahead = tokenList.lookahead(index);
376                     boolean lookaheadOK = true;
377                     // When assuming non-empty tokens the lookahead 1
378
// just reaches the end of the next token
379
// so lookhead < 1 is always fine from this point of view.
380
if (matchTokenLookahead > 1 || lookahead > 1) {
381                         int laCheckIndex = index + 1;
382                         int laCheckTokenOffset = matchOffset;
383                         while (laCheckIndex < tokenCount) {
384                             int laCheckTokenLength = token(tokenList, laCheckIndex).length();
385                             lookahead -= laCheckTokenLength;
386                             matchTokenLookahead -= laCheckTokenLength;
387                             laCheckTokenOffset += laCheckTokenLength;
388                             if (lookahead <= 0 && matchTokenLookahead <=0) {
389                                 break; // No more work
390
}
391                             if (lookahead != tokenList.lookahead(laCheckIndex)
392                                     || matchTokenLookahead > 0
393                             ) {
394                                 // This token must be relexed
395
index = laCheckIndex;
396                                 matchOffset = laCheckTokenOffset;
397                                 lookaheadOK = false;
398                                 // Continue - further tokens may be affected
399
}
400                             laCheckIndex++;
401                         }
402                     }
403
404                     if (lookaheadOK) {
405                         if (attemptValidation) {
406 // if (modToken.id() == token.id()
407
// && tokenList.lookahead(index) == lookahead
408
// && !modToken.isFlyweight()
409
// && !token.isFlyweight()
410
// && (tokenList.getClass() != IncTokenList.class
411
// || change.tokenHierarchyOperation().canModifyToken(index, modToken))
412
// && LexerSpiTokenPackageAccessor.get().restoreToken(
413
// languageOperation.tokenHandler(),
414
// modToken, token)
415
// ) {
416
// // Restored successfully
417
// // TODO implement - fix token's length and return
418
// // now default in fact to failed validation
419
// }
420
attemptValidation = false;
421                         }
422                         break;
423                     }
424                 }
425             } while (relex);
426             lexerInputOperation.release();
427         }
428
429         // Now ensure that the original tokens will be replaced by the relexed ones.
430
change.setIndex(relexIndex);
431         change.setAddedEndOffset(relexOffset);
432         tokenList.replaceTokens(eventInfo, change,
433             (modToken != null) ? (index - relexIndex + 1) : (index - relexIndex));
434     }
435     
436     private static <T extends TokenId> AbstractToken<T> token(MutableTokenList<T> tokenList, int index) {
437         Object JavaDoc tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainerUnsync(index); // Unsync impl suffices
438
return LexerUtilsConstants.token(tokenOrEmbeddingContainer);
439     }
440
441 }
442
Popular Tags