KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > editor > ext > plain > PlainSyntax


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.editor.ext.plain;
21
22 import org.netbeans.editor.Syntax;
23 import org.netbeans.editor.TokenID;
24
25 /**
26 * Lexical analyzer for plain text.
27 *
28 * @author Miloslav Metelka
29 * @version 1.00
30 */

31
32 public class PlainSyntax extends Syntax {
33
34     /* Internal states used internally by analyzer. There
35     * can be any number of them declared by the analyzer.
36     * They are usually numbered starting from zero but they don't
37     * have to. The only reserved value is -1 which is reserved
38     * for the INIT state - the initial internal state of the analyzer.
39     */

40     private static final int ISI_TEXT = 0;
41
42     public PlainSyntax() {
43         tokenContextPath = PlainTokenContext.contextPath;
44     }
45
46     /** This is core function of analyzer and it returns one of following numbers:
47     * a) token number of next token from scanned text
48     * b) EOL when end of line was found in scanned buffer
49     * c) EOT when there is no more chars available in scanned buffer.
50     *
51     * The function scans the active character and does one or more
52     * of the following actions:
53     * 1. change internal analyzer state (state = new-state)
54     * 2. return token ID (return token-ID)
55     * 3. adjust current position to signal different end of token;
56     * the character that offset points to is not included in the token
57     */

58     protected TokenID parseToken() {
59         // The main loop that reads characters one by one follows
60
while (offset < stopOffset) {
61             char ch = buffer[offset]; // get the current character
62

63             switch (state) { // switch by the current internal state
64
case INIT:
65                 switch (ch) {
66                 case '\n':
67                     offset++;
68                     return PlainTokenContext.EOL;
69                 default:
70                     state = ISI_TEXT;
71                     break;
72                 }
73                 break;
74
75             case ISI_TEXT:
76                 switch (ch) {
77                 case '\n':
78                     state = INIT;
79                     return PlainTokenContext.TEXT;
80                 }
81                 break;
82
83             } // end of switch(state)
84

85             offset++; // move to the next char
86
}
87
88         /* At this state there's no more text in the scanned buffer.
89         * The caller will decide either to stop scanning at all
90         * or to relocate scanning and provide next buffer with characters.
91         * The lastBuffer variable indicates whether the scanning will
92         * stop (true) or the caller will provide another buffer
93         * to continue on (false) and call relocate() to continue on the given buffer.
94         * If this is the last buffer, the analyzer must ensure
95         * that for all internal states there will be some token ID returned.
96         * The easiest way how to ensure that all the internal states will
97         * be covered is to copy all the internal state constants and
98         * put them after the switch() and provide the code that will return
99         * appropriate token ID.
100         *
101         * When there are no more characters available in the buffer
102         * and the buffer is not the last one the analyzer can still
103         * decide to return the token ID even if it doesn't know whether
104         * the token is complete or not. This is possible in this simple
105         * implementation for example because it doesn't matter whether
106         * it returns the text all together or broken into several pieces.
107         * The advantage of such aproach is that the preScan value
108         * is minimized which avoids the additional increasing of the buffer
109         * by preScan characters, but on the other hand it can become
110         * problematic if the token should be forwarded for some further
111         * processing. For example it could seem handy to return incomplete
112         * token for java block comments but it could become difficult
113         * if we would want to analyzer these comment tokens additionally
114         * by the HTML analyzer for example.
115         */

116
117         // Normally the following block would be done only for lastBuffer == true
118
// but in this case it can always be done
119
switch (state) {
120         case ISI_TEXT:
121             state = INIT;
122             return PlainTokenContext.TEXT;
123         }
124
125         // need to continue on another buffer
126
return null;
127     }
128
129 }
130
Popular Tags