KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > lib > lexer > UnicodeEscapesPreprocessor


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.lib.lexer;
21
22 import org.netbeans.spi.lexer.CharPreprocessor;
23 import org.netbeans.spi.lexer.LexerInput;
24
25 /**
26  * The character translator allows to translate a sequence
27  * of characters to a single character so it may be used
28  * for example for generic Unicode sequences translation.
29  *
30  * <p>
31  * The preprocessor must be stateless.
32  *
33  * @author Miloslav Metelka
34  * @version 1.00
35  */

36
37 public final class UnicodeEscapesPreprocessor extends CharPreprocessor {
38
39     /**
40      * Preprocess a input text preprocessing unicode escape sequences.
41      */

42     protected void preprocessChar() {
43         int ch;
44         switch (ch = inputRead()) {
45             case '\\': // 1 input-char-read
46
// According to JLS only odd number of backslashes
47
// opens a unicode escape sequence. Therefore after seeing two
48
// backslashes it's possible to pass them unchanged.
49
switch (ch = inputRead()) {
50                     case 'u': // 2 input-chars-read
51
// Four hex digits should follow
52
int i;
53                         int c;
54                         for (i = 4; i > 0; i--) {
55                             switch (c = inputRead()) {
56                                 case '0': case '1': case '2': case '3': case '4':
57                                 case '5': case '6': case '7': case '8': case '9':
58                                     ch = (ch << 4) + (c - '0');
59                                     break;
60                                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
61                                     ch = (ch << 4) + (c - 'a' + 10);
62                                     break;
63                                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
64                                     ch = (ch << 4) + (c - 'A' + 10);
65                                     break;
66                                 case LexerInput.EOF: // Do not backup EOF
67
// EOF does not count for a character
68
i = -i; // cause for() loop to break
69
break;
70                                 default: // Invalid char in the sequence
71
inputBackup(1); // backup the extra non-EOF char
72
i = -i; // cause for() loop to break
73
break;
74                             }
75                         }
76                         if (i < 0) { // Invalid char or EOF
77
// Return Unicode invalid char
78
// i < 0 -> (4 - number-of-read-chars)
79
outputPreprocessed((char)0xFFFF, 5 + i);
80                             notifyError("Invalid unicode sequence");
81                         } else {
82                             outputPreprocessed((char)ch, 5);
83                         }
84                         break;
85                         
86                     // case '\\':
87
default:
88                         outputOriginal('\\');
89                         outputOriginal(ch);
90                         break;
91                 }
92                 break;
93                 
94             default:
95                 outputOriginal(ch);
96                 break;
97         }
98     }
99
100     protected boolean isSensitiveChar(char ch) {
101         switch (ch) {
102             case '\\':
103             case '0': case '1': case '2': case '3': case '4':
104             case '5': case '6': case '7': case '8': case '9':
105             case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
106             case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
107             case 'u':
108                 return true;
109
110             default:
111                 return false;
112         }
113     }
114
115     protected int maxLookahead() {
116         // Only one extra character is necessary to decide
117
// whether the sequence continues or not.
118
// After finding '\' if the next char is '\' then
119
// there is no sequence.
120
// After '\' 'u' there may be arbitrary number of 'u' chars
121
// and then four hexadecimal digits.
122
return 1;
123     }
124
125 }
126
Popular Tags