KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > fop > fo > expr > PropertyTokenizer


1 /*
2  * $Id: PropertyTokenizer.java,v 1.4.2.2 2003/02/25 12:56:58 jeremias Exp $
3  * ============================================================================
4  * The Apache Software License, Version 1.1
5  * ============================================================================
6  *
7  * Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without modifica-
10  * tion, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * 3. The end-user documentation included with the redistribution, if any, must
20  * include the following acknowledgment: "This product includes software
21  * developed by the Apache Software Foundation (http://www.apache.org/)."
22  * Alternately, this acknowledgment may appear in the software itself, if
23  * and wherever such third-party acknowledgments normally appear.
24  *
25  * 4. The names "FOP" and "Apache Software Foundation" must not be used to
26  * endorse or promote products derived from this software without prior
27  * written permission. For written permission, please contact
28  * apache@apache.org.
29  *
30  * 5. Products derived from this software may not be called "Apache", nor may
31  * "Apache" appear in their name, without prior written permission of the
32  * Apache Software Foundation.
33  *
34  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
35  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
36  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
37  * APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
38  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
39  * DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
40  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
41  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44  * ============================================================================
45  *
46  * This software consists of voluntary contributions made by many individuals
47  * on behalf of the Apache Software Foundation and was originally created by
48  * James Tauber <jtauber@jtauber.com>. For more information on the Apache
49  * Software Foundation, please see <http://www.apache.org/>.
50  */

51 package org.apache.fop.fo.expr;
52
53 /**
54  * Class to tokenize XSL FO property expression.
55  * This class is heavily based on the epxression tokenizer in James Clark's
56  * XT, an XSLT processor.
57  */

58 class PropertyTokenizer {
59
60     static final int TOK_EOF = 0;
61     static final int TOK_NCNAME = TOK_EOF + 1;
62     static final int TOK_MULTIPLY = TOK_NCNAME + 1;
63     static final int TOK_LPAR = TOK_MULTIPLY + 1;
64     static final int TOK_RPAR = TOK_LPAR + 1;
65     static final int TOK_LITERAL = TOK_RPAR + 1;
66     static final int TOK_NUMBER = TOK_LITERAL + 1;
67     static final int TOK_FUNCTION_LPAR = TOK_NUMBER + 1;
68     static final int TOK_PLUS = TOK_FUNCTION_LPAR + 1;
69     static final int TOK_MINUS = TOK_PLUS + 1;
70     static final int TOK_MOD = TOK_MINUS + 1;
71     static final int TOK_DIV = TOK_MOD + 1;
72     static final int TOK_NUMERIC = TOK_DIV + 1;
73     static final int TOK_COMMA = TOK_NUMERIC + 1;
74     static final int TOK_PERCENT = TOK_COMMA + 1;
75     static final int TOK_COLORSPEC = TOK_PERCENT + 1;
76     static final int TOK_FLOAT = TOK_COLORSPEC + 1;
77     static final int TOK_INTEGER = TOK_FLOAT + 1;
78
79     int currentToken = TOK_EOF;
80     String JavaDoc currentTokenValue = null;
81     protected int currentUnitLength = 0;
82
83     private int currentTokenStartIndex = 0;
84     private /* final */ String JavaDoc expr;
85     private int exprIndex = 0;
86     private int exprLength;
87     private boolean recognizeOperator = false;
88
89
90     /**
91      * Construct a new PropertyTokenizer object to tokenize the passed
92      * String.
93      * @param s The Property expressio to tokenize.
94      */

95     PropertyTokenizer(String JavaDoc s) {
96         this.expr = s;
97         this.exprLength = s.length();
98     }
99
100     /**
101      * Return the next token in the expression string.
102      * This sets the following package visible variables:
103      * currentToken An enumerated value identifying the recognized token
104      * currentTokenValue A String containing the token contents
105      * currentUnitLength If currentToken = TOK_NUMERIC, the number of
106      * characters in the unit name.
107      * @throws PropertyException If un unrecognized token is encountered.
108      */

109     void next() throws PropertyException {
110         currentTokenValue = null;
111         currentTokenStartIndex = exprIndex;
112         boolean currentMaybeOperator = recognizeOperator;
113         boolean bSawDecimal;
114         recognizeOperator = true;
115         for (; ; ) {
116             if (exprIndex >= exprLength) {
117                 currentToken = TOK_EOF;
118                 return;
119             }
120             char c = expr.charAt(exprIndex++);
121             switch (c) {
122             case ' ':
123             case '\t':
124             case '\r':
125             case '\n':
126                 currentTokenStartIndex = exprIndex;
127                 break;
128             case ',':
129                 recognizeOperator = false;
130                 currentToken = TOK_COMMA;
131                 return;
132             case '+':
133                 recognizeOperator = false;
134                 currentToken = TOK_PLUS;
135                 return;
136             case '-':
137                 recognizeOperator = false;
138                 currentToken = TOK_MINUS;
139                 return;
140             case '(':
141                 currentToken = TOK_LPAR;
142                 recognizeOperator = false;
143                 return;
144             case ')':
145                 currentToken = TOK_RPAR;
146                 return;
147             case '"':
148             case '\'':
149                 exprIndex = expr.indexOf(c, exprIndex);
150                 if (exprIndex < 0) {
151                     exprIndex = currentTokenStartIndex + 1;
152                     throw new PropertyException("missing quote");
153                 }
154                 currentTokenValue = expr.substring(currentTokenStartIndex
155                                                    + 1, exprIndex++);
156                 currentToken = TOK_LITERAL;
157                 return;
158             case '*':
159                 /*
160                  * if (currentMaybeOperator) {
161                  * recognizeOperator = false;
162                  */

163                 currentToken = TOK_MULTIPLY;
164                 /*
165                  * }
166                  * else
167                  * throw new PropertyException("illegal operator *");
168                  */

169                 return;
170             case '0':
171             case '1':
172             case '2':
173             case '3':
174             case '4':
175             case '5':
176             case '6':
177             case '7':
178             case '8':
179             case '9':
180                 scanDigits();
181                 if (exprIndex < exprLength && expr.charAt(exprIndex) == '.') {
182                     exprIndex++;
183                     bSawDecimal = true;
184                     if (exprIndex < exprLength
185                             && isDigit(expr.charAt(exprIndex))) {
186                         exprIndex++;
187                         scanDigits();
188                     }
189                 } else
190                     bSawDecimal = false;
191                 if (exprIndex < exprLength && expr.charAt(exprIndex) == '%') {
192                     exprIndex++;
193                     currentToken = TOK_PERCENT;
194                 } else {
195                     // Check for possible unit name following number
196
currentUnitLength = exprIndex;
197                     scanName();
198                     currentUnitLength = exprIndex - currentUnitLength;
199                     currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
200                                    : (bSawDecimal ? TOK_FLOAT : TOK_INTEGER);
201                 }
202                 currentTokenValue = expr.substring(currentTokenStartIndex,
203                                                    exprIndex);
204                 return;
205
206             case '.':
207                 if (exprIndex < exprLength
208                         && isDigit(expr.charAt(exprIndex))) {
209                     ++exprIndex;
210                     scanDigits();
211                     if (exprIndex < exprLength
212                             && expr.charAt(exprIndex) == '%') {
213                         exprIndex++;
214                         currentToken = TOK_PERCENT;
215                     } else {
216                         // Check for possible unit name following number
217
currentUnitLength = exprIndex;
218                         scanName();
219                         currentUnitLength = exprIndex - currentUnitLength;
220                         currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
221                                        : TOK_FLOAT;
222                     }
223                     currentTokenValue = expr.substring(currentTokenStartIndex,
224                                                        exprIndex);
225                     return;
226                 }
227                 throw new PropertyException("illegal character '.'");
228
229             case '#': // Start of color value
230
if (exprIndex < exprLength
231                         && isHexDigit(expr.charAt(exprIndex))) {
232                     ++exprIndex;
233                     scanHexDigits();
234                     currentToken = TOK_COLORSPEC;
235                     currentTokenValue = expr.substring(currentTokenStartIndex,
236                                                        exprIndex);
237                     // Probably should have some multiple of 3 for length!
238
return;
239                 } else
240                     throw new PropertyException("illegal character '#'");
241
242             default:
243                 --exprIndex;
244                 scanName();
245                 if (exprIndex == currentTokenStartIndex)
246                     throw new PropertyException("illegal character");
247                 currentTokenValue = expr.substring(currentTokenStartIndex,
248         exprIndex);
249                 // if (currentMaybeOperator) {
250
if (currentTokenValue.equals("mod")) {
251                     currentToken = TOK_MOD;
252                     return;
253                 } else if (currentTokenValue.equals("div")) {
254                     currentToken = TOK_DIV;
255                     return;
256                 }
257                 /*
258                  * else
259                  * throw new PropertyException("unrecognized operator name");
260                  * recognizeOperator = false;
261                  * return;
262                  * }
263                  */

264                 if (followingParen()) {
265                     currentToken = TOK_FUNCTION_LPAR;
266                     recognizeOperator = false;
267                 } else {
268                     currentToken = TOK_NCNAME;
269                     recognizeOperator = false;
270                 }
271                 return;
272             }
273         }
274     }
275
276     /**
277      * Attempt to recognize a valid NAME token in the input expression.
278      */

279     private void scanName() {
280         if (exprIndex < exprLength && isNameStartChar(expr.charAt(exprIndex)))
281             while (++exprIndex < exprLength
282                    && isNameChar(expr.charAt(exprIndex)));
283     }
284
285     /**
286      * Attempt to recognize a valid sequence of decimal digits in the
287      * input expression.
288      */

289     private void scanDigits() {
290         while (exprIndex < exprLength && isDigit(expr.charAt(exprIndex)))
291             exprIndex++;
292     }
293
294     /**
295      * Attempt to recognize a valid sequence of hexadecimal digits in the
296      * input expression.
297      */

298     private void scanHexDigits() {
299         while (exprIndex < exprLength && isHexDigit(expr.charAt(exprIndex)))
300             exprIndex++;
301     }
302
303     /**
304      * Return a boolean value indicating whether the following non-whitespace
305      * character is an opening parenthesis.
306      */

307     private boolean followingParen() {
308         for (int i = exprIndex; i < exprLength; i++) {
309             switch (expr.charAt(i)) {
310             case '(':
311                 exprIndex = i + 1;
312                 return true;
313             case ' ':
314             case '\r':
315             case '\n':
316             case '\t':
317                 break;
318             default:
319                 return false;
320             }
321         }
322         return false;
323     }
324
325     private static final int CS = 1, nameStartChars = 1; // "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
326
private static final int C = 2, nameChars = 2; //".-0123456789";
327
private static final int D = 4, digits = 4; //"0123456789";
328
private static final int H = 8, hexchars = 8; //digits + "abcdefABCDEF";
329
private static final int C_CS = C + CS;
330
331
332     private static final int charMap[] = {
333                                              0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x00
334
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x10
335
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,C,C,0, //0x20
336
C+D+H,C+D+H,C+D+H,C+D+H, C+D+H,C+D+H,C+D+H,C+D+H, C+D+H,C+D+H,0,0, 0,0,0,0, //0x30
337
0,CS+H,CS+H,CS+H, CS+H,CS+H,CS+H,CS, CS,CS,CS,CS, CS,CS,CS,CS, //0x40
338
CS,CS,CS,CS, CS,CS,CS,CS, CS,CS,CS,0, 0,0,0,CS, //0x50
339
0,CS+H,CS+H,CS+H, CS+H,CS+H,CS+H,CS, CS,CS,CS,CS, CS,CS,CS,CS, //0x60
340
CS,CS,CS,CS, CS,CS,CS,CS, CS,CS,CS,0, 0,0,0,0 //0x70
341
};
342
343     /**
344      * Return a boolean value indicating whether the argument is a
345      * decimal digit (0-9).
346      * @param c The character to check
347      */

348     private static final boolean isDigit(char c) {
349         return c > 0 && c < 128 && (charMap[ c] & digits) != 0;
350     }
351
352     /**
353      * Return a boolean value indicating whether the argument is a
354      * hexadecimal digit (0-9, A-F, a-f).
355      * @param c The character to check
356      */

357     private static final boolean isHexDigit(char c) {
358         return c > 0 && c < 128 && (charMap[ c] & hexchars) != 0;
359         //return hexchars.indexOf(c) >= 0;
360
}
361
362     /**
363      * Return a boolean value indicating whether the argument is whitespace
364      * as defined by XSL (space, newline, CR, tab).
365      * @param c The character to check
366      */

367     private static final boolean isSpace(char c) {
368         switch (c) {
369         case ' ':
370         case '\r':
371         case '\n':
372         case '\t':
373             return true;
374         }
375         return false;
376     }
377
378     /**
379      * Return a boolean value indicating whether the argument is a valid name
380      * start character, ie. can start a NAME as defined by XSL.
381      * @param c The character to check
382      */

383     private static final boolean isNameStartChar(char c) {
384         return c >= 0x80 || c < 0 || (charMap[ c] & nameStartChars) != 0;
385         //return nameStartChars.indexOf(c) >= 0 || c >= 0x80;
386
}
387
388     /**
389      * Return a boolean value indicating whether the argument is a valid name
390      * character, ie. can occur in a NAME as defined by XSL.
391      * @param c The character to check
392      */

393     private static final boolean isNameChar(char c) {
394         return c > 0x80 || c < 0 || (charMap[ c] & C_CS) != 0;
395         //return nameStartChars.indexOf(c) >= 0 || nameChars.indexOf(c) >= 0 || c >= 0x80;
396
}
397
398 }
399
Popular Tags