1 package com.icl.saxon.expr; 2 3 import java.util.*; 4 5 12 13 14 final class Tokenizer { 15 private static final int UNKNOWN = -1; 16 public static final int EOF = 0; 17 public static final int NAME = 1; 18 public static final int FUNCTION = 2; 19 public static final int LITERAL = 3; 20 public static final int VBAR = 4; 21 public static final int SLASH = 5; 22 public static final int AT = 6; 23 public static final int LSQB = 7; 24 public static final int RSQB = 8; 25 public static final int LPAR = 9; 26 public static final int RPAR = 10; 27 public static final int EQUALS = 11; 28 public static final int DOT = 12; 29 public static final int DOTDOT = 13; 30 public static final int STAR = 14; 31 public static final int COMMA = 15; 32 public static final int SLSL = 16; 33 public static final int PREFIX = 17; 34 public static final int OR = 18; 35 public static final int AND = 19; 36 public static final int NUMBER = 20; 37 public static final int GT = 21; 38 public static final int LT = 22; 39 public static final int GE = 23; 40 public static final int LE = 24; 41 public static final int PLUS = 25; 42 public static final int MINUS = 26; 43 public static final int MULT = 27; 44 public static final int DIV = 28; 45 public static final int MOD = 29; 46 public static final int DOLLAR = 31; 47 public static final int NODETYPE = 32; 48 public static final int AXIS = 33; 49 public static final int NE = 34; 50 51 public static final int NEGATE = 99; 54 55 public static String [] tokens = 56 {"EOF", "<name>", "<function>", "<literal>", "|", "/", "@", "[", "]", 57 "(", ")", "=", ".", "..", "*", ",", "//", "^", 58 "or", "and", "<number>", ">", "<", ">=", "<=", "+", "-", 59 "*", "div", "mod", "--quo--", "$", "<nodetype>()", 60 "<axis>()", "!="}; 61 62 public int currentToken = EOF; 63 public String currentTokenValue = null; 64 public double currentNumericValue = 0.0; 65 66 private int currentTokenStartIndex = 0; 67 public String pattern; 68 private int patternIndex = 0; 69 private int patternLength; 70 71 private int precedingToken = UNKNOWN; 72 73 77 public void tokenize(String pattern) throws XPathException { 78 currentToken = EOF; 79 currentTokenValue = null; 80 currentTokenStartIndex = 0; 81 patternIndex = 0; 82 this.pattern = pattern; 83 this.patternLength = pattern.length(); 84 next(); 85 } 86 87 94 public void next() throws XPathException { 95 precedingToken = currentToken; 96 currentTokenValue = null; 97 currentTokenStartIndex = patternIndex; 98 for (;;) { 99 if (patternIndex >= patternLength) { 100 currentToken = EOF; 101 return; 102 } 103 char c = pattern.charAt(patternIndex++); 104 switch (c) { 105 case '/': 106 if (patternIndex < patternLength 107 && pattern.charAt(patternIndex) == '/') { 108 patternIndex++; 109 currentToken = SLSL; 110 return; 111 } 112 currentToken = SLASH; 113 return; 114 case '@': 115 currentToken = AT; 116 return; 117 case '[': 118 currentToken = LSQB; 119 return; 120 case ']': 121 currentToken = RSQB; 122 return; 123 case '(': 124 currentToken = LPAR; 125 return; 126 case ')': 127 currentToken = RPAR; 128 return; 129 case '+': 130 currentToken = PLUS; 131 return; 132 case '-': 133 currentToken = MINUS; return; 135 case '=': 136 currentToken = EQUALS; 137 return; 138 case '!': 139 if (patternIndex < patternLength 140 && pattern.charAt(patternIndex) == '=') { 141 patternIndex++; 142 currentToken = NE; 143 return; 144 } 145 throw new XPathException("\"!\" without \"=\" in expression " + pattern); 146 case '*': 147 if (precedingToken==EOF || 148 precedingToken==AT || 149 precedingToken==LPAR || 150 precedingToken==LSQB || 151 precedingToken==COMMA || 152 precedingToken==FUNCTION || 153 precedingToken==AXIS || 154 isOperator(precedingToken)) { 155 currentToken = STAR; 156 } else { 157 currentToken = MULT; 158 } 159 return; 160 case ',': 161 currentToken = COMMA; 162 return; 163 case '$': 164 currentToken = DOLLAR; 165 if (patternIndex < patternLength) { 167 char ahead = pattern.charAt(patternIndex); 168 if (" \r\t\n".indexOf(ahead) >= 0) { 169 throw new XPathException("Whitespace is not allowed after '$' sign"); 170 } 171 } 172 return; 173 case '|': 174 currentToken = VBAR; 175 return; 176 case '<': 177 if (patternIndex < patternLength 178 && pattern.charAt(patternIndex) == '=') { 179 patternIndex++; 180 currentToken = LE; 181 return; 182 } 183 currentToken = LT; 184 return; 185 case '>': 186 if (patternIndex < patternLength 187 && pattern.charAt(patternIndex) == '=') { 188 patternIndex++; 189 currentToken = GE; 190 return; 191 } 192 currentToken = GT; 193 return; 194 case '.': 195 if (patternIndex < patternLength 196 && pattern.charAt(patternIndex) == '.') { 197 patternIndex++; 198 currentToken = DOTDOT; 199 return; 200 } 201 if (patternIndex == patternLength 202 || pattern.charAt(patternIndex) < '0' 203 || pattern.charAt(patternIndex) > '9') { 204 currentToken = DOT; 205 return; 206 } 207 case '0': 209 case '1': 210 case '2': 211 case '3': 212 case '4': 213 case '5': 214 case '6': 215 case '7': 216 case '8': 217 case '9': 218 for (;patternIndex < patternLength; patternIndex++) { 219 c = pattern.charAt(patternIndex); 220 if (!(c=='.' || Character.isDigit(c))) break; 221 } 222 currentTokenValue = pattern.substring(currentTokenStartIndex, patternIndex); 223 try { 224 currentNumericValue = new Double (currentTokenValue).doubleValue(); 225 } catch (NumberFormatException err) { 226 throw new XPathException("Invalid number (" + currentTokenValue + ") in expression " + pattern); 227 } 228 currentToken = NUMBER; 229 return; 230 case '"': 231 case '\'': 232 patternIndex = pattern.indexOf(c, patternIndex); 233 if (patternIndex < 0) { 234 patternIndex = currentTokenStartIndex + 1; 235 throw new XPathException("Unmatched quote in expression " + pattern); 236 } 237 currentTokenValue = pattern.substring(currentTokenStartIndex + 1, 238 patternIndex++).intern(); 239 currentToken = LITERAL; 240 return; 241 case ' ': 242 case '\t': 243 case '\r': 244 case '\n': 245 currentTokenStartIndex = patternIndex; 246 break; 247 default: 248 if (c < 0x80 && !Character.isLetter(c)) 249 throw new XPathException("Invalid character (" + c + ") in expression " + pattern); 250 251 case '_': 252 loop: 253 for (;patternIndex < patternLength; patternIndex++) { 254 c = pattern.charAt(patternIndex); 255 switch (c) { 256 case ':': 257 if (patternIndex+1 < patternLength && 258 pattern.charAt(patternIndex+1) == ':') { 259 currentTokenValue = pattern.substring(currentTokenStartIndex, 260 patternIndex).intern(); 261 currentToken = AXIS; 262 patternIndex+=2; 263 return; 264 } 265 if (patternIndex+1 < patternLength && 266 pattern.charAt(patternIndex+1) == '*') { 267 currentTokenValue = pattern.substring(currentTokenStartIndex, 268 patternIndex).intern(); 269 currentToken = PREFIX; 270 patternIndex+=2; 271 return; 272 } 273 break; 274 case '.': 275 case '-': 276 case '_': 277 break; 278 case '(': 279 currentTokenValue = pattern.substring(currentTokenStartIndex, 280 patternIndex).intern(); 281 int op = getBinaryOp(currentTokenValue); 282 if (op != UNKNOWN) { 283 currentToken = op; 284 return; 285 } 286 patternIndex++; currentToken = getFunctionType(currentTokenValue); 288 return; 289 default: 290 if (c < 0x80 && !Character.isLetterOrDigit(c)) 291 break loop; 292 break; 293 } 294 } 295 currentTokenValue = pattern.substring(currentTokenStartIndex, 296 patternIndex).intern(); 297 lookahead: 298 for (int i = patternIndex; i < patternLength; i++) { 299 switch (pattern.charAt(i)) { 300 case ' ': 301 case '\t': 302 case '\r': 303 case '\n': 304 break; 305 case ':': 306 if (i+1 < patternLength && pattern.charAt(i+1) == ':') { 307 currentToken = AXIS; 308 patternIndex = i+2; 309 return; 310 } 311 break lookahead; 312 case '(': 313 int oper = getBinaryOp(currentTokenValue); 314 if (oper != UNKNOWN) { 315 currentToken = oper; 316 return; 317 } else { 318 currentToken = getFunctionType(currentTokenValue); 319 patternIndex = i + 1; 320 return; 321 } 322 323 default: 324 break lookahead; 325 } 326 } 327 int optype = getBinaryOp(currentTokenValue); 328 if (optype!=UNKNOWN && ! 329 ( precedingToken==EOF || 330 precedingToken==AT || 331 precedingToken==LPAR || 332 precedingToken==LSQB || 333 precedingToken==COMMA || 334 precedingToken==FUNCTION || 335 precedingToken==AXIS || 336 precedingToken==DOLLAR || 337 isOperator(precedingToken)) 338 ) { 339 currentToken = optype; 340 } else { 341 currentToken = NAME; 342 } 343 return; 344 } 345 } 346 } 347 348 352 353 static private int getBinaryOp(String s) { 354 if (s=="and") return AND; 355 if (s=="or") return OR; 356 if (s=="div") return DIV; 357 if (s=="mod") return MOD; 358 return UNKNOWN; 359 } 360 361 366 367 static private int getFunctionType(String s) { 368 if (s=="node") return NODETYPE; 369 if (s=="text") return NODETYPE; 370 if (s=="comment") return NODETYPE; 371 if (s=="processing-instruction") return NODETYPE; 372 return FUNCTION; 373 } 374 375 378 379 static private boolean isOperator(int tok) { 380 return ( 381 tok==SLASH || tok==SLSL || tok==VBAR || 382 tok==EQUALS || tok==OR || tok==AND || tok==GT || tok==LT || tok==NE || 383 tok==GE || tok==LE || tok==PLUS || tok==MINUS || tok==MULT || tok==DIV || 384 tok==MOD ); 385 } 386 } 387 388 420 | Popular Tags |