1 13 14 package mondrian.olap; 15 16 import java_cup.runtime.Symbol; 17 import org.apache.log4j.Logger; 18 import java.util.List ; 19 import java.util.ArrayList ; 20 import java.util.Enumeration ; 21 import java.util.Hashtable ; 22 import java.io.IOException ; 23 24 27 public class Scanner { 28 private static final Logger LOGGER = Logger.getLogger(Scanner.class); 29 30 31 protected int nextChar; 32 33 private int lookaheadChars[] = new int[16]; 34 private int firstLookaheadChar = 0; 35 private int lastLookaheadChar = 0; 36 private Hashtable <String , Integer > m_resWordsTable; 37 private int iMaxResword; 38 private String m_aResWords[]; 39 protected boolean debug; 40 41 42 private List <Integer > lines; 43 44 45 private int iChar; 46 47 48 private int iPrevChar; 49 50 51 private int previousSymbol; 52 private boolean inFormula; 53 54 57 private static final String [][] commentDelim = { 58 {"//", null}, 59 {"--", null}, 60 {"/*", "*/"} 61 }; 62 63 66 private static final boolean allowNestedComments = true; 67 68 Scanner(boolean debug) { 69 this.debug = debug; 70 } 71 72 75 public static boolean getNestedCommentsState() { 76 return allowNestedComments; 77 } 78 79 82 public static String [][] getCommentDelimiters() { 83 return commentDelim; 84 } 85 86 87 private void advance() 88 throws java.io.IOException { 89 90 if (firstLookaheadChar == lastLookaheadChar) { 91 nextChar = getChar(); 93 } else { 94 nextChar = lookaheadChars[firstLookaheadChar++]; 96 if (firstLookaheadChar == lastLookaheadChar) { 97 firstLookaheadChar = 0; 98 lastLookaheadChar = 0; 99 } 100 } 101 if (nextChar == '\012') { 102 lines.add(iChar); 103 } 104 iChar++; 105 } 106 107 108 private int lookahead() 109 throws java.io.IOException { 110 111 return lookahead(1); 112 } 113 114 119 private int lookahead(int n) 120 throws java.io.IOException { 121 122 if (n == 0) { 123 return nextChar; 124 } 125 else { 126 if (n > lastLookaheadChar - firstLookaheadChar) { 128 int len=lastLookaheadChar - firstLookaheadChar; 129 int t[]; 130 131 if (n + firstLookaheadChar > lookaheadChars.length) { 133 if (n > lookaheadChars.length) { 134 t=new int[n * 2]; 137 } 138 else { 139 t = lookaheadChars; 142 } 143 144 System.arraycopy( 145 lookaheadChars, firstLookaheadChar, t, 0, len); 146 lookaheadChars = t; 147 firstLookaheadChar = 0; 148 lastLookaheadChar = len; 149 } 150 151 while (n > lastLookaheadChar - firstLookaheadChar) { 153 lookaheadChars[lastLookaheadChar++] = getChar(); 154 } 155 } 156 157 return lookaheadChars[n - 1 + firstLookaheadChar]; 158 } 159 } 160 161 162 protected int getChar() 163 throws java.io.IOException { 164 165 return System.in.read(); 166 } 167 168 169 public void init() 170 throws java.io.IOException { 171 172 initReswords(); 173 lines = new ArrayList <Integer >(); 174 iChar = iPrevChar = 0; 175 advance(); 176 } 177 178 182 void getLocation(Symbol symbol, int[] loc) { 183 int iTarget = symbol.left; 184 int iLine = -1; 185 int iLineEnd = 0; 186 int iLineStart; 187 do { 188 iLine++; 189 iLineStart = iLineEnd; 190 iLineEnd = Integer.MAX_VALUE; 191 if (iLine < lines.size()) { 192 iLineEnd = lines.get(iLine); 193 } 194 } while (iLineEnd < iTarget); 195 196 loc[0] = iLine; loc[1] = iTarget - iLineStart; } 199 200 private Symbol trace(Symbol s) { 201 if (debug) { 202 String name = null; 203 if (s.sym < m_aResWords.length) { 204 name = m_aResWords[s.sym]; 205 } 206 207 LOGGER.error("Scanner returns #" + s.sym + 208 (name == null ? "" : ":" + name) + 209 (s.value == null ? "" : "(" + s.value.toString() + ")")); 210 } 211 return s; 212 } 213 214 private void initResword(int id, String s) { 215 m_resWordsTable.put(s, id); 216 if (id > iMaxResword) { 217 iMaxResword = id; 218 } 219 } 220 221 private void initReswords() { 222 m_resWordsTable = new Hashtable <String , Integer >(); 228 iMaxResword = 0; 229 initResword(ParserSym.AND ,"AND"); 231 initResword(ParserSym.AS ,"AS"); 232 initResword(ParserSym.AXIS ,"AXIS"); 234 initResword(ParserSym.CAST ,"CAST"); initResword(ParserSym.CASE ,"CASE"); 239 initResword(ParserSym.CELL ,"CELL"); 240 initResword(ParserSym.DIMENSION ,"DIMENSION"); 246 initResword(ParserSym.ELSE ,"ELSE"); 247 initResword(ParserSym.EMPTY ,"EMPTY"); 248 initResword(ParserSym.END ,"END"); 249 initResword(ParserSym.FROM ,"FROM"); 258 initResword(ParserSym.IS ,"IS"); 259 initResword(ParserSym.IN ,"IN"); 260 initResword(ParserSym.MATCHES ,"MATCHES"); 265 initResword(ParserSym.MEMBER ,"MEMBER"); 266 initResword(ParserSym.NON ,"NON"); 269 initResword(ParserSym.NOT ,"NOT"); 270 initResword(ParserSym.NULL ,"NULL"); 271 initResword(ParserSym.ON ,"ON"); 272 initResword(ParserSym.OR ,"OR"); 273 initResword(ParserSym.PROPERTIES ,"PROPERTIES"); 277 initResword(ParserSym.SELECT ,"SELECT"); 281 initResword(ParserSym.SET ,"SET"); 282 initResword(ParserSym.THEN ,"THEN"); 284 initResword(ParserSym.WHEN ,"WHEN"); 286 initResword(ParserSym.WHERE ,"WHERE"); 287 initResword(ParserSym.WITH ,"WITH"); 288 initResword(ParserSym.XOR ,"XOR"); 289 290 m_aResWords = new String [iMaxResword + 1]; 291 Enumeration <String > e = m_resWordsTable.keys(); 292 while (e.hasMoreElements()) { 293 Object o = e.nextElement(); 294 String s = (String ) o; 295 int i = (m_resWordsTable.get(s)).intValue(); 296 m_aResWords[i] = s; 297 } 298 } 299 300 301 public String lookupReserved(int i) { 302 return m_aResWords[i]; 303 } 304 305 private Symbol makeSymbol(int id,Object o) { 306 int iPrevPrevChar = iPrevChar; 307 this.iPrevChar = iChar; 308 this.previousSymbol = id; 309 return trace(new Symbol(id, iPrevPrevChar, iChar, o)); 310 } 311 private Symbol makeNumber(double mantissa, int exponent) { 312 double d = mantissa * java.lang.Math.pow(10, exponent); 313 return makeSymbol(ParserSym.NUMBER, d); 314 } 315 316 private Symbol makeId(String s, boolean quoted, boolean ampersand) { 317 return makeSymbol( 318 quoted && ampersand ? ParserSym.AMP_QUOTED_ID : 319 quoted ? ParserSym.QUOTED_ID : 320 ParserSym.ID, 321 s); 322 } 323 324 private Symbol makeRes(int i) { 325 return makeSymbol(i, m_aResWords[i]); 326 } 327 328 private Symbol makeToken(int i, String s) { 329 return makeSymbol(i, s); 330 } 331 332 private Symbol makeString(String s) { 333 if (inFormula) { 334 inFormula = false; 335 return makeSymbol(ParserSym.FORMULA_STRING, s); 336 } else { 337 return makeSymbol(ParserSym.STRING, s); 338 } 339 } 340 341 344 private void skipToEOL() throws IOException { 345 while (nextChar != -1 && nextChar != '\012') { 346 advance(); 347 } 348 } 349 350 356 private void skipComment( 357 final String startDelim, 358 final String endDelim) throws IOException { 359 360 int depth = 1; 361 362 for (int x = 0; x < startDelim.length(); x++) { 364 advance(); 365 } 366 367 for (;;) { 368 if (nextChar == -1) { 369 return; 370 } 371 else if (checkForSymbol(endDelim)) { 372 for (int x = 0; x < endDelim.length(); x++) { 374 advance(); 375 } 376 if (--depth == 0) { 377 return; 378 } 379 } 380 else if (allowNestedComments && checkForSymbol(startDelim)) { 381 for (int x = 0; x < startDelim.length(); x++) { 383 advance(); 384 } 385 depth++; 386 } 387 else { 388 advance(); 389 } 390 } 391 } 392 393 396 private void searchForComments() throws IOException { 397 398 boolean foundComment; 400 do { 401 foundComment = false; 402 for (String [] aCommentDelim : commentDelim) { 403 if (checkForSymbol(aCommentDelim[0])) { 404 if (aCommentDelim[1] == null) { 405 foundComment = true; 406 skipToEOL(); 407 } else { 408 foundComment = true; 409 skipComment(aCommentDelim[0], aCommentDelim[1]); 410 } 411 } 412 } 413 } while (foundComment); 414 } 415 416 419 private boolean checkForSymbol(final String symb) throws IOException { 420 for (int x = 0; x < symb.length(); x++) { 421 if (symb.charAt(x) != lookahead(x)) { 422 return false; 423 } 424 } 425 return true; 426 } 427 428 431 public Symbol next_token() throws IOException { 432 433 StringBuilder id; 434 boolean ampersandId = false; 435 for (;;) { 436 searchForComments(); 437 switch (nextChar) { 438 case '.': 439 switch (lookahead()) { 440 case '0': case '1': case '2': case '3': case '4': 441 case '5': case '6': case '7': case '8': case '9': 442 break; 445 default: 446 advance(); 447 return makeToken(ParserSym.DOT, "."); 448 } 449 451 case '0': case '1': case '2': case '3': case '4': 452 case '5': case '6': case '7': case '8': case '9': 453 454 final int leftOfPoint = 0; 461 final int rightOfPoint = 1; 462 final int inExponent = 2; 463 int n = 0, nDigits = 0, nSign = 0, exponent = 0; 464 double mantissa = 0.0; 465 int state = leftOfPoint; 466 467 for (;;) { 468 if (nextChar == '.') { 469 if (state == leftOfPoint) { 470 state = rightOfPoint; 471 mantissa = n; 472 n = nDigits = 0; 473 nSign = 1; 474 advance(); 475 } else { 476 if (state == rightOfPoint) { 481 mantissa += (n * java.lang.Math.pow( 482 10, -nDigits)); 483 } else { 484 exponent = n * nSign; 485 } 486 return makeNumber(mantissa, exponent); 487 } 488 489 } else if (nextChar == 'E' || nextChar == 'e') { 490 if (state == inExponent) { 491 exponent = n * nSign; 495 return makeNumber(mantissa, exponent); 496 497 } else { 498 if (state == leftOfPoint) { 499 mantissa = n; 500 } else { 501 mantissa += (n * java.lang.Math.pow( 502 10, -nDigits)); 503 } 504 n = nDigits = 0; 505 nSign = 1; 506 advance(); 507 state = inExponent; 508 } 509 510 } else if ((nextChar == '+' || nextChar == '-') && 511 state == inExponent && 512 nDigits == 0) { 513 nSign = -nSign; 515 advance(); 516 517 } else if (nextChar >= '0' && nextChar <= '9') { 518 n = n * 10 + (nextChar - '0'); 519 nDigits++; 520 advance(); 521 522 } else { 523 if (state == leftOfPoint) { 525 mantissa = n; 526 } else if (state == rightOfPoint) { 527 mantissa += (n * java.lang.Math.pow(10, -nDigits)); 528 } else { 529 exponent = n * nSign; 530 } 531 return makeNumber(mantissa, exponent); 532 } 533 } 534 535 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 536 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 537 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 538 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 539 case 'y': case 'z': 540 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 541 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 542 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 543 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 544 case 'Y': case 'Z': 545 546 id = new StringBuilder (); 547 for (;;) { 548 id.append((char)nextChar); 549 advance(); 550 switch (nextChar) { 551 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 552 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 553 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 554 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 555 case 'y': case 'z': 556 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 557 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 558 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 559 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 560 case 'Y': case 'Z': 561 case '0': case '1': case '2': case '3': case '4': 562 case '5': case '6': case '7': case '8': case '9': 563 case '_': 564 break; 565 default: 566 String strId = id.toString(); 567 Integer i = m_resWordsTable.get( 568 strId.toUpperCase()); 569 if (i == null) { 570 return makeId(strId, false, false); 572 } else { 573 return makeRes(i); 575 } 576 } 577 } 578 579 case '&': 580 advance(); 581 if (nextChar == '[') { 582 ampersandId = true; 583 } else { 585 return makeToken(ParserSym.UNKNOWN, "&"); 586 } 587 588 case '[': 589 590 id = new StringBuilder (); 591 for (;;) { 592 advance(); 593 switch (nextChar) { 594 case ']': 595 advance(); 596 if (nextChar == ']') { 597 id.append(']'); 599 break; 600 } else { 601 if (ampersandId) { 603 ampersandId = false; 604 return makeId(id.toString(), true, true); 605 } else { 606 return makeId(id.toString(), true, false); 607 } 608 } 609 case -1: 610 if (ampersandId) { 611 ampersandId = false; 612 return makeId(id.toString(), true, true); 613 } else { 614 return makeId(id.toString(), true, false); 615 } 616 default: 617 id.append((char)nextChar); 618 } 619 } 620 621 case ':': advance(); return makeToken(ParserSym.COLON, ":"); 622 case ',': advance(); return makeToken(ParserSym.COMMA, ","); 623 case '=': advance(); return makeToken(ParserSym.EQ, "="); 624 case '<': 625 advance(); 626 switch (nextChar) { 627 case '>': advance(); return makeToken(ParserSym.NE, "<>"); 628 case '=': advance(); return makeToken(ParserSym.LE, "<="); 629 default: return makeToken(ParserSym.LT, "<"); 630 } 631 case '>': 632 advance(); 633 switch (nextChar) { 634 case '=': advance(); return makeToken(ParserSym.GE, ">="); 635 default: return makeToken(ParserSym.GT, ">"); 636 } 637 case '{': advance(); return makeToken(ParserSym.LBRACE, "{"); 638 case '(': advance(); return makeToken(ParserSym.LPAREN, "("); 639 case '}': advance(); return makeToken(ParserSym.RBRACE, "}"); 640 case ')': advance(); return makeToken(ParserSym.RPAREN, ")"); 641 case '+': advance(); return makeToken(ParserSym.PLUS, "+"); 642 case '-': advance(); return makeToken(ParserSym.MINUS, "-"); 643 case '*': advance(); return makeToken(ParserSym.ASTERISK, "*"); 644 case '/': advance(); return makeToken(ParserSym.SOLIDUS, "/"); 645 case '|': 646 advance(); 647 switch (nextChar) { 648 case '|': advance(); return makeToken(ParserSym.CONCAT, "||"); 649 default: return makeToken(ParserSym.UNKNOWN, "|"); 650 } 651 652 case '"': 653 654 id = new StringBuilder (); 655 for (;;) { 656 advance(); 657 switch (nextChar) { 658 case '"': 659 advance(); 660 if (nextChar == '"') { 661 id.append('"'); 663 break; 664 } else { 665 return makeString(id.toString()); 667 } 668 case -1: 669 return makeString(id.toString()); 670 default: 671 id.append((char)nextChar); 672 } 673 } 674 675 case '\'': 676 if (previousSymbol == ParserSym.AS) { 677 inFormula = true; 678 } 679 680 681 id = new StringBuilder (); 682 for (;;) { 683 advance(); 684 switch (nextChar) { 685 case '\'': 686 advance(); 687 if (nextChar == '\'') { 688 id.append('\''); 690 break; 691 } else { 692 return makeString(id.toString()); 694 } 695 case -1: 696 return makeString(id.toString()); 697 default: 698 id.append((char)nextChar); 699 } 700 } 701 702 case -1: 703 return makeToken(ParserSym.EOF, "EOF"); 705 706 default: 707 if (nextChar <= Character.MAX_VALUE && 711 Character.isWhitespace((char) nextChar)) { 712 } else { 714 throw new RuntimeException ( 716 "Unexpected character '" + (char) nextChar + "'"); 717 } 718 719 case ' ': 720 case '\t': 721 case '\n': 722 case '\r': 723 iPrevChar = iChar; 725 advance(); 726 break; 727 } 728 } 729 } 730 } 731 732 | Popular Tags |