1 28 package org.jvyamlb; 29 30 import java.io.FileInputStream ; 31 import java.io.IOException ; 32 import java.io.InputStream ; 33 import java.util.Arrays ; 34 import java.util.HashMap ; 35 import java.util.Iterator ; 36 import java.util.LinkedList ; 37 import java.util.List ; 38 import java.util.Map ; 39 40 import org.jruby.util.ByteList; 41 import org.jvyamlb.tokens.AliasToken; 42 import org.jvyamlb.tokens.AnchorToken; 43 import org.jvyamlb.tokens.DirectiveToken; 44 import org.jvyamlb.tokens.ScalarToken; 45 import org.jvyamlb.tokens.TagToken; 46 import org.jvyamlb.tokens.Token; 47 48 53 public class ScannerImpl implements Scanner { 54 private final static byte[] EMPTY = new byte[0]; 55 private final static byte[] NN = new byte[]{'\n'}; 56 private final static ByteList BANG = new ByteList(new byte[]{'!'},false); 57 private final static ByteList SPACE = new ByteList(new byte[]{' '},false); 58 59 private final static boolean[] ALL_FALSE = new boolean[256]; 60 private final static boolean[] ALL_TRUE = new boolean[256]; 61 private final static boolean[] LINEBR = new boolean[256]; 62 private final static boolean[] NULL_BL_LINEBR = new boolean[256]; 63 private final static boolean[] NULL_BL_T_LINEBR = new boolean[256]; 64 private final static boolean[] NULL_OR_LINEBR = new boolean[256]; 65 private final static boolean[] FULL_LINEBR = new boolean[256]; 66 private final static boolean[] BLANK_OR_LINEBR = new boolean[256]; 67 private final static boolean[] S4 = new boolean[256]; 68 private final static boolean[] ALPHA = new boolean[256]; 69 private final static boolean[] DIGIT = new boolean[256]; 70 private final static boolean[] HEXA = new boolean[256]; 71 private final static boolean[] STRANGE_CHAR = new boolean[256]; 72 private final static int[] RN = new int[]{'\r','\n'}; 73 private final static boolean[] BLANK_T = new boolean[256]; 74 private final static boolean[] SPACES_AND_STUFF = new boolean[256]; 75 private final static boolean[] DOUBLE_ESC = new boolean[256]; 
// Character-class lookup tables (continued). Every table is indexed by an
// unsigned byte value (0-255); callers must mask signed bytes with & 0xFF.
private final static boolean[] NON_ALPHA_OR_NUM = new boolean[256];
// NOTE(review): no entry of NON_PRINTABLE is set in the initializer below, so
// checkPrintable() cannot currently reject anything - confirm this is intended.
private final static boolean[] NON_PRINTABLE = new boolean[256];
// true for every byte that may start a plain scalar unconditionally.
private final static boolean[] STUPID_CHAR = new boolean[256];
// Alias of NULL_BL_T_LINEBR: both names share the very same array instance.
private final static boolean[] R_FLOWZERO = NULL_BL_T_LINEBR;
private final static boolean[] R_FLOWZERO1 = new boolean[256];
private final static boolean[] R_FLOWNONZERO = new boolean[256];

// Single-character escapes of double-quoted scalars: replacement byte and membership flag.
private final static byte[] ESCAPE_REPLACEMENTS = new byte[256];
private final static boolean[] IS_ESCAPE_REPLACEMENT = new boolean[256];
// Maps an escape introducer (Character) to the number of hex digits that follow it (Integer).
private final static Map ESCAPE_CODES = new HashMap();

/**
 * Sets <code>table[c] = value</code> for every character <code>c</code> of <code>chars</code>.
 * All characters passed in must be below 256.
 */
private static void flagChars(final boolean[] table, final String chars, final boolean value) {
    for(int i=0,j=chars.length();i<j;i++) {
        table[chars.charAt(i)] = value;
    }
}

/**
 * Sets <code>table[c] = true</code> for every character in the inclusive range from..to.
 */
private static void flagRange(final boolean[] table, final char from, final char to) {
    for(char c = from; c<=to; c++) {
        table[c] = true;
    }
}

static {
    // NUL, space, tab and the three line-break bytes: the common core of several tables.
    final String nullBlankTabBreak = "\0 \t\r\n\u0085";

    Arrays.fill(ALL_TRUE,true);
    flagChars(LINEBR,"\n\u0085",true);
    flagChars(NULL_BL_LINEBR,"\0 \r\n\u0085",true);
    flagChars(NULL_BL_T_LINEBR,nullBlankTabBreak,true);
    flagChars(NULL_OR_LINEBR,"\0\r\n\u0085",true);
    flagChars(FULL_LINEBR,"\r\n\u0085",true);
    flagChars(BLANK_OR_LINEBR," \r\n\u0085",true);
    flagChars(S4,nullBlankTabBreak + "[]{}",true);

    flagRange(ALPHA,'a','z');
    flagRange(ALPHA,'A','Z');
    flagRange(ALPHA,'0','9');
    flagChars(ALPHA,"-_",true);

    flagRange(DIGIT,'0','9');

    flagRange(HEXA,'0','9');
    flagRange(HEXA,'a','f');
    flagRange(HEXA,'A','F');

    flagRange(STRANGE_CHAR,'a','z');
    flagRange(STRANGE_CHAR,'A','Z');
    flagRange(STRANGE_CHAR,'0','9');
    flagChars(STRANGE_CHAR,"-[]()';/?:@&=+$,.!~*%",true);

    flagChars(BLANK_T," \t",true);
    flagChars(SPACES_AND_STUFF,nullBlankTabBreak + "\\'\"",true);
    flagChars(DOUBLE_ESC,"\\\"",true);
    flagChars(NON_ALPHA_OR_NUM,nullBlankTabBreak + "?:,]}%@`",true);

    Arrays.fill(ESCAPE_REPLACEMENTS,(byte)0);
    ESCAPE_REPLACEMENTS['0'] = 0;
    ESCAPE_REPLACEMENTS['a'] = 7;
    ESCAPE_REPLACEMENTS['b'] = 8;
    ESCAPE_REPLACEMENTS['t'] = 9;
    ESCAPE_REPLACEMENTS['\t'] = 9;
    ESCAPE_REPLACEMENTS['n'] = 10;
    ESCAPE_REPLACEMENTS['v'] = 11;
    ESCAPE_REPLACEMENTS['f'] = 12;
    ESCAPE_REPLACEMENTS['r'] = 13;
    ESCAPE_REPLACEMENTS['e'] = 27;
    ESCAPE_REPLACEMENTS[' '] = 32;
    ESCAPE_REPLACEMENTS['"'] = (byte)'"';
    ESCAPE_REPLACEMENTS['\\'] = (byte)'\\';
    ESCAPE_REPLACEMENTS['N'] = (byte)133;
    ESCAPE_REPLACEMENTS['_'] = (byte)160;
    flagChars(IS_ESCAPE_REPLACEMENT,"0abt\tnvfre \"\\N_",true);

    ESCAPE_CODES.put(new Character('x'),new Integer(2));
    ESCAPE_CODES.put(new Character('u'),new Integer(4));
    ESCAPE_CODES.put(new Character('U'),new Integer(8));

    // STUPID_CHAR is built by exclusion: everything is allowed except the bytes listed here.
    Arrays.fill(STUPID_CHAR,true);
    flagChars(STUPID_CHAR,nullBlankTabBreak + "-?:,[]{#&*!|>'\"%@",false);

    flagChars(R_FLOWZERO1,":",true);
    flagChars(R_FLOWNONZERO,nullBlankTabBreak + "[]{},:?",true);
}

// Set by fetchStreamEnd(); once true, needMoreTokens() always answers false.
private boolean done = false;
// Incremented on '[' / '{' and decremented on ']' / '}'; zero is tested as "not inside a flow collection".
private int flowLevel = 0;
// Number of tokens already handed out through getToken().
private int tokensTaken = 0;
// Current block indentation column; -1 before any block collection was opened.
private int indent = -1;
private boolean allowSimpleKey = true;
// True when no more bytes can be read; stays true for the ByteList/String constructors.
private boolean eof = true;
private int column = 0;
// Index of the next unread byte inside buffer.
private int pointer = 0;
private ByteList buffer;
// Source of further bytes, or null when the whole input is already in buffer.
private InputStream stream;
private List tokens;
// Stack of enclosing indentation levels (Integer), most recent at index 0.
private List indents;
// flowLevel (Integer) -> SimpleKey candidate recorded at that level.
private Map possibleSimpleKeys;

// True from stream start until the first document-start marker has been fetched.
private boolean docStart = false;

/**
 * Creates a scanner that reads its input lazily from <code>stream</code>.
 *
 * @param stream the byte source; read in chunks by update()
 */
public ScannerImpl(final InputStream stream) {
    this.stream = stream;
    this.eof = false;
    this.buffer = new ByteList(100);
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    fetchStreamStart();
}

/**
 * Creates a scanner over input that is already completely in memory.
 * The given list is used directly as the working buffer (it is not copied here).
 *
 * @param stream the complete input
 */
public ScannerImpl(final ByteList stream) {
    this.buffer = stream;
    this.stream = null;
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    fetchStreamStart();
}

/**
 * Creates a scanner over the bytes that <code>ByteList.plain</code> produces for the given text.
 *
 * @param stream the complete input
 * @throws RuntimeException if the conversion to bytes fails; the original failure is kept as cause
 */
public ScannerImpl(final String stream) {
    try {
        this.buffer = new ByteList(ByteList.plain(stream),false);
    } catch(Exception e) {
        // Keep the original exception as cause so the stack trace of the real failure survives.
        throw new RuntimeException(e.getMessage(), e);
    }
    this.stream = null;
    this.tokens = new LinkedList();
    this.indents = new LinkedList();
    this.possibleSimpleKeys = new HashMap();
    fetchStreamStart();
}

/**
 * Makes sure at least <code>length</code> bytes are buffered after the pointer, reading
 * from the stream as needed. At end of input a single NUL byte is appended as terminator
 * and filling stops, even if fewer than <code>length</code> bytes are available.
 *
 * @param length number of bytes wanted starting at the pointer
 * @param reset  when true (and input is not exhausted) the already consumed prefix of the
 *               buffer is dropped first and the pointer rebased to 0
 * @throws YAMLException if reading from the stream fails
 */
private void update(final int length, final boolean reset) {
    if(!eof && reset) {
        this.buffer.delete(0,this.pointer);
        this.pointer = 0;
    }
    while(this.buffer.realSize < (this.pointer+length)) {
        byte[] rawData = ByteList.NULL_ARRAY;
        int converted = -2;
        if(!this.eof) {
            byte[] data = new byte[1024];
            try {
                converted = this.stream.read(data);
            } catch(final IOException ioe) {
                throw new YAMLException(ioe);
            }
            if(converted == -1) {
                this.eof = true;
            } else {
                rawData = data;
            }
        }
        if(this.eof) {
            // NUL marks the end of the stream for the rest of the scanner.
            this.buffer.append('\0');
            break;
        } else {
            checkPrintable(rawData,converted);
            this.buffer.append(rawData,0,converted);
        }
    }
}

/**
 * Rejects the first byte among <code>b[0..len)</code> that is flagged in NON_PRINTABLE.
 *
 * @throws YAMLException if such a byte is found
 */
private void checkPrintable(final byte[] b, final int len) {
    for(int i=0;i<len;i++) {
        if(NON_PRINTABLE[((int)b[i] & 0xFF)]) {
            final int position = this.buffer.length() - this.pointer + i;
            throw new YAMLException("At " + position + " we found: " + (char)((int)b[i] & 0xFF) + ". Special characters are not allowed");
        }
    }
}

/**
 * Triggers update() when fewer than <code>len + 1</code> bytes are buffered from the pointer on.
 *
 * @return always true, so the call can be embedded in boolean expressions
 */
private boolean ensure(final int len, final boolean reset) {
    if(this.pointer + len >= this.buffer.realSize) {
        update(len, reset);
    }
    return true;
}

/** Returns the byte at the pointer as an unsigned value, without consuming it. */
private char peek() {
    ensure(1,false);
    return (char)((char)(buffer.bytes[this.pointer]) & 0xFF);
}

/** Returns the byte <code>index</code> positions after the pointer as an unsigned value, without consuming it. */
private char peek(final int index) {
    ensure(index+1,false);
    return (char)((char)this.buffer.bytes[this.pointer + index] & 0xFF);
}

/**
 * Consumes one byte and updates the column: a line break resets it to 0 (a CR that is
 * directly followed by LF does not, the LF will), anything else advances it by one.
 */
private void forward() {
    ensure(2,true);
    final char ch1 = (char)((int)this.buffer.bytes[this.pointer++] & 0xFF);
    if(ch1 == '\n' || ch1 == '\u0085' || (ch1 == '\r' && (((int)this.buffer.bytes[this.pointer] & 0xFF) != '\n'))) {
        this.column = 0;
    } else {
        this.column++;
    }
}

/** Consumes <code>length</code> bytes, tracking the column exactly like forward(). */
private void forward(final int length) {
    ensure(length+1,true);
    int ch = 0;
    for(int i=0;i<length;i++) {
        ch = this.buffer.bytes[this.pointer] & 0xFF;
        this.pointer++;
        if(LINEBR[ch] || (ch == '\r' && (this.buffer.bytes[this.pointer] & 0xFF) != '\n')) {
            this.column = 0;
        } else {
            this.column++;
        }
    }
}

/**
 * Tells whether the next token is an instance of one of the given classes.
 *
 * @param choices token classes to test against; an empty array matches any token
 * @return true if a next token exists and matches, false otherwise
 */
public boolean checkToken(final Class[] choices) {
    while(needMoreTokens()) {
        fetchMoreTokens();
    }
    if(!this.tokens.isEmpty()) {
        if(choices.length == 0) {
            return true;
        }
        final Object first = this.tokens.get(0);
        for(int i=0,j=choices.length;i<j;i++) {
            if(choices[i].isInstance(first)) {
                return true;
            }
        }
    }
    return false;
}

/**
 * Returns the next token without removing it.
 *
 * @return the next token, or null if there is none
 */
public Token peekToken() {
    while(needMoreTokens()) {
        fetchMoreTokens();
    }
    return (Token)(this.tokens.isEmpty() ? null : this.tokens.get(0));
}

/**
 * Removes and returns the next token.
 *
 * @return the next token, or null if there is none
 */
public Token getToken() {
    while(needMoreTokens()) {
        fetchMoreTokens();
    }
    if(!this.tokens.isEmpty()) {
        this.tokensTaken++;
        return (Token)this.tokens.remove(0);
    }
    return null;
}

/**
 * Iterator view over the scanner's tokens, backed by peekToken()/getToken().
 */
private class TokenIterator implements Iterator {
    public boolean hasNext() {
        return null != peekToken();
    }

    /** Returns the next token; yields null (no exception) once the tokens are exhausted. */
    public Object next() {
        return getToken();
    }

    /** Not supported; deliberately left as a silent no-op, as existing callers may rely on it. */
    public void remove() {
    }
}

/** Returns a fresh iterator that consumes this scanner's tokens. */
public Iterator eachToken() {
    return new TokenIterator();
}

/** Same as eachToken(). */
public Iterator iterator() {
    return eachToken();
}


/**
 * Decides whether another fetchMoreTokens() round is required: never after the stream
 * end, otherwise when the queue is empty or its head is still a simple-key candidate.
 */
private boolean needMoreTokens() {
    if(this.done) {
        return false;
    }
    return this.tokens.isEmpty() || nextPossibleSimpleKey() == this.tokensTaken;
}

/**
 * Tests for three consecutive <code>marker</code> bytes at the pointer, followed by
 * NUL, space, tab or a line break.
 * The byte after the marker is masked to 0..255 before the table lookup: a raw signed
 * byte (any value >= 0x80) would be a negative array index.
 */
private boolean isTripleMarker(final char marker) {
    final byte[] bytes = this.buffer.bytes;
    final int at = this.pointer;
    return (bytes[at] & 0xFF) == marker &&
        (bytes[at+1] & 0xFF) == marker &&
        (bytes[at+2] & 0xFF) == marker &&
        NULL_BL_T_LINEBR[bytes[at+3] & 0xFF];
}

/**
 * True if a terminated "---" is at the pointer. Despite the name, fetchMoreTokens()
 * uses this to recognise the marker for which it emits DOCUMENT_START.
 */
private boolean isEnding() {
    ensure(4,false);
    return isTripleMarker('-');
}

/**
 * True if a terminated "..." is at the pointer. Despite the name, fetchMoreTokens()
 * uses this to recognise the marker for which it emits DOCUMENT_END.
 */
private boolean isStart() {
    ensure(4,false);
    return isTripleMarker('.');
}

/** True if either a terminated "---" or a terminated "..." is at the pointer. */
private boolean isEndOrStart() {
    ensure(4,false);
    return isTripleMarker('-') || isTripleMarker('.');
}

/**
 * Skips to the next token start, closes finished block levels and dispatches on the
 * first byte to the matching fetch method, which appends the token(s) to the queue.
 *
 * @return the token produced by the selected fetch method
 * @throws ScannerException if the byte at the pointer cannot start any token
 */
private Token fetchMoreTokens() {
    scanToNextToken();
    unwindIndent(this.column);
    final char ch = peek();
    final boolean colz = this.column == 0;
    switch(ch) {
    case '\0': return fetchStreamEnd();
    case '\'': return fetchSingle();
    case '"': return fetchDouble();
    case '?': if(this.flowLevel != 0 || NULL_BL_T_LINEBR[peek(1)]) { return fetchKey(); } break;
    case ':': if(this.flowLevel != 0 || NULL_BL_T_LINEBR[peek(1)]) { return fetchValue(); } break;
    case '%': if(colz) {return fetchDirective(); } break;
    case '-':
        if((colz || docStart) && isEnding()) {
            return fetchDocumentStart();
        } else if(NULL_BL_T_LINEBR[peek(1)]) {
            return fetchBlockEntry();
        }
        break;
    case '.':
        if(colz && isStart()) {
            return fetchDocumentEnd();
        }
        break;
    case '[': return fetchFlowSequenceStart();
    case '{': return fetchFlowMappingStart();
    case ']': return fetchFlowSequenceEnd();
    case '}': return fetchFlowMappingEnd();
    case ',': return fetchFlowEntry();
    case '*': return fetchAlias();
    case '&': return fetchAnchor();
    case '!': return fetchTag();
    case '|': if(this.flowLevel == 0) { return fetchLiteral(); } break;
    case '>': if(this.flowLevel == 0) { return fetchFolded(); } break;
    }

    // Nothing above moved the pointer, so ch is still the current byte. A plain scalar
    // starts with any freely allowed byte, or with '-', '?' or ':' when the byte after
    // it is not NUL, blank, tab or a line break.
    final boolean indicator = ch == '-' || ch == '?' || ch == ':';
    if(STUPID_CHAR[ch] || (indicator && !NULL_BL_T_LINEBR[peek(1)])) {
        return fetchPlain();
    }

    throw new ScannerException("while scanning for the next token","found character " + ch + "(" + (int)ch + ") that cannot start any token",null);
}

/** Queues STREAM_START and marks that no document start has been seen yet. */
private Token fetchStreamStart() {
    this.docStart = true;
    this.tokens.add(Token.STREAM_START);
    return Token.STREAM_START;
}

/** Closes all open block levels, queues STREAM_END and marks the scanner as done. */
private Token fetchStreamEnd() {
    unwindIndent(-1);
    this.allowSimpleKey = false;
    this.possibleSimpleKeys = new HashMap();
    this.tokens.add(Token.STREAM_END);
    this.done = true;
    return Token.STREAM_END;
}

/**
 * Skips spaces, '#' comments and line breaks until the pointer rests on the first byte
 * of the next token. Outside flow collections a consumed line break re-enables simple keys.
 */
private void scanToNextToken() {
    for(;;) {
        while(peek() == ' ') {
            forward();
        }
        if(peek() == '#') {
            forward();
            while(!NULL_OR_LINEBR[peek()]) {
                forward();
            }
        }
        if(scanLineBreak().length != 0 ) {
            if(this.flowLevel == 0) {
                this.allowSimpleKey = true;
            }
        } else {
            break;
        }
    }
}

/**
 * Consumes one line break (CR LF, CR, LF or byte 0x85) if one is at the pointer.
 *
 * @return NN (a single '\n') if a break was consumed, EMPTY otherwise
 */
private byte[] scanLineBreak() {
    final int val = peek();
    if(FULL_LINEBR[val]) {
        ensure(2,false);
        if(RN[0] == buffer.bytes[this.pointer] && RN[1] == buffer.bytes[this.pointer+1]) {
            forward(2);
        } else {
            forward();
        }
        return NN;
    } else {
        return EMPTY;
    }
}

/**
 * Pops indentation levels deeper than <code>col</code>, queueing one BLOCK_END per
 * popped level. Does nothing inside a flow collection.
 */
private void unwindIndent(final int col) {
    if(this.flowLevel != 0) {
        return;
    }

    while(this.indent > col) {
        this.indent = ((Integer)(this.indents.remove(0))).intValue();
        this.tokens.add(Token.BLOCK_END);
    }
}

/** Queues DOCUMENT_START for the marker at the pointer and clears the docStart flag. */
private Token fetchDocumentStart() {
    this.docStart = false;
    return fetchDocumentIndicator(Token.DOCUMENT_START);
}

/** Closes all open block levels, consumes the three marker bytes and queues <code>tok</code>. */
private Token fetchDocumentIndicator(final Token tok) {
    unwindIndent(-1);
    this.allowSimpleKey = false;
    forward(3);
    this.tokens.add(tok);
    return tok;
}

/**
 * Handles a '-' block sequence entry: in block context it opens a new sequence when the
 * column is deeper than the current indent, then consumes the '-' and queues BLOCK_ENTRY.
 *
 * @throws ScannerException if in block context no simple key (and thus no entry) is allowed here
 */
private Token fetchBlockEntry() {
    if(this.flowLevel == 0) {
        if(!this.allowSimpleKey) {
            throw new ScannerException(null,"sequence entries are not allowed here",null);
        }
        if(addIndent(this.column)) {
            this.tokens.add(Token.BLOCK_SEQUENCE_START);
        }
    }
    this.allowSimpleKey = true;
    forward();
    this.tokens.add(Token.BLOCK_ENTRY);
    return Token.BLOCK_ENTRY;
}

/**
 * Pushes the current indent and switches to <code>col</code> if that is deeper.
 *
 * @return true if a new indentation level was opened
 */
private boolean addIndent(final int col) {
    if(this.indent < col) {
        this.indents.add(0,new Integer(this.indent));
        this.indent = col;
        return true;
    }
    return false;
}

/** Scans the tag at the pointer and queues the resulting token. */
private Token fetchTag() {
    savePossibleSimpleKey();
    this.allowSimpleKey = false;
    final Token tok = scanTag();
    this.tokens.add(tok);
    return tok;
}

/**
 * If simple keys are currently allowed, records the upcoming token as the simple-key
 * candidate of the current flow level, replacing any earlier candidate of that level.
 */
private void savePossibleSimpleKey() {
    if(this.allowSimpleKey) {
        final int tokenNumber = this.tokensTaken+this.tokens.size();
        final boolean atBlockIndent = (this.flowLevel == 0) && this.indent == this.column;
        this.possibleSimpleKeys.put(new Integer(this.flowLevel),new SimpleKey(tokenNumber,atBlockIndent,-1,-1,this.column));
    }
}

/**
 * Scans a tag starting at the '!' under the pointer. Three forms are handled:
 * "!&lt;uri&gt;" (no handle), a lone "!" (no handle, suffix "!"), and
 * "!suffix" / "!handle!suffix" (handle plus suffix).
 *
 * @return a TagToken carrying {handle, suffix}; handle is null for the first two forms
 * @throws ScannerException if "!&lt;" is not closed by '>' or the tag is not followed by
 *         NUL, space or a line break
 */
private Token scanTag() {
    char ch = peek(1);
    ByteList handle = null;
    ByteList suffix = null;
    if(ch == '<') {
        forward(2);
        suffix = scanTagUri("tag");
        if(peek() != '>') {
            throw new ScannerException("while scanning a tag","expected '>', but found "+ peek() + "(" + (int)peek() + ")",null);
        }
        forward();
    } else if(NULL_BL_T_LINEBR[ch]) {
        suffix = BANG;
        forward();
    } else {
        // Look ahead for a second '!' before the tag ends: it closes a named handle.
        int length = 1;
        boolean useHandle = false;
        while(!NULL_BL_T_LINEBR[ch]) {
            if(ch == '!') {
                useHandle = true;
                break;
            }
            length++;
            ch = peek(length);
        }
        if(useHandle) {
            handle = scanTagHandle("tag");
        } else {
            handle = BANG;
            forward();
        }
        suffix = scanTagUri("tag");
    }
    if(!NULL_BL_LINEBR[peek()]) {
        throw new ScannerException("while scanning a tag","expected ' ', but found " + peek() + "(" + (int)peek() + ")",null);
    }
    return new TagToken(new ByteList[] {handle,suffix});
}
693 694 private ByteList scanTagUri(final String name) { 695 final ByteList chunks = new ByteList(10); 696 int length = 0; 697 char ch = peek(length); 698 while(STRANGE_CHAR[ch]) { 699 if('%' == ch) { 700 ensure(length,false); 701 chunks.append(this.buffer.bytes,this.pointer,length); 702 length = 0; 703 chunks.append(scanUriEscapes(name)); 704 } else { 705 length++; 706 } 707 ch = peek(length); 708 } 709 if(length != 0) { 710 ensure(length,false); 711 chunks.append(this.buffer.bytes,this.pointer,length); 712 forward(length); 713 } 714 if(chunks.length() == 0) { 715 throw new ScannerException("while scanning a " + name,"expected URI, but found " + ch + "(" + (int)ch + ")",null); 716 } 717 return chunks; 718 } 719 720 private ByteList scanTagHandle(final String name) { 721 char ch = peek(); 722 if(ch != '!') { 723 throw new ScannerException("while scanning a " + name,"expected '!', but found " + ch + "(" + (int)ch + ")",null); 724 } 725 int length = 1; 726 ch = peek(length); 727 if(ch != ' ') { 728 while(ALPHA[ch]) { 729 length++; 730 ch = peek(length); 731 } 732 if('!' 
!= ch) { 733 forward(length); 734 throw new ScannerException("while scanning a " + name,"expected '!', but found " + ch + "(" + ((int)ch) + ")",null); 735 } 736 length++; 737 } 738 ensure(length,false); 739 final ByteList value = new ByteList(this.buffer.bytes,this.pointer,length,false); 740 forward(length); 741 return value; 742 } 743 744 private ByteList scanUriEscapes(final String name) { 745 final ByteList bytes = new ByteList(); 746 while(peek() == '%') { 747 forward(); 748 try { 749 ensure(2,false); 750 bytes.append(Integer.parseInt(new String (ByteList.plain(this.buffer.bytes),this.pointer,2),16)); 751 } catch(final NumberFormatException nfe) { 752 throw new ScannerException("while scanning a " + name,"expected URI escape sequence of 2 hexadecimal numbers, but found " + peek(1) + "(" + ((int)peek(1)) + ") and "+ peek(2) + "(" + ((int)peek(2)) + ")",null); 753 } 754 forward(2); 755 } 756 return bytes; 757 } 758 759 private Token fetchPlain() { 760 savePossibleSimpleKey(); 761 this.allowSimpleKey = false; 762 final Token tok = scanPlain(); 763 this.tokens.add(tok); 764 return tok; 765 } 766 767 private Token scanPlain() { 768 final ByteList chunks = new ByteList(7); 769 final int ind = this.indent+1; 770 ByteList spaces = new ByteList(0); 771 boolean f_nzero = true; 772 boolean[] r_check = R_FLOWNONZERO; 773 boolean[] r_check2 = ALL_FALSE; 774 boolean[] r_check3 = ALL_FALSE; 775 if(this.flowLevel == 0) { 776 f_nzero = false; 777 r_check = R_FLOWZERO; 778 r_check2 = R_FLOWZERO1; 779 r_check3 = R_FLOWZERO; 780 } 781 while(peek() != '#') { 782 int length = 0; 783 int i = 0; 784 for(;;i++) { 785 ensure(i+2,false); 786 if(r_check[this.buffer.bytes[this.pointer+i]&0xFF] || (r_check2[this.buffer.bytes[this.pointer+i]&0xFF] && r_check3[this.buffer.bytes[this.pointer+i+1]&0xFF])) { 787 length = i; 788 break; 789 } 790 } 791 792 final char ch = peek(length); 793 if(f_nzero && ch == ':' && !S4[peek(length+1)]) { 794 forward(length); 795 throw new ScannerException("while 
scanning a plain scalar","found unexpected ':'","Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details."); 796 } 797 if(length == 0) { 798 break; 799 } 800 this.allowSimpleKey = false; 801 chunks.append(spaces); 802 ensure(length,false); 803 chunks.append(this.buffer.bytes,this.pointer,length); 804 forward(length); 805 spaces = scanPlainSpaces(ind); 806 if(spaces == null || (this.flowLevel == 0 && this.column < ind)) { 807 break; 808 } 809 } 810 return new ScalarToken(chunks,true); 811 } 812 813 private int nextPossibleSimpleKey() { 814 for(final Iterator iter = this.possibleSimpleKeys.values().iterator();iter.hasNext();) { 815 final SimpleKey key = (SimpleKey)iter.next(); 816 if(key.getTokenNumber() > 0) { 817 return key.getTokenNumber(); 818 } 819 } 820 return -1; 821 } 822 823 private ByteList scanPlainSpaces(final int indent) { 824 final ByteList chunks = new ByteList(); 825 int length = 0; 826 while(peek(length) == ' ') { 827 length++; 828 } 829 final byte[] whitespaces = new byte[length]; 830 Arrays.fill(whitespaces,(byte)' '); 831 forward(length); 832 char ch = peek(); 833 if(FULL_LINEBR[ch]) { 834 final byte[] lineBreak = scanLineBreak(); 835 this.allowSimpleKey = true; 836 if(isEndOrStart()) { 837 return new ByteList(0); 838 } 839 final ByteList breaks = new ByteList(); 840 while(BLANK_OR_LINEBR[peek()]) { 841 if(' ' == peek()) { 842 forward(); 843 } else { 844 breaks.append(scanLineBreak()); 845 if(isEndOrStart()) { 846 return new ByteList(0); 847 } 848 } 849 } 850 if(!(lineBreak.length == 1 && lineBreak[0] == '\n')) { 851 chunks.append(lineBreak); 852 } else if(breaks == null || breaks.realSize == 0) { 853 chunks.append(SPACE); 854 } 855 chunks.append(breaks); 856 } else { 857 chunks.append(whitespaces); 858 } 859 return chunks; 860 } 861 862 private Token fetchSingle() { 863 return fetchFlowScalar('\''); 864 } 865 866 private Token fetchDouble() { 867 return fetchFlowScalar('"'); 868 } 869 870 private Token fetchFlowScalar(final char 
style) { 871 savePossibleSimpleKey(); 872 this.allowSimpleKey = false; 873 final Token tok = scanFlowScalar(style); 874 this.tokens.add(tok); 875 return tok; 876 } 877 878 private Token scanFlowScalar(final char style) { 879 final boolean dbl = style == '"'; 880 final ByteList chunks = new ByteList(); 881 final char quote = peek(); 882 forward(); 883 chunks.append(scanFlowScalarNonSpaces(dbl)); 884 while(peek() != quote) { 885 chunks.append(scanFlowScalarSpaces()); 886 chunks.append(scanFlowScalarNonSpaces(dbl)); 887 } 888 forward(); 889 return new ScalarToken(chunks,false,style); 890 } 891 892 private final static byte[] HEXA_VALUES = new byte[256]; 893 static { 894 Arrays.fill(HEXA_VALUES,(byte)-1); 895 HEXA_VALUES['0'] = 0; 896 HEXA_VALUES['1'] = 1; 897 HEXA_VALUES['2'] = 2; 898 HEXA_VALUES['3'] = 3; 899 HEXA_VALUES['4'] = 4; 900 HEXA_VALUES['5'] = 5; 901 HEXA_VALUES['6'] = 6; 902 HEXA_VALUES['7'] = 7; 903 HEXA_VALUES['8'] = 8; 904 HEXA_VALUES['9'] = 9; 905 HEXA_VALUES['A'] = 10; 906 HEXA_VALUES['B'] = 11; 907 HEXA_VALUES['C'] = 12; 908 HEXA_VALUES['D'] = 13; 909 HEXA_VALUES['E'] = 14; 910 HEXA_VALUES['F'] = 15; 911 HEXA_VALUES['a'] = 10; 912 HEXA_VALUES['b'] = 11; 913 HEXA_VALUES['c'] = 12; 914 HEXA_VALUES['d'] = 13; 915 HEXA_VALUES['e'] = 14; 916 HEXA_VALUES['f'] = 15; 917 } 918 919 private ByteList parseHexa(int length) { 920 ensure(length,false); 921 ByteList chunks = new ByteList(length/2); 922 for(int i=0;i<length;i+=2) { 923 byte val = HEXA_VALUES[this.buffer.bytes[this.pointer+i] & 0xFF]; 924 if(val == -1) { 925 throw new ScannerException("while scanning a double-quoted scalar","expected escape sequence of " + length + " hexadecimal numbers, but found something else: " + (char)(this.buffer.bytes[this.pointer+i] & 0xFF),null); 926 } 927 if(i+1 < length) { 928 val = (byte)(val << 4); 929 byte v2 = HEXA_VALUES[this.buffer.bytes[this.pointer+i+1] & 0xFF]; 930 if(v2 == -1) { 931 throw new ScannerException("while scanning a double-quoted scalar","expected 
escape sequence of " + length + " hexadecimal numbers, but found something else: " + (char)(this.buffer.bytes[this.pointer+i+1] & 0xFF),null); 932 } 933 val+=v2; 934 } 935 chunks.append(val); 936 } 937 forward(length); 938 return chunks; 939 } 940 941 private ByteList scanFlowScalarNonSpaces(final boolean dbl) { 942 final ByteList chunks = new ByteList(); 943 for(;;) { 944 int length = 0; 945 while(!SPACES_AND_STUFF[peek(length)]) { 946 length++; 947 } 948 if(length != 0) { 949 ensure(length,false); 950 chunks.append(this.buffer.bytes,this.pointer,length); 951 forward(length); 952 } 953 char ch = peek(); 954 if(!dbl && ch == '\'' && peek(1) == '\'') { 955 chunks.append('\''); 956 forward(2); 957 } else if((dbl && ch == '\'') || (!dbl && DOUBLE_ESC[ch])) { 958 chunks.append(ch); 959 forward(); 960 } else if(dbl && ch == '\\') { 961 forward(); 962 ch = peek(); 963 if(IS_ESCAPE_REPLACEMENT[ch]) { 964 chunks.append(ESCAPE_REPLACEMENTS[ch]); 965 forward(); 966 } else if(ESCAPE_CODES.containsKey(new Character (ch))) { 967 length = ((Integer )ESCAPE_CODES.get(new Character (ch))).intValue(); 968 forward(); 969 chunks.append(parseHexa(length)); 970 } else if(FULL_LINEBR[ch]) { 971 scanLineBreak(); 972 chunks.append(scanFlowScalarBreaks()); 973 } else { 974 throw new ScannerException("while scanning a double-quoted scalar","found unknown escape character " + ch + "(" + ((int)ch) + ")",null); 975 } 976 } else { 977 return chunks; 978 } 979 } 980 } 981 982 private ByteList scanFlowScalarSpaces() { 983 final ByteList chunks = new ByteList(); 984 int length = 0; 985 while(BLANK_T[peek(length)]) { 986 length++; 987 } 988 ensure(length,false); 989 ByteList whitespaces = new ByteList(this.buffer,this.pointer,length); 990 forward(length); 991 char ch = peek(); 992 if(ch == '\0') { 993 throw new ScannerException("while scanning a quoted scalar","found unexpected end of stream",null); 994 } else if(FULL_LINEBR[ch]) { 995 final byte[] lineBreak = scanLineBreak(); 996 final ByteList 
breaks = scanFlowScalarBreaks(); 997 if(!(lineBreak.length == 1 && lineBreak[0] == '\n')) { 998 chunks.append(lineBreak); 999 } else if(breaks.length() == 0) { 1000 chunks.append(SPACE); 1001 } 1002 chunks.append(breaks); 1003 } else { 1004 chunks.append(whitespaces); 1005 } 1006 return chunks; 1007 } 1008 1009 private ByteList scanFlowScalarBreaks() { 1010 final ByteList chunks = new ByteList(); 1011 for(;;) { 1012 if(isEndOrStart()) { 1013 throw new ScannerException("while scanning a quoted scalar","found unexpected document separator",null); 1014 } 1015 while(BLANK_T[peek()]) { 1016 forward(); 1017 } 1018 if(FULL_LINEBR[peek()]) { 1019 chunks.append(scanLineBreak()); 1020 } else { 1021 return chunks; 1022 } 1023 } 1024 } 1025 1026 private Token fetchValue() { 1027 final SimpleKey key = (SimpleKey)this.possibleSimpleKeys.get(new Integer (this.flowLevel)); 1028 if(null == key) { 1029 if(this.flowLevel == 0 && !this.allowSimpleKey) { 1030 throw new ScannerException(null,"mapping values are not allowed here",null); 1031 } 1032 } else { 1033 this.possibleSimpleKeys.remove(new Integer (this.flowLevel)); 1034 this.tokens.add(key.getTokenNumber()-this.tokensTaken,Token.KEY); 1035 if(this.flowLevel == 0 && addIndent(key.getColumn())) { 1036 this.tokens.add(key.getTokenNumber()-this.tokensTaken,Token.BLOCK_MAPPING_START); 1037 } 1038 this.allowSimpleKey = false; 1039 } 1040 forward(); 1041 this.tokens.add(Token.VALUE); 1042 return Token.VALUE; 1043 } 1044 1045 private Token fetchFlowSequenceStart() { 1046 return fetchFlowCollectionStart(Token.FLOW_SEQUENCE_START); 1047 } 1048 1049 private Token fetchFlowMappingStart() { 1050 return fetchFlowCollectionStart(Token.FLOW_MAPPING_START); 1051 } 1052 1053 private Token fetchFlowCollectionStart(final Token tok) { 1054 savePossibleSimpleKey(); 1055 this.flowLevel++; 1056 this.allowSimpleKey = true; 1057 forward(1); 1058 this.tokens.add(tok); 1059 return tok; 1060 } 1061 1062 private Token fetchDocumentEnd() { 1063 return 
fetchDocumentIndicator(Token.DOCUMENT_END); 1064 } 1065 1066 private Token fetchFlowSequenceEnd() { 1067 return fetchFlowCollectionEnd(Token.FLOW_SEQUENCE_END); 1068 } 1069 1070 private Token fetchFlowMappingEnd() { 1071 return fetchFlowCollectionEnd(Token.FLOW_MAPPING_END); 1072 } 1073 1074 private Token fetchFlowCollectionEnd(final Token tok) { 1075 this.flowLevel--; 1076 this.allowSimpleKey = false; 1077 forward(1); 1078 this.tokens.add(tok); 1079 return tok; 1080 } 1081 1082 private Token fetchFlowEntry() { 1083 this.allowSimpleKey = true; 1084 forward(1); 1085 this.tokens.add(Token.FLOW_ENTRY); 1086 return Token.FLOW_ENTRY; 1087 } 1088 1089 private Token fetchLiteral() { 1090 return fetchBlockScalar('|'); 1091 } 1092 1093 private Token fetchFolded() { 1094 return fetchBlockScalar('>'); 1095 } 1096 1097 private Token fetchBlockScalar(final char style) { 1098 this.allowSimpleKey = true; 1099 final Token tok = scanBlockScalar(style); 1100 this.tokens.add(tok); 1101 return tok; 1102 } 1103 1104 private Token scanBlockScalar(final char style) { 1105 final boolean folded = style == '>'; 1106 final ByteList chunks = new ByteList(); 1107 forward(); 1108 final Object [] chompi = scanBlockScalarIndicators(); 1109 final boolean chomping = ((Boolean )chompi[0]).booleanValue(); 1110 final int increment = ((Integer )chompi[1]).intValue(); 1111 scanBlockScalarIgnoredLine(); 1112 int minIndent = this.indent+1; 1113 if(minIndent < 1) { 1114 minIndent = 1; 1115 } 1116 ByteList breaks = null; 1117 int maxIndent = 0; 1118 int ind = 0; 1119 if(increment == -1) { 1120 final Object [] brme = scanBlockScalarIndentation(); 1121 breaks = (ByteList)brme[0]; 1122 maxIndent = ((Integer )brme[1]).intValue(); 1123 if(minIndent > maxIndent) { 1124 ind = minIndent; 1125 } else { 1126 ind = maxIndent; 1127 } 1128 } else { 1129 ind = minIndent + increment - 1; 1130 breaks = scanBlockScalarBreaks(ind); 1131 } 1132 1133 byte[] lineBreak = ByteList.NULL_ARRAY; 1134 while(this.column == ind && 
peek() != '\0') { 1135 chunks.append(breaks); 1136 final boolean leadingNonSpace = !BLANK_T[peek()]; 1137 int length = 0; 1138 while(!NULL_OR_LINEBR[peek(length)]) { 1139 length++; 1140 } 1141 ensure(length,false); 1142 chunks.append(this.buffer.bytes,this.pointer,length); 1143 forward(length); 1144 lineBreak = scanLineBreak(); 1145 breaks = scanBlockScalarBreaks(ind); 1146 if(this.column == ind && peek() != '\0') { 1147 if(folded && lineBreak.length == 1 && lineBreak[0] == '\n' && leadingNonSpace && !BLANK_T[peek()]) { 1148 if(breaks.length() == 0) { 1149 chunks.append(SPACE); 1150 } 1151 } else { 1152 chunks.append(lineBreak); 1153 } 1154 } else { 1155 break; 1156 } 1157 } 1158 1159 if(chomping) { 1160 chunks.append(lineBreak); 1161 chunks.append(breaks); 1162 } 1163 1164 return new ScalarToken(chunks,false,style); 1165 } 1166 1167 private ByteList scanBlockScalarBreaks(final int indent) { 1168 final ByteList chunks = new ByteList(); 1169 while(this.column < indent && peek() == ' ') { 1170 forward(); 1171 } 1172 while(FULL_LINEBR[peek()]) { 1173 chunks.append(scanLineBreak()); 1174 while(this.column < indent && peek() == ' ') { 1175 forward(); 1176 } 1177 } 1178 return chunks; 1179 } 1180 1181 1182 private Object [] scanBlockScalarIndentation() { 1183 final ByteList chunks = new ByteList(); 1184 int maxIndent = 0; 1185 while(BLANK_OR_LINEBR[peek()]) { 1186 if(peek() != ' ') { 1187 chunks.append(scanLineBreak()); 1188 } else { 1189 forward(); 1190 if(this.column > maxIndent) { 1191 maxIndent = column; 1192 } 1193 } 1194 } 1195 return new Object [] {chunks,new Integer (maxIndent)}; 1196 } 1197 1198 1199 private Object [] scanBlockScalarIndicators() { 1200 boolean chomping = false; 1201 int increment = -1; 1202 char ch = peek(); 1203 if(ch == '-' || ch == '+') { 1204 chomping = ch == '+'; 1205 forward(); 1206 ch = peek(); 1207 if(DIGIT[ch]) { 1208 increment = ch-'0'; 1209 if(increment == 0) { 1210 throw new ScannerException("while scanning a block scalar","expected 
indentation indicator in the range 1-9, but found 0",null); 1211 } 1212 forward(); 1213 } 1214 } else if(DIGIT[ch]) { 1215 increment = ch-'0'; 1216 if(increment == 0) { 1217 throw new ScannerException("while scanning a block scalar","expected indentation indicator in the range 1-9, but found 0",null); 1218 } 1219 forward(); 1220 ch = peek(); 1221 if(ch == '-' || ch == '+') { 1222 chomping = ch == '+'; 1223 forward(); 1224 } 1225 } 1226 if(!NULL_BL_LINEBR[peek()]) { 1227 throw new ScannerException("while scanning a block scalar","expected chomping or indentation indicators, but found " + peek() + "(" + ((int)peek()) + ")",null); 1228 } 1229 return new Object [] {Boolean.valueOf(chomping),new Integer (increment)}; 1230 } 1231 1232 private byte[] scanBlockScalarIgnoredLine() { 1233 while(peek() == ' ') { 1234 forward(); 1235 } 1236 if(peek() == '#') { 1237 while(!NULL_OR_LINEBR[peek()]) { 1238 forward(); 1239 } 1240 } 1241 if(!NULL_OR_LINEBR[peek()]) { 1242 throw new ScannerException("while scanning a block scalar","expected a comment or a line break, but found " + peek() + "(" + ((int)peek()) + ")",null); 1243 } 1244 return scanLineBreak(); 1245 } 1246 1247 private Token fetchDirective() { 1248 unwindIndent(-1); 1249 this.allowSimpleKey = false; 1250 final Token tok = scanDirective(); 1251 this.tokens.add(tok); 1252 return tok; 1253 } 1254 1255 private Token fetchKey() { 1256 if(this.flowLevel == 0) { 1257 if(!this.allowSimpleKey) { 1258 throw new ScannerException(null,"mapping keys are not allowed here",null); 1259 } 1260 if(addIndent(this.column)) { 1261 this.tokens.add(Token.BLOCK_MAPPING_START); 1262 } 1263 } 1264 this.allowSimpleKey = this.flowLevel == 0; 1265 forward(); 1266 this.tokens.add(Token.KEY); 1267 return Token.KEY; 1268 } 1269 1270 private Token fetchAlias() { 1271 savePossibleSimpleKey(); 1272 this.allowSimpleKey = false; 1273 final Token tok = scanAnchor(new AliasToken()); 1274 this.tokens.add(tok); 1275 return tok; 1276 } 1277 1278 private Token 
fetchAnchor() { 1279 savePossibleSimpleKey(); 1280 this.allowSimpleKey = false; 1281 final Token tok = scanAnchor(new AnchorToken()); 1282 this.tokens.add(tok); 1283 return tok; 1284 } 1285 1286 private Token scanDirective() { 1287 forward(); 1288 final String name = scanDirectiveName(); 1289 String [] value = null; 1290 if(name.equals("YAML")) { 1291 value = scanYamlDirectiveValue(); 1292 } else if(name.equals("TAG")) { 1293 value = scanTagDirectiveValue(); 1294 } else { 1295 while(!NULL_OR_LINEBR[peek()]) { 1296 forward(); 1297 } 1298 } 1299 scanDirectiveIgnoredLine(); 1300 return new DirectiveToken(name,value); 1301 } 1302 1303 private String scanDirectiveName() { 1304 int length = 0; 1305 char ch = peek(length); 1306 boolean zlen = true; 1307 while(ALPHA[ch]) { 1308 zlen = false; 1309 length++; 1310 ch = peek(length); 1311 } 1312 if(zlen) { 1313 throw new ScannerException("while scanning a directive","expected alphabetic or numeric character, but found " + ch + "(" + ((int)ch) + ")",null); 1314 } 1315 String value = null; 1316 try { 1317 ensure(length,false); 1318 value = new String (this.buffer.bytes,this.pointer,length,"ISO8859-1"); 1319 } catch(Exception e) { 1320 } 1321 forward(length); 1322 if(!NULL_BL_LINEBR[peek()]) { 1323 throw new ScannerException("while scanning a directive","expected alphabetic or numeric character, but found " + ch + "(" + ((int)ch) + ")",null); 1324 } 1325 return value; 1326 } 1327 1328 private byte[] scanDirectiveIgnoredLine() { 1329 while(peek() == ' ') { 1330 forward(); 1331 } 1332 if(peek() == '"') { 1333 while(!NULL_OR_LINEBR[peek()]) { 1334 forward(); 1335 } 1336 } 1337 final char ch = peek(); 1338 if(!NULL_OR_LINEBR[ch]) { 1339 throw new ScannerException("while scanning a directive","expected a comment or a line break, but found " + peek() + "(" + ((int)peek()) + ")",null); 1340 } 1341 return scanLineBreak(); 1342 } 1343 1344 private Token scanAnchor(final Token tok) { 1345 final char indicator = peek(); 1346 final String 
name = indicator == '*' ? "alias" : "anchor"; 1347 forward(); 1348 int length = 0; 1349 while(ALPHA[peek(length)]) { 1350 length++; 1351 } 1352 if(length == 0) { 1353 throw new ScannerException("while scanning an " + name,"expected alphabetic or numeric character, but found something else...",null); 1354 } 1355 String value = null; 1356 try { 1357 ensure(length,false); 1358 value = new String (this.buffer.bytes,this.pointer,length,"ISO8859-1"); 1359 } catch(Exception e) { 1360 } 1361 forward(length); 1362 if(!NON_ALPHA_OR_NUM[peek()]) { 1363 throw new ScannerException("while scanning an " + name,"expected alphabetic or numeric character, but found "+ peek() + "(" + ((int)peek()) + ")",null); 1364 1365 } 1366 tok.setValue(value); 1367 return tok; 1368 } 1369 1370 private String [] scanYamlDirectiveValue() { 1371 while(peek() == ' ') { 1372 forward(); 1373 } 1374 final String major = scanYamlDirectiveNumber(); 1375 if(peek() != '.') { 1376 throw new ScannerException("while scanning a directive","expected a digit or '.', but found " + peek() + "(" + ((int)peek()) + ")",null); 1377 } 1378 forward(); 1379 final String minor = scanYamlDirectiveNumber(); 1380 if(!NULL_BL_LINEBR[peek()]) { 1381 throw new ScannerException("while scanning a directive","expected a digit or ' ', but found " + peek() + "(" + ((int)peek()) + ")",null); 1382 } 1383 return new String [] {major,minor}; 1384 } 1385 1386 private String scanYamlDirectiveNumber() { 1387 final char ch = peek(); 1388 if(!Character.isDigit(ch)) { 1389 throw new ScannerException("while scanning a directive","expected a digit, but found " + ch + "(" + ((int)ch) + ")",null); 1390 } 1391 int length = 0; 1392 StringBuffer sb = new StringBuffer (); 1393 while(Character.isDigit(peek(length))) { 1394 sb.append(peek(length)); 1395 length++; 1396 } 1397 forward(length); 1398 return sb.toString(); 1399 } 1400 1401 public static String into(ByteList b) { 1402 try { 1403 return new String (b.bytes,0,b.realSize,"ISO8859-1"); 1404 } 
catch(Exception e) { 1405 return null; } 1407 } 1408 1409 private String [] scanTagDirectiveValue() { 1410 while(peek() == ' ') { 1411 forward(); 1412 } 1413 final String handle = into(scanTagDirectiveHandle()); 1414 while(peek() == ' ') { 1415 forward(); 1416 } 1417 final String prefix = into(scanTagDirectivePrefix()); 1418 return new String [] {handle,prefix}; 1419 } 1420 1421 private ByteList scanTagDirectiveHandle() { 1422 final ByteList value = scanTagHandle("directive"); 1423 if(peek() != ' ') { 1424 throw new ScannerException("while scanning a directive","expected ' ', but found " + peek() + "(" + ((int)peek()) + ")",null); 1425 } 1426 return value; 1427 } 1428 1429 private ByteList scanTagDirectivePrefix() { 1430 final ByteList value = scanTagUri("directive"); 1431 if(!NULL_BL_LINEBR[peek()]) { 1432 throw new ScannerException("while scanning a directive","expected ' ', but found " + peek() + "(" + ((int)peek()) + ")",null); 1433 } 1434 return value; 1435 } 1436 1437 1480 1481 public static void main(final String [] args) throws Exception { 1482 final String filename = args[0]; 1483 System.out.println("Reading of file: \"" + filename + "\""); 1484 1485 final ByteList input = new ByteList(1024); 1486 final InputStream reader = new FileInputStream (filename); 1487 byte[] buff = new byte[1024]; 1488 int read = 0; 1489 while(true) { 1490 read = reader.read(buff); 1491 input.append(buff,0,read); 1492 if(read < 1024) { 1493 break; 1494 } 1495 } 1496 reader.close(); 1497 final long before = System.currentTimeMillis(); 1498 int tokens = 0; 1499 for(int i=0;i<1;i++) { 1500 final Scanner sce2 = new ScannerImpl(input); 1501 for(final Iterator iter = sce2.eachToken();iter.hasNext();) { 1502 tokens++; System.out.println(iter.next()); 1504 } 1505 } 1506 final long after = System.currentTimeMillis(); 1507 final long time = after-before; 1508 final double timeS = (after-before)/1000.0; 1509 System.out.println("Walking through the " + tokens + " tokens took " + time + "ms, 
or " + timeS + " seconds"); 1510 } 1511 1512 public static void tmain(final String [] args) throws Exception { 1513 final String filename = args[0]; 1514 System.out.println("Reading of file: \"" + filename + "\""); 1515 1516 final InputStream reader = new FileInputStream (filename); 1517 final long before = System.currentTimeMillis(); 1518 int tokens = 0; 1519 for(int i=0;i<1;i++) { 1520 final Scanner sce2 = new ScannerImpl(reader); 1521 for(final Iterator iter = sce2.eachToken();iter.hasNext();) { 1522 tokens++;iter.next(); 1523 } 1525 } 1526 reader.close(); 1527 final long after = System.currentTimeMillis(); 1528 final long time = after-before; 1529 final double timeS = (after-before)/1000.0; 1530 System.out.println("Walking through the " + tokens + " tokens took " + time + "ms, or " + timeS + " seconds"); 1531 } 1532} | Popular Tags |