package it.stefanochizzolini.clown.tokens;

import it.stefanochizzolini.clown.bytes.Buffer;
import it.stefanochizzolini.clown.bytes.IInputStream;
import it.stefanochizzolini.clown.files.File;
import it.stefanochizzolini.clown.objects.PdfArray;
import it.stefanochizzolini.clown.objects.PdfBoolean;
import it.stefanochizzolini.clown.objects.PdfDataObject;
import it.stefanochizzolini.clown.objects.PdfDate;
import it.stefanochizzolini.clown.objects.PdfDictionary;
import it.stefanochizzolini.clown.objects.PdfDirectObject;
import it.stefanochizzolini.clown.objects.PdfHex;
import it.stefanochizzolini.clown.objects.PdfInteger;
import it.stefanochizzolini.clown.objects.PdfLiteral;
import it.stefanochizzolini.clown.objects.PdfName;
import it.stefanochizzolini.clown.objects.PdfNull;
import it.stefanochizzolini.clown.objects.PdfReal;
import it.stefanochizzolini.clown.objects.PdfReference;
import it.stefanochizzolini.clown.objects.PdfStream;

import java.io.EOFException;
import java.util.Date;

/**
  Token parser over a PDF byte stream.
  <p>{@link #moveNext()} scans the next token from the stream and exposes it through
  {@link #getToken()} / {@link #getTokenType()}; {@link #parsePdfObject()} turns the current
  token (and, for containers, the tokens that follow it) into a PDF data object.</p>
*/
public class Parser
{
  /**
    Object number / generation number pair recognized by the parser as an indirect reference
    (<code>objectNumber generationNumber R</code>).
  */
  public class Reference
  {
    private final int generationNumber;
    private final int objectNumber;

    private Reference(
      int objectNumber,
      int generationNumber
      )
    {
      this.objectNumber = objectNumber;
      this.generationNumber = generationNumber;
    }

    /**
      Gets the generation number of the referenced object.
    */
    public int getGenerationNumber(
      )
    {return generationNumber;}

    /**
      Gets the object number of the referenced object.
    */
    public int getObjectNumber(
      )
    {return objectNumber;}
  }

  /** Leading characters expected at the very beginning of the stream. */
  private static final String PdfHeader = "%PDF-";

  /**
    Evaluates whether a character is one of the delimiters recognized by this parser
    (<code>( ) &lt; &gt; [ ] / %</code>).

    @param c Character to evaluate.
    @return Whether the character is a delimiter.
  */
  protected static boolean isDelimiter(
    int c
    )
  {
    return (c == '('
      || c == ')'
      || c == '<'
      || c == '>'
      || c == '['
      || c == ']'
      || c == '/'
      || c == '%');
  }

  /**
    Evaluates whether a character is an end-of-line marker (LINE FEED or CARRIAGE RETURN).

    @param c Character to evaluate.
    @return Whether the character is an EOL marker.
  */
  protected static boolean isEOL(
    int c
    )
  {
    // LF is decimal 10 (octal 012), CR is decimal 13 (octal 015): the decimal codes are the
    // ones to test against, as the stream delivers plain byte values.
    return (c == 10 || c == 13);
  }

  /**
    Evaluates whether a character is white space (NUL, TAB, LF, FF, CR or SPACE).

    @param c Character to evaluate.
    @return Whether the character is white space.
  */
  protected static boolean isWhitespace(
    int c
    )
  {
    return (c == 0
      || c == 9
      || c == 10
      || c == 12
      || c == 13
      || c == 32);
  }

  private File file;
  private IInputStream stream;
  private Object token;
  private TokenTypeEnum tokenType;

  /*
    Set while moveNext() is looking ahead to tell a plain integer from an indirect reference,
    so that the nested moveNext() calls scan numbers without looking ahead again.
  */
  private boolean multipleTokenParsing;

  /**
    @param stream Byte stream the tokens are read from.
    @param file File passed to the references created by {@link #parsePdfObject()}.
  */
  Parser(
    IInputStream stream,
    File file
    )
  {
    this.stream = stream;
    this.file = file;
  }

  /**
    Gets the length of the underlying stream.
  */
  public long getLength(
    )
  {return stream.getLength();}

  /**
    Gets the current position within the underlying stream.
  */
  public long getPosition(
    )
  {return stream.getPosition();}

  /**
    Gets the underlying stream.
  */
  public IInputStream getStream(
    )
  {return stream;}

  /**
    Gets the token scanned by the latest {@link #moveNext()} call.
    <p>Depending on {@link #getTokenType()} it is a <code>String</code> (keyword, comment, hex,
    name, literal), an <code>Integer</code>, a <code>Float</code>, a <code>Boolean</code>, a
    <code>Date</code>, a {@link Reference}, or <code>null</code> for tokens carrying no value.</p>
  */
  public Object getToken(
    )
  {return token;}

  /**
    Gets the type of the token scanned by the latest {@link #moveNext()} call.
  */
  public TokenTypeEnum getTokenType(
    )
  {return tokenType;}

  @Override
  public int hashCode(
    )
  {return stream.hashCode();}

  /**
    Moves forward by the given number of tokens.

    @param offset Number of tokens to scan.
    @return Whether all the requested tokens were found before the end of the stream.
    @throws FileFormatException If a malformed token is met.
  */
  public boolean moveNext(
    int offset
    ) throws FileFormatException
  {
    for(
      int index = 0;
      index < offset;
      index++
      )
    {
      if(!moveNext())
        return false;
    }

    return true;
  }

  /**
    Scans the next token from the stream.
    <p>An integer is looked ahead: if it is followed by another integer and the
    <code>R</code> keyword, the three of them are consumed as a single {@link Reference} token;
    otherwise the stream is rewound just after the first integer.</p>

    @return <code>false</code> if the end of the stream was reached before any token could be
      found; <code>true</code> otherwise.
    @throws FileFormatException If the stream ends in the middle of a token or the token is
      malformed.
  */
  public boolean moveNext(
    ) throws FileFormatException
  {
    StringBuilder buffer = null;
    token = null;
    int c = 0;

    // Skip the white space preceding the token; EOF here simply means "no more tokens".
    try
    {
      do
      {c = stream.readUnsignedByte();}
      while(isWhitespace(c));
    }
    catch(EOFException e)
    {return false;}

    switch(c)
    {
      case '/': // Name.
        tokenType = TokenTypeEnum.Name;

        buffer = new StringBuilder();
        try
        {
          while(true)
          {
            c = stream.readUnsignedByte();
            if(isDelimiter(c) || isWhitespace(c))
              break;

            buffer.append((char)c);
          }
        }
        catch(EOFException e)
        {throw new FileFormatException("Unexpected EOF (malformed name object).",e,stream.getPosition());}

        stream.skip(-1); // Gives back the byte that terminated the token.
        break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
      case '.':
      case '-':
      case '+': // Number | Indirect reference.
        switch(c)
        {
          case '.': // Decimal point.
            tokenType = TokenTypeEnum.Real;
            break;
          case '-':
          case '+': // Signum.
            tokenType = TokenTypeEnum.Integer; // By default (it may be promoted to real below).
            break;
          default: // Digit.
            if(multipleTokenParsing) // Already inside a look-ahead: plain number scanning.
            {
              tokenType = TokenTypeEnum.Integer; // By default (it may be promoted to real below).
            }
            else // Look ahead to tell a number from an indirect reference.
            {
              stream.skip(-1); // Gives the first digit back to the look-ahead scan.
              return parseNumberOrReference();
            }
            break;
        }

        buffer = new StringBuilder();
        try
        {
          do
          {
            buffer.append((char)c);
            c = stream.readUnsignedByte();
            if(c == '.')
              tokenType = TokenTypeEnum.Real;
            else if(c < '0' || c > '9')
              break;
          } while(true);
        }
        catch(EOFException e)
        {throw new FileFormatException("Unexpected EOF (malformed number object).",e,stream.getPosition());}

        stream.skip(-1); // Gives back the byte that terminated the token.
        break;
      case '[': // Array (begin).
        tokenType = TokenTypeEnum.ArrayBegin;
        break;
      case ']': // Array (end).
        tokenType = TokenTypeEnum.ArrayEnd;
        break;
      case '<': // Dictionary (begin) | Hexadecimal string.
        try
        {c = stream.readUnsignedByte();}
        catch(EOFException e)
        {throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).",e,stream.getPosition());}
        if(c == '<') // Second angle bracket: dictionary.
        {
          tokenType = TokenTypeEnum.DictionaryBegin;
          break;
        }

        // Single angle bracket: hexadecimal string.
        tokenType = TokenTypeEnum.Hex;

        buffer = new StringBuilder();
        try
        {
          /*
            The byte read above to rule out '<<' is already the first character of the string
            (or its closing bracket, if the string is empty): it must be evaluated before
            reading on, otherwise the first hexadecimal digit gets lost.
          */
          while(c != '>')
          {
            buffer.append((char)c);
            c = stream.readUnsignedByte();
          }
        }
        catch(EOFException e)
        {throw new FileFormatException("Unexpected EOF (malformed hex string).",e,stream.getPosition());}

        break;
      case '>': // Dictionary (end).
        try
        {c = stream.readUnsignedByte();}
        catch(EOFException e)
        {throw new FileFormatException("Unexpected EOF (malformed dictionary).",e,stream.getPosition());}
        if(c != '>')
          throw new FileFormatException("Malformed dictionary.",stream.getPosition());

        tokenType = TokenTypeEnum.DictionaryEnd;

        break;
      case '%': // Comment.
        tokenType = TokenTypeEnum.Comment;

        // The comment text (leading '%' excluded) runs up to the end of the line.
        buffer = new StringBuilder();
        try
        {
          while(true)
          {
            c = stream.readUnsignedByte();
            if(isEOL(c))
              break;

            buffer.append((char)c);
          }
        }
        catch(EOFException ignored)
        {/* EOF is a legal end for a comment (e.g. a trailing '%%EOF' without line break). */}

        break;
      case '(': // Literal string.
        tokenType = TokenTypeEnum.Literal;

        buffer = readLiteralString();

        break;
      case 'R': // Indirect reference keyword.
        tokenType = TokenTypeEnum.Reference;

        break;
      default: // Keyword.
        tokenType = TokenTypeEnum.Keyword;

        buffer = new StringBuilder();
        try
        {
          do
          {
            buffer.append((char)c);
            c = stream.readUnsignedByte();
          } while(!isDelimiter(c) && !isWhitespace(c));
        }
        catch(EOFException ignored)
        {/* EOF is treated as a legal keyword terminator. */}
        stream.skip(-1); // Gives back the byte that terminated the token.
        break;
    }

    if(buffer != null)
    {
      // Convert the raw characters into the value the token type calls for.
      switch(tokenType)
      {
        case Keyword:
          token = buffer.toString();
          if(((String)token).equals("false")
            || ((String)token).equals("true")) // Boolean.
          {
            tokenType = TokenTypeEnum.Boolean;
            token = Boolean.parseBoolean((String)token);
          }
          else if(((String)token).equals("null")) // Null.
          {
            tokenType = TokenTypeEnum.Null;
            token = null;
          }
          break;
        case Comment:
        case Hex:
        case Name:
          token = buffer.toString();
          break;
        case Literal:
          token = buffer.toString();
          if(((String)token).startsWith("D:")) // Date.
          {
            tokenType = TokenTypeEnum.Date;
            token = PdfDate.toDate((String)token);
          }
          break;
        case Integer:
          token = Integer.parseInt(buffer.toString());
          break;
        case Real:
          token = Float.parseFloat(buffer.toString());
          break;
        default:
          break;
      }
    }

    return true;
  }

  /**
    Scans a token beginning with a digit, looking ahead to find out whether it is the first
    part of an indirect reference (<code>objectNumber generationNumber R</code>).
    <p>On entry the stream is positioned on the first digit. On exit either the whole
    reference has been consumed (token is a {@link Reference}) or just the number has.</p>

    @return Always <code>true</code>, as at least the number is available.
    @throws FileFormatException If a malformed token is met while looking ahead.
  */
  private boolean parseNumberOrReference(
    ) throws FileFormatException
  {
    multipleTokenParsing = true;
    try
    {
      // 1. Object number (or just a number).
      moveNext();
      if(tokenType != TokenTypeEnum.Integer) // E.g. a real number: nothing to look ahead.
        return true;

      int objectNumber = (Integer)token;
      long oldOffset = stream.getPosition(); // Where to rewind to if this isn't a reference.

      /*
        2. Generation number, then 3. reference keyword.
        The outcome of moveNext() must be checked: at EOF it leaves the previous token type
        untouched while the token itself is null, which would be mistaken for an integer.
      */
      if(moveNext()
        && tokenType == TokenTypeEnum.Integer)
      {
        int generationNumber = (Integer)token;
        if(moveNext()
          && tokenType == TokenTypeEnum.Reference)
        {
          token = new Reference(objectNumber,generationNumber);
          return true;
        }
      }

      // Not a reference: rewind just after the first integer and expose it as a plain number.
      stream.seek(oldOffset);
      token = objectNumber;
      tokenType = TokenTypeEnum.Integer;
      return true;
    }
    finally
    {
      // Reset even if the look-ahead failed, otherwise later integers would never be looked ahead.
      multipleTokenParsing = false;
    }
  }

  /**
    Scans the body of a literal string, the opening parenthesis having already been consumed.
    <p>Balanced nested parentheses are kept, escape sequences (including octal codes and line
    continuations) are resolved, and unescaped CR / CR-LF line breaks are normalized to LF.</p>

    @return Characters of the string, enclosing parentheses excluded.
    @throws FileFormatException If the stream ends before the closing parenthesis.
  */
  private StringBuilder readLiteralString(
    ) throws FileFormatException
  {
    StringBuilder buffer = new StringBuilder();
    int level = 0; // Nesting depth of unescaped parentheses; -1 means the string is closed.
    try
    {
      while(true)
      {
        int c = stream.readUnsignedByte();
        if(c == '(')
          level++;
        else if(c == ')')
          level--;
        else if(c == '\\')
        {
          boolean lineBreak = false;
          c = stream.readUnsignedByte();
          switch(c)
          {
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case '(':
            case ')':
            case '\\':
              break;
            case '\r': // Line continuation (CR or CR-LF): nothing is added to the string.
              lineBreak = true;
              c = stream.readUnsignedByte();
              if(c != '\n')
                stream.skip(-1);
              break;
            case '\n': // Line continuation (LF): nothing is added to the string.
              lineBreak = true;
              break;
            default:
            {
              // Not an octal digit: the backslash is dropped and the character kept as is.
              if(c < '0' || c > '7')
                break;

              // Octal character code (1 to 3 digits).
              int octal = c - '0';
              c = stream.readUnsignedByte();
              if(c < '0' || c > '7')
              {c = octal; stream.skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = stream.readUnsignedByte();
              if(c < '0' || c > '7')
              {c = octal; stream.skip(-1); break;}
              octal = (octal << 3) + c - '0';
              c = octal & 0xff; // High-order overflow is ignored.
              break;
            }
          }
          if(lineBreak)
            continue;
        }
        else if(c == '\r') // Unescaped line break: normalized to LF.
        {
          c = stream.readUnsignedByte();
          if(c != '\n')
          {c = '\n'; stream.skip(-1);}
        }
        if(level == -1) // Closing parenthesis of the string itself.
          break;

        buffer.append((char)c);
      }
    }
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed literal string).",e,stream.getPosition());}

    return buffer;
  }

  /**
    Builds the PDF data object beginning at the current token.
    <p>Dictionaries and arrays are parsed recursively; a dictionary followed by the
    <code>stream</code> keyword yields a stream object whose data length is taken from the
    dictionary's <code>Length</code> entry.</p>

    @return Parsed object, or <code>null</code> if the current token type does not map to any
      data object.
    @throws FileFormatException If the stream ends inside a container or a stream body, or a
      malformed token is met.
  */
  public PdfDataObject parsePdfObject(
    ) throws FileFormatException
  {
    switch(tokenType)
    {
      case Integer:
        return new PdfInteger((Integer)token);
      case Name:
        return new PdfName((String)token,true);
      case Reference:
        return new PdfReference(
          (Reference)token,
          file
          );
      case Literal:
        return new PdfLiteral((String)token);
      case DictionaryBegin:
      {
        PdfDictionary dictionary = new PdfDictionary();
        while(true)
        {
          // Key.
          if(!moveNext())
            throw new FileFormatException("Unexpected EOF (malformed dictionary object).",stream.getPosition());
          if(tokenType == TokenTypeEnum.DictionaryEnd)
            break;
          PdfName key = (PdfName)parsePdfObject();

          // Value.
          if(!moveNext())
            throw new FileFormatException("Unexpected EOF (malformed dictionary object).",stream.getPosition());
          PdfDirectObject value = (PdfDirectObject)parsePdfObject();

          dictionary.put(key,value);
        }

        // Peek at the next token to find out whether the dictionary introduces a stream.
        long oldOffset = stream.getPosition();
        if(moveNext()
          && tokenType == TokenTypeEnum.Keyword
          && "stream".equals(token)) // Stream.
        {
          long position = stream.getPosition();

          // Resolving the length may move the stream position (see the seek right below).
          int length = ((PdfInteger)File.resolve(dictionary.get(PdfName.Length))).getValue();
          if(length < 0)
            throw new FileFormatException("Malformed stream object (negative length).",position);

          stream.seek(position);

          /*
            Only the end-of-line marker following the keyword is skipped: the bytes after it
            are stream data, even when they happen to look like white space.
          */
          skipStreamEOL();

          byte[] data = new byte[length];
          try
          {stream.read(data);}
          catch(EOFException e)
          {throw new FileFormatException("Unexpected EOF (malformed stream object).",e,stream.getPosition());}

          moveNext(); // Consumes the keyword expected after the data ('endstream').

          return new PdfStream(
            dictionary,
            new Buffer(data)
            );
        }
        else // Stand-alone dictionary.
        {
          stream.seek(oldOffset); // Gives the peeked token back.

          return dictionary;
        }
      }
      case ArrayBegin:
      {
        PdfArray array = new PdfArray();
        while(true)
        {
          if(!moveNext())
            throw new FileFormatException("Unexpected EOF (malformed array object).",stream.getPosition());
          if(tokenType == TokenTypeEnum.ArrayEnd)
            break;

          array.add((PdfDirectObject)parsePdfObject());
        }
        return array;
      }
      case Real:
        return new PdfReal((Float)token);
      case Boolean:
        return new PdfBoolean((Boolean)token);
      case Date:
        return new PdfDate((Date)token);
      case Hex:
        return new PdfHex((String)token);
      case Null:
        return PdfNull.Null;
      default:
        return null;
    }
  }

  /**
    Skips the single end-of-line marker (CR-LF, LF or a lone CR) that may follow the
    <code>stream</code> keyword, leaving the stream positioned on the first data byte.
  */
  private void skipStreamEOL(
    )
  {
    try
    {
      int c = stream.readUnsignedByte();
      if(c == '\r')
      {
        c = stream.readUnsignedByte();
        if(c != '\n')
          stream.skip(-1);
      }
      else if(c != '\n')
        stream.skip(-1);
    }
    catch(EOFException ignored)
    {/* Nothing left to skip: missing data is reported by the read that follows. */}
  }

  /**
    Reads the PDF version from the header at the beginning of the stream.

    @return The three characters following the <code>%PDF-</code> header (e.g. "1.4").
    @throws FileFormatException If the header is missing or truncated.
  */
  public String retrieveVersion(
    ) throws FileFormatException
  {
    stream.seek(0);
    String header;
    try
    {header = stream.readString(10);}
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed version data).",e,stream.getPosition());}
    if(!header.startsWith(PdfHeader))
      throw new FileFormatException("PDF header not found.",stream.getPosition());

    return header.substring(PdfHeader.length(),PdfHeader.length() + 3);
  }

  /**
    Retrieves the offset announced by the last <code>startxref</code> tag of the stream.

    @return Value following the <code>startxref</code> keyword.
    @throws FileFormatException If the tag is missing or malformed.
  */
  public long retrieveXRefOffset(
    ) throws FileFormatException
  {return retrieveXRefOffset(stream.getLength());}

  /**
    Retrieves the offset announced by the last <code>startxref</code> tag found within the
    1024 bytes preceding the given position.

    @param offset Position the backward search starts from.
    @return Value following the <code>startxref</code> keyword.
    @throws FileFormatException If the tag is missing or malformed.
  */
  public long retrieveXRefOffset(
    long offset
    ) throws FileFormatException
  {
    final int chunkSize = 1024;

    // Clamp the chunk to the beginning of the stream: tiny files are shorter than a chunk.
    long position = Math.max(0,offset - chunkSize);
    int length = (int)Math.max(0,offset - position);
    stream.seek(position);

    // Locate the tag within the chunk.
    int index;
    try
    {index = stream.readString(length).lastIndexOf("startxref");}
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed 'startxref' tag).",e,stream.getPosition());}
    if(index < 0)
      throw new FileFormatException("PDF startxref not found.",stream.getPosition());
    stream.seek(position + index);
    moveNext(); // Consumes the 'startxref' keyword.

    // Offset value.
    if(!moveNext()
      || tokenType != TokenTypeEnum.Integer)
      throw new FileFormatException("PDF startxref malformed.",stream.getPosition());

    return (Integer)token;
  }

  /**
    Moves the stream to the given absolute position.
  */
  public void seek(
    long offset
    )
  {stream.seek(offset);}

  /**
    Moves the stream by the given offset relative to the current position.
  */
  public void skip(
    long offset
    )
  {stream.skip(offset);}

  /**
    Moves the stream to the first non-white-space byte at or after the current position.

    @return <code>false</code> if the end of the stream was reached; <code>true</code> otherwise.
  */
  public boolean skipWhitespace(
    )
  {
    int b;
    try
    {
      do
      {b = stream.readUnsignedByte();}
      while(isWhitespace(b));
    }
    catch(EOFException e)
    {return false;}
    stream.skip(-1); // Gives back the non-white-space byte just read.

    return true;
  }
}