1 4 5 package com.etymon.pjx; 6 7 import java.io.*; 8 import java.nio.*; 9 import java.util.*; 10 import java.util.regex.*; 11 12 18 public final class PdfReader { 19 20 protected PdfInput _pdfInput; 21 22 26 public PdfInput getInput() { 27 synchronized (this) { 28 29 return _pdfInput; 30 31 } 32 } 33 34 protected PdfInput getPdfInput() { 35 return _pdfInput; 36 } 37 38 42 protected class ArrayEnd extends ParserObject { } 43 44 48 protected class DictionaryEnd extends ParserObject { } 49 50 54 protected class DictionaryEndStream extends ParserObject { } 55 56 61 protected class ParserObject extends PdfObject { 62 63 protected int writePdf(PdfWriter w, boolean spacing) throws IOException { 64 return 0; 65 } 66 67 } 68 69 72 protected static Pattern _patHeader = Pattern.compile("^%(!PS-Adobe-\\d\\.\\d )?PDF-\\d\\.\\d" + 73 PdfReader.REGEX_EOL); 74 75 80 protected static final Pattern _patObjIntro = Pattern.compile( 81 PdfReader.REGEX_WHITESPACE + "*\\d+" + 82 PdfReader.REGEX_WHITESPACE + "+\\d+" + 83 PdfReader.REGEX_WHITESPACE + "+obj" + 84 PdfReader.REGEX_STOP); 85 86 89 protected static final Pattern _patPdfObject = Pattern.compile( 90 91 "(" + 92 93 "(" + PdfReader.REGEX_WHITESPACE + "*null" + PdfReader.REGEX_STOP + ")|" + 95 96 "(" + PdfReader.REGEX_WHITESPACE + "*\\d+" + PdfReader.REGEX_WHITESPACE + "\\d+" + PdfReader.REGEX_WHITESPACE + "R" + PdfReader.REGEX_STOP + ")|" + 98 99 "(" + PdfReader.REGEX_WHITESPACE + "*((true)|(false))" + PdfReader.REGEX_STOP + ")|" + 101 102 "(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?\\d+" + PdfReader.REGEX_STOP + ")|" + 104 105 "(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?((\\d*\\.\\d+)|(\\d+\\.))" + PdfReader.REGEX_STOP + ")|" + 107 108 "(" + PdfReader.REGEX_WHITESPACE + "*((\\()|(<[^<]))" + PdfReader.REGEX_ANY_CHAR + ")|" + 110 111 "(" + PdfReader.REGEX_WHITESPACE + 113 "*/((#\\d\\d)|(" + PdfReader.REGEX_REGULAR + "))*" + 114 PdfReader.REGEX_STOP + ")|" + 115 116 "(" + PdfReader.REGEX_WHITESPACE + "*<<" + PdfReader.REGEX_ANY_CHAR + ")|" + 118 119 "(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_WHITESPACE + "*stream((\\r\\n)|\\n|\\r)" + ")|" + 121 134 "(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_ANY_CHAR + ")|" + 136 137 "(" + PdfReader.REGEX_WHITESPACE + "*\\[" + PdfReader.REGEX_ANY_CHAR + ")|" + 139 140 "(" + PdfReader.REGEX_WHITESPACE + "*\\]" + PdfReader.REGEX_ANY_CHAR + ")" + 142 143 ")" 144 145 ); 146 147 150 protected static final Pattern _patStartxref = Pattern.compile( 151 PdfReader.REGEX_EOL + "startxref" + 152 PdfReader.REGEX_WHITESPACE + 153 "+\\d+" + PdfReader.REGEX_WHITESPACE); 154 155 159 protected static final Pattern _patXref = Pattern.compile("xref" + PdfReader.REGEX_WHITESPACE + "+"); 160 161 166 protected static final Pattern _patXrefSub = Pattern.compile( 167 PdfReader.REGEX_WHITESPACE + "*((\\d+ \\d+)|(trailer))" + PdfReader.REGEX_WHITESPACE + "+"); 168 169 173 protected static final Pattern _patXrefTable = Pattern.compile( 174 "xref" + PdfReader.REGEX_WHITESPACE + "*" + 175 PdfReader.REGEX_EOL + "[^t]*" + "trailer" + 176 PdfReader.REGEX_WHITESPACE + "+"); 177 178 182 protected static final Pattern _patXrefEof = Pattern.compile( 183 PdfReader.REGEX_ANY_CHAR + "*" + PdfReader.REGEX_WHITESPACE + "startxref" + PdfReader.REGEX_WHITESPACE); 184 185 189 protected static final PdfName PDFNAME_LENGTH = new PdfName("Length"); 190 191 195 protected static final PdfName PDFNAME_PREV = new PdfName("Prev"); 196 197 201 protected static final PdfName PDFNAME_SIZE = new PdfName("Size"); 202 203 206 protected static final String REGEX_ANY_CHAR = "[\\x00-\\xFF]"; 207 208 211 protected static final String REGEX_COMMENT = "(%[^" + PdfReader.REGEX_EOL + "]*" + PdfReader.REGEX_EOL + ")"; 212 213 216 protected static final String REGEX_DELIMITER = "[\\(\\)<>\\[\\]\\{\\}/%]"; 217 218 222 protected static final String REGEX_EOL = "(\\r|\\n|(\\r\\n))"; 223 224 227 protected static final String REGEX_REGULAR = "[^\\x00\\t\\n\\f\\r \\(\\)<>\\[\\]\\{\\}/%]"; 228 229 233 protected static final String REGEX_STOP = "(" + PdfReader.REGEX_WHITESPACE + "|[\\(\\)<>\\[\\]\\{\\}/])"; 234 235 238 protected static final String REGEX_WHITESPACE = "([\\x00\\t\\n\\f\\r ]|" + PdfReader.REGEX_COMMENT + ")"; 239 240 245 protected static final int STARTXREF_RETRY_COUNT = 25; 246 247 251 protected static final int STARTXREF_RETRY_SCAN = 40; 252 253 258 public PdfReader(PdfInput pdfInput) { 259 260 _pdfInput = pdfInput; 261 262 } 263 264 269 public void close() throws IOException { 270 synchronized (this) { 271 272 _pdfInput = null; 273 274 } 275 } 276 277 293 protected PdfObject parseObject(long start, long end, CharBuffer cbuf, 294 XrefTable xt) throws IOException, PdfFormatException { 295 Matcher m; 296 297 m = _patPdfObject.matcher(cbuf); 298 if (m.lookingAt()) { 299 300 if (m.group(2) != null) { 301 cbuf.position(cbuf.position() + m.end() - 1); 302 return PdfNull.valueOf(); 303 } 304 305 if (m.group(12) != null) { 306 String s = m.group(); 307 s = s.substring(0, s.length() - 1).trim(); 308 cbuf.position(cbuf.position() + m.end() - 1); 309 String [] sp = s.split(PdfReader.REGEX_WHITESPACE); 310 return new PdfReference( 311 Integer.parseInt(sp[0]), 312 Integer.parseInt(sp[1]) ); 313 } 314 315 if (m.group(30) != null) { 316 PdfBoolean bool = PdfBoolean.valueOf(m.group(35) != null); 317 cbuf.position(cbuf.position() + m.end() - 1); 318 return bool; 319 } 320 321 if (m.group(43) != null) { 322 String s = m.group(); 323 s = s.substring(0, s.length() - 1).trim(); 324 cbuf.position(cbuf.position() + m.end() - 1); 325 long n = Long.parseLong(s); 326 if ( (n >= Integer.MIN_VALUE) && 327 (n <= Integer.MAX_VALUE) ) { 328 return new PdfInteger( (int)n ); 329 } else { 330 return new PdfLong(n); 331 } 332 } 333 334 if (m.group(54) != null) { 335 String s = m.group(); 336 s = s.substring(0, s.length() - 1).trim(); 337 cbuf.position(cbuf.position() + m.end() - 1); 338 return new PdfFloat(Float.parseFloat(s)); 339 } 340 341 if (m.group(68) != null) { 342 cbuf.position( cbuf.position() + m.start() ); 343 return new PdfString( PdfString.pdfToString(cbuf) ); 344 } 345 346 if (m.group(76) != null) { 347 String s = m.group(); 348 s = s.substring(0, s.length() - 1).trim(); 349 cbuf.position(cbuf.position() + m.end() - 1); 350 return new PdfName(PdfName.pdfToString(s)); 351 } 352 353 if (m.group(89) != null) { 354 cbuf.position(cbuf.position() + m.end() - 1); 355 HashMap h = new HashMap(); 356 int done = 0; 357 PdfObject streamLength = null; 358 do { 359 PdfObject key, value; 360 key = parseObject(start, end, cbuf, xt); 361 if (key instanceof DictionaryEnd) { 362 done = 1; 363 break; 364 } 365 if (key instanceof DictionaryEndStream) { 366 done = 2; 367 break; 368 } 369 if (key.equals(PDFNAME_LENGTH)) { 370 streamLength = parseObject(start, end, cbuf, xt); 371 value = streamLength; 372 } else { 373 value = parseObject(start, end, cbuf, xt); 374 } 375 if ( (key != null) && (value != null) ) { 376 h.put(key, value); 377 } 378 } while (done == 0); 379 if (done == 1) { return PdfDictionary.wrap(h); 381 } else { PdfObject obj = streamLength; 383 if (obj instanceof PdfReference) { int save = cbuf.position(); 385 int streamLengthId = ((PdfReference)obj).getObjectNumber(); 386 long s = xt.getIndex(streamLengthId); 387 long e = xt.estimateObjectEnd(streamLengthId); 388 obj = readObject(s, e, true, xt); 389 cbuf.position(save); 390 } 391 if ( !(obj instanceof PdfInteger) ) { 392 throw new PdfFormatException( 393 "Valid Length value not found in stream dictionary.", 394 cbuf.position()); 395 } 396 int len = ((PdfInteger)obj).getInt(); 397 PdfDictionary d = PdfDictionary.wrap(h); 398 399 ByteBuffer bbuf = _pdfInput.readBytes(start, end); 400 ByteBuffer bb = ByteBuffer.allocateDirect(len); 401 bbuf.position(cbuf.position()); 402 bbuf.limit(cbuf.position() + len); 403 bb.put(bbuf); 404 bbuf.limit(bbuf.capacity()); 405 406 return PdfStream.wrap(d, bb); 407 } 408 } 409 410 if (m.group(94) != null) { 411 cbuf.position(cbuf.position() + m.end()); 412 return new DictionaryEndStream(); 413 } 414 415 if (m.group(105) != null) { 416 cbuf.position(cbuf.position() + m.end() - 1); 417 return new DictionaryEnd(); 418 } 419 420 if (m.group(110) != null) { 421 cbuf.position(cbuf.position() + m.end() - 1); 422 ArrayList a = new ArrayList(); 423 boolean done = false; 424 do { 425 Object value; 426 value = parseObject(start, end, cbuf, xt); 427 if (value instanceof ArrayEnd) { 428 done = true; 429 break; 430 } 431 if (value != null) { 432 a.add(value); 433 } 434 } while (!done); 435 return PdfArray.wrap(a); 436 } 437 438 if (m.group(115) != null) { 439 cbuf.position(cbuf.position() + m.end() - 1); 440 return new ArrayEnd(); 441 } 442 443 } 444 throw new PdfFormatException("Object not recognized.", cbuf.position()); 445 } 446 447 464 protected XrefTable readPartialXrefTable(XrefTable xt, long startxref, 465 long[] prev) throws IOException, PdfFormatException { 466 467 Matcher m; 468 469 ByteBuffer bbuf; 474 CharBuffer cbuf; 475 int blockSize; 476 if (xt != null) { 477 blockSize = (xt.size() * 20) + 8192; 479 } else { 480 blockSize = 65536; 481 } 482 long inputLength = _pdfInput.getLength(); 483 long endtrailer; 484 boolean done = false; 485 do { 486 endtrailer = startxref + blockSize; 487 if ( endtrailer > inputLength ) { 488 endtrailer = inputLength; 489 } 490 cbuf = _pdfInput.readChars(startxref, endtrailer); 491 if (endtrailer == inputLength) { 492 done = true; 493 break; 494 } 495 m = _patXrefEof.matcher(cbuf); 496 blockSize = blockSize * 2; 497 if (m.lookingAt()) { 498 done = true; 499 break; 500 } 501 } while ( !done ); 502 503 XrefTable r; 504 505 if (xt != null) { 506 507 r = xt; 508 509 } else { 510 511 m = _patXrefTable.matcher(cbuf); 513 if ( !(m.lookingAt()) ) { 514 throw new PdfFormatException( 515 "Cross-reference table or trailer not found at correct position.", startxref); 516 } 517 518 int trailer_offset = cbuf.position() + m.end(); 519 520 PdfObject pobj = readObject(startxref + trailer_offset, endtrailer, 522 false, null); 523 if ( !(pobj instanceof PdfDictionary) ) { 524 throw new PdfFormatException( 525 "Trailer dictionary not found.", trailer_offset); 526 } 527 528 PdfDictionary trailerDictionary = (PdfDictionary)pobj; 530 Map trailerMap = trailerDictionary.getMap(); 531 Object obj = trailerMap.get(PDFNAME_PREV); 532 if (obj == null) { 533 prev[0] = -1; 534 } else { 535 if ( (!(obj instanceof PdfInteger)) && 536 (!(obj instanceof PdfFloat)) ) { 537 throw new PdfFormatException( 538 "Valid Prev value not found in trailer dictionary.", 539 trailer_offset); 540 } 541 prev[0] = ((PdfNumber)obj).getLong(); 542 } 543 544 obj = trailerMap.get(PDFNAME_SIZE); 546 if ( !(obj instanceof PdfInteger) ) { 547 throw new PdfFormatException( 548 "Valid xref size not found in trailer dictionary.", trailer_offset); 549 } 550 int xrefSize = ((PdfInteger)obj).getInt(); 551 long[] rindex = new long[xrefSize]; 553 int[] rgeneration = new int[xrefSize]; 554 byte[] rusage = new byte[xrefSize]; 555 r = XrefTable.wrap(rindex, rgeneration, rusage, trailerDictionary); 556 557 cbuf.position(0); 559 } 560 561 r.getStartxrefList().add(new Long (startxref)); 563 564 m = _patXref.matcher(cbuf); 565 if ( !(m.lookingAt()) ) { 566 throw new PdfFormatException( 567 "Cross-reference table (xref) not found at correct position.", 0); 568 } 569 cbuf.position(cbuf.position() + m.end()); 570 571 String s; 572 done = false; 573 do { 574 m = _patXrefSub.matcher(cbuf); 575 if ( !(m.lookingAt()) ) { 576 throw new PdfFormatException( 577 "Cross-reference table (subsection) not found.", 0); 578 } 579 s = m.group().trim(); 580 if (s.equals("trailer")) { 581 done = true; 582 break; 583 } 584 cbuf.position(cbuf.position() + m.end()); 585 String [] sp = s.split(" "); 586 int x = Integer.parseInt(sp[0]); 587 int n = Integer.parseInt(sp[1]); 588 char[] ca = new char[11]; 589 590 long[] index = r.unwrapIndexArray(); 591 int[] generation = r.unwrapGenerationArray(); 592 byte[] usage = r.unwrapUsageArray(); 593 594 for ( ; n > 0; n--, x++) { 595 596 if ( (xt != null) && (usage[x] != XrefTable.ENTRY_UNDEFINED) ) { 598 cbuf.position(cbuf.position() + 20); 599 } else { 600 cbuf.get(ca, 0, 11); 602 index[x] = Long.parseLong(new String (ca, 0, 10)); 603 cbuf.get(ca, 0, 6); 604 generation[x] = Integer.parseInt(new String (ca, 0, 5)); 605 cbuf.get(ca, 0, 3); 606 usage[x] = (ca[0] == 'n') ? 607 XrefTable.ENTRY_IN_USE : 608 XrefTable.ENTRY_FREE; 609 } 610 } 611 } while (!done); 612 613 if (xt != null) { 618 int trailer_offset = cbuf.position() + m.end(); 619 PdfObject pobj = readObject(startxref + trailer_offset, endtrailer, false, null); 621 if ( !(pobj instanceof PdfDictionary) ) { 622 throw new PdfFormatException( 623 "Trailer dictionary not found.", trailer_offset); 624 } 625 626 PdfDictionary trailerDictionary = (PdfDictionary)pobj; 628 Map trailerMap = trailerDictionary.getMap(); 629 Object obj = trailerMap.get(PDFNAME_PREV); 630 if (obj == null) { 631 prev[0] = -1; 632 } else { 633 if ( (!(obj instanceof PdfInteger)) && 634 (!(obj instanceof PdfLong)) ) { 635 throw new PdfFormatException( 636 "Valid Prev value not found in trailer dictionary.", 637 trailer_offset); 638 } 639 prev[0] = ((PdfNumber)obj).getInt(); 640 } 641 } 642 643 return r; 644 } 645 646 652 public String readHeader() throws IOException, PdfException { 653 synchronized (this) { 654 CharBuffer cbuf = _pdfInput.readChars(0, Math.min(1024, _pdfInput.getLength())); 660 Matcher m = _patHeader.matcher(cbuf); 661 if (m.find()) { 662 return m.group().trim(); 663 } 664 throw new PdfFormatException("PDF document header not found.", 0); 665 } 666 } 667 668 683 public PdfObject readObject(long start, long end, boolean indirect, 684 XrefTable xt) throws IOException, PdfFormatException { 685 synchronized (this) { 686 687 CharBuffer cbuf = _pdfInput.readChars(start, end); 688 689 if (indirect) { 690 Matcher m = _patObjIntro.matcher(cbuf); 692 if ( !(m.lookingAt()) ) { 693 throw new PdfFormatException( 694 "Object not found.", start); 695 } 696 cbuf.position(m.end() - 1); 697 } 698 699 return parseObject(start, end, cbuf, xt); 700 701 } 702 } 703 704 710 public long readStartxref() throws IOException, PdfFormatException { 711 synchronized (this) { 712 713 long bufLength = _pdfInput.getLength(); 714 CharBuffer cbuf = _pdfInput.readChars( 715 Math.max(bufLength - (STARTXREF_RETRY_COUNT * STARTXREF_RETRY_SCAN), 0), 716 bufLength); 717 718 Matcher m = _patStartxref.matcher(cbuf); 719 int start = cbuf.capacity(); 720 for (int retry = PdfReader.STARTXREF_RETRY_COUNT; retry > 0; retry--) { 721 start -= PdfReader.STARTXREF_RETRY_SCAN; 722 if (start >= 0) { 723 if (m.find(start)) { 724 String s = m.group().trim(); 725 String [] sp = s.split(PdfReader.REGEX_WHITESPACE); 726 return Long.parseLong(sp[sp.length - 1]); 727 } 728 } else break; 729 } 730 throw new PdfFormatException("PDF startxref not found.", 0); 731 } 732 } 733 734 745 public XrefTable readXrefTable(long startxref) throws IOException, PdfFormatException { 746 synchronized (this) { 747 XrefTable xt = null; 748 long start = startxref; 749 long[] prev = new long[1]; 750 do { 751 xt = readPartialXrefTable(xt, start, prev); 752 start = prev[0]; 753 754 } while (start != -1); 755 xt.createSortedIndexArray(); 756 return xt; 757 } 758 } 759 760 } 761 | Popular Tags |