PdfReader


1   /*
2     Copyright (C) Etymon Systems, Inc. <http://www.etymon.com/>
3   */
4   
5   package com.etymon.pjx;
6   
7   import java.io.*;
8   import java.nio.*;
9   import java.util.*;
10  import java.util.regex.*;
11  
12  /**
13     Reads a PDF document.  Most applications do not need to access
14     methods in this class but should instead go through {@link
15     PdfManager PdfManager}.  This class is synchronized.
16     @author Nassib Nassar
17  */
18  public final class PdfReader {
19  
20      protected PdfInput _pdfInput;
21  
22      /**
23         Returns the <code>PdfInput</code> instance associated with
24         this document.
25      */
26      public PdfInput getInput() {
27          synchronized (this) {
28  
29              return _pdfInput;
30  
31          }
32      }
33      
34      protected PdfInput getPdfInput() {
35          return _pdfInput;
36      }
37      
38      /**
39         A placeholder used by the PDF parser to mark the end of an
40         array.
41      */
42      protected class ArrayEnd extends ParserObject { }
43      
44      /**
45         A placeholder used by the PDF parser to mark the end of a
46         dictionary.
47       */
48      protected class DictionaryEnd extends ParserObject { }
49          
50      /**
51         A placeholder used by the PDF parser to mark the end of a
52         dictionary that is also followed by a stream.
53       */
54      protected class DictionaryEndStream extends ParserObject { }
55          
56      /**
57         The superclass of inner classes used by this
58         <code>PdfReader</code> to mark positions while parsing PDF
59         objects.
60      */
61      protected class ParserObject extends PdfObject {
62  
63          protected int writePdf(PdfWriter w, boolean spacing) throws IOException {
64              return 0;
65          }
66          
67      }
68      
69      /**
70         The regular expression that matches a PDF header.
71      */
72      protected static Pattern _patHeader = Pattern.compile("^%(!PS-Adobe-\\d\\.\\d )?PDF-\\d\\.\\d" +
73                                    PdfReader.REGEX_EOL);
74  
75      /**
76         The regular expression that matches the begining of an
77         indirect object (specifically, the object number and
78         generation number followed by "obj").
79      */
80      protected static final Pattern _patObjIntro = Pattern.compile(
81          PdfReader.REGEX_WHITESPACE + "*\\d+" +
82          PdfReader.REGEX_WHITESPACE + "+\\d+" +
83          PdfReader.REGEX_WHITESPACE + "+obj" +
84          PdfReader.REGEX_STOP);
85  
86      /**
87         The regular expression that matches a PDF (direct) object.
88       */
89      protected static final Pattern _patPdfObject = Pattern.compile(
90  
91          "(" +
92  
93          // Null 2
94          "(" + PdfReader.REGEX_WHITESPACE + "*null" + PdfReader.REGEX_STOP + ")|" +
95  
96          // Reference 12
97          "(" + PdfReader.REGEX_WHITESPACE + "*\\d+" + PdfReader.REGEX_WHITESPACE + "\\d+" + PdfReader.REGEX_WHITESPACE + "R" + PdfReader.REGEX_STOP + ")|" +
98  
99          // Boolean 30 (35 = true)
100         "(" + PdfReader.REGEX_WHITESPACE + "*((true)|(false))" + PdfReader.REGEX_STOP + ")|" +
101 
102         // Integer 43
103         "(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?\\d+" + PdfReader.REGEX_STOP + ")|" +
104 
105         // Float 54
106         "(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?((\\d*\\.\\d+)|(\\d+\\.))" + PdfReader.REGEX_STOP + ")|" +
107 
108         // String 68
109         "(" + PdfReader.REGEX_WHITESPACE + "*((\\()|(<[^<]))" + PdfReader.REGEX_ANY_CHAR + ")|" +
110         
111         // Name 76
112         "(" + PdfReader.REGEX_WHITESPACE +
113                               "*/((#\\d\\d)|(" + PdfReader.REGEX_REGULAR + "))*" +
114                               PdfReader.REGEX_STOP + ")|" +
115 
116         // Dictionary begin 89
117         "(" + PdfReader.REGEX_WHITESPACE + "*<<" + PdfReader.REGEX_ANY_CHAR + ")|" +
118 
119         // Dictionary end / Stream begin 94
120         "(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_WHITESPACE + "*stream((\\r\\n)|\\n|\\r)"  + ")|" +
121         // We accept a solitary '\r' after the "stream"
122         // keyword even though the PDF specification does not
123         // allow it, because of a sample file << /Creator
124         // (Adobe Illustrator_TM_ 7.0) /Producer (Acrobat PDF
125         // File Format 1.1 for Macintosh) >> that used it.
126         // The only harm in accepting it is that output from
127         // such a non-compliant program might be read
128         // incorrectly by PJX in the rare case that the stream
129         // being with '\n'.  That is no harm since without
130         // recognizing '\r' we couldn't read it anyway
131         // (causing an exception to be thrown at some point
132         // later during processing of the PDF document).
133 
134         // Dictionary end 105
135         "(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_ANY_CHAR + ")|" +
136 
137         // Array begin 110
138         "(" + PdfReader.REGEX_WHITESPACE + "*\\[" + PdfReader.REGEX_ANY_CHAR + ")|" +
139 
140         // Array end 115
141         "(" + PdfReader.REGEX_WHITESPACE + "*\\]" + PdfReader.REGEX_ANY_CHAR + ")" +
142 
143         ")"
144         
145         );
146 
147     /**
148        The regular expression that matches a startxref section.
149     */
150     protected static final Pattern _patStartxref = Pattern.compile(
151         PdfReader.REGEX_EOL + "startxref" +
152         PdfReader.REGEX_WHITESPACE +
153         "+\\d+" + PdfReader.REGEX_WHITESPACE);
154     
155     /**
156        The regular expression that matches the beginning of an
157        xref section (specifically, the "xref" key word).
158     */
159     protected static final Pattern _patXref = Pattern.compile("xref" + PdfReader.REGEX_WHITESPACE + "+");
160 
161     /**
162        The regular expression that matches the introduction to a
163        subsection of an xref section (specifically, an integer
164        pair) or the "trailer" key word.
165     */
166     protected static final Pattern _patXrefSub = Pattern.compile(
167         PdfReader.REGEX_WHITESPACE + "*((\\d+ \\d+)|(trailer))" + PdfReader.REGEX_WHITESPACE + "+");
168 
169     /**
170        The regular expression that matches an entire xref table
171        section, including the "trailer" key word.
172     */
173     protected static final Pattern _patXrefTable = Pattern.compile(
174         "xref" + PdfReader.REGEX_WHITESPACE + "*" +
175         PdfReader.REGEX_EOL + "[^t]*" + "trailer" + 
176         PdfReader.REGEX_WHITESPACE + "+");
177 
178     /**
179        The regular expression that matches an entire xref table
180        section, including the "trailer" key word.
181     */
182     protected static final Pattern _patXrefEof = Pattern.compile(
183         PdfReader.REGEX_ANY_CHAR + "*" + PdfReader.REGEX_WHITESPACE + "startxref" + PdfReader.REGEX_WHITESPACE);
184 
185     /**
186            A <code>PdfName</code> object representing the name
187            <code>Length</code>.
188     */
189     protected static final PdfName PDFNAME_LENGTH = new PdfName("Length");
190 
191     /**
192            A <code>PdfName</code> object representing the name
193            <code>Prev</code>.
194     */
195     protected static final PdfName PDFNAME_PREV = new PdfName("Prev");
196 
197     /**
198            A <code>PdfName</code> object representing the name
199            <code>Size</code>.
200     */
201     protected static final PdfName PDFNAME_SIZE = new PdfName("Size");
202 
203     /**
204        The regular expression that matches literally any character.
205     */
206     protected static final String   REGEX_ANY_CHAR = "[\\x00-\\xFF]";
207     
208     /**
209        The regular expression that matches a comment in PDF.
210     */
211     protected static final String   REGEX_COMMENT = "(%[^" + PdfReader.REGEX_EOL + "]*" + PdfReader.REGEX_EOL + ")";
212 
213     /**
214        The regular expression that matches a delimiter in PDF.
215     */
216     protected static final String   REGEX_DELIMITER = "[\\(\\)<>\\[\\]\\{\\}/%]";
217 
218     /**
219        The regular expression that matches an end-of-line (EOL)
220        marker in PDF.
221     */
222     protected static final String   REGEX_EOL = "(\\r|\\n|(\\r\\n))";
223 
224     /**
225        The regular expression that matches a regular character in PDF.
226     */
227     protected static final String   REGEX_REGULAR = "[^\\x00\\t\\n\\f\\r \\(\\)<>\\[\\]\\{\\}/%]";
228     
229     /**
230        The regular expression that matches a white-space or
231        delimiter (stopping syntactic entities) in PDF.
232     */
233     protected static final String   REGEX_STOP = "(" + PdfReader.REGEX_WHITESPACE + "|[\\(\\)<>\\[\\]\\{\\}/])";
234 
235     /**
236        The regular expression that matches general white-space in PDF.
237     */
238     protected static final String   REGEX_WHITESPACE = "([\\x00\\t\\n\\f\\r ]|" + PdfReader.REGEX_COMMENT + ")";
239 
240     /**
241        Number of times to try scanning for startxref.  Each time
242        the parser will back up to a point (STARTXREF_RETRY_SCAN)
243        bytes before the previous time.
244     */
245     protected static final int STARTXREF_RETRY_COUNT = 25;
246 
247     /**
248            The number of bytes from the end of a PDF document at which to
249            start scanning for startxref.
250     */
251     protected static final int STARTXREF_RETRY_SCAN = 40;
252 
253         /**
254            Creates a reader for a PDF document to be read from a
255            <code>PdfInput</code> source.
256            @param pdfInput the source to read the PDF document from.
257         */
258     public PdfReader(PdfInput pdfInput) {
259 
260         _pdfInput = pdfInput;
261 
262     }
263     
264         /**
265            Closes the PDF document and releases any system resources
266            associated with it.
267            @throws IOException
268         */
269         public void close() throws IOException {
270         synchronized (this) {
271 
272             _pdfInput = null;
273 
274         }
275         }
276 
277     /**
278        Parses and returns a PDF object from the input source.  The
279        object is filtered through <code>PdfReaderFilter</code>.
280        It is possible for this method to return <code>null</code>
281        if the filtering method discards all objects.  This method
282        is intended to be called from <code>readObject()</code>
283        which advanced the buffer position past introduction if the
284        object is indirect.
285        @param start the offset where the object starts.
286        @param end the offset where the object ends.
287        @param cbuf the character buffer cached from
288        <code>readObject()</code>.
289        @param xt the cross-reference table; used for resolving
290        indirect references.
291        @throws PdfFormatException
292         */
293         protected PdfObject parseObject(long start, long end, CharBuffer cbuf,
294                     XrefTable xt) throws IOException, PdfFormatException {
295         Matcher m;
296         
297         m = _patPdfObject.matcher(cbuf);
298         if (m.lookingAt()) {
299 
300             if (m.group(2) != null) {
301                 cbuf.position(cbuf.position() + m.end() - 1);
302                 return PdfNull.valueOf();
303             }
304             
305             if (m.group(12) != null) {
306                 String   s = m.group();
307                 s = s.substring(0, s.length() - 1).trim();
308                 cbuf.position(cbuf.position() + m.end() - 1);
309                 String  [] sp = s.split(PdfReader.REGEX_WHITESPACE);
310                 return new PdfReference(
311                     Integer.parseInt(sp[0]),
312                     Integer.parseInt(sp[1]) );
313             }
314             
315             if (m.group(30) != null) {
316                 PdfBoolean bool = PdfBoolean.valueOf(m.group(35) != null);
317                 cbuf.position(cbuf.position() + m.end() - 1);
318                 return bool;
319             }
320             
321             if (m.group(43) != null) {
322                 String   s = m.group();
323                 s = s.substring(0, s.length() - 1).trim();
324                 cbuf.position(cbuf.position() + m.end() - 1);
325                     long n = Long.parseLong(s);
326                     if ( (n >= Integer.MIN_VALUE) &&
327                          (n <= Integer.MAX_VALUE) ) {
328                         return new PdfInteger( (int)n );
329                     } else {
330                         return new PdfLong(n);
331                     }
332             }
333 
334             if (m.group(54) != null) {
335                 String   s = m.group();
336                 s = s.substring(0, s.length() - 1).trim();
337                 cbuf.position(cbuf.position() + m.end() - 1);
338                 return new PdfFloat(Float.parseFloat(s));
339             }
340             
341             if (m.group(68) != null) {
342                 cbuf.position( cbuf.position() + m.start() );
343                 return new PdfString( PdfString.pdfToString(cbuf) );
344             }
345             
346             if (m.group(76) != null) {
347                 String   s = m.group();
348                 s = s.substring(0, s.length() - 1).trim();
349                 cbuf.position(cbuf.position() + m.end() - 1);
350                 return new PdfName(PdfName.pdfToString(s));
351             }
352             
353             if (m.group(89) != null) {
354                 cbuf.position(cbuf.position() + m.end() - 1);
355                 HashMap h = new HashMap();
356                 int done = 0;
357                 PdfObject streamLength = null;
358                 do {
359                     PdfObject key, value;
360                     key = parseObject(start, end, cbuf, xt);
361                     if (key instanceof DictionaryEnd) {
362                         done = 1;
363                         break;
364                     }
365                     if (key instanceof DictionaryEndStream) {
366                         done = 2;
367                         break;
368                     }
369                     if (key.equals(PDFNAME_LENGTH)) {
370                         streamLength = parseObject(start, end, cbuf, xt);
371                         value = streamLength;
372                     } else {
373                         value = parseObject(start, end, cbuf, xt);
374                     }
375                     if ( (key != null) && (value != null) ) {
376                         h.put(key, value);
377                     }
378                 } while (done == 0);
379                 if (done == 1) { // DictionaryEnd
380                     return PdfDictionary.wrap(h);
381                 } else { // DictionaryEndStream
382                     PdfObject obj = streamLength;
383                     if (obj instanceof PdfReference) { // get indirect reference
384                         int save = cbuf.position();
385                         int streamLengthId = ((PdfReference)obj).getObjectNumber();
386                         long s = xt.getIndex(streamLengthId);
387                         long e = xt.estimateObjectEnd(streamLengthId);
388                         obj = readObject(s, e, true, xt);
389                         cbuf.position(save);
390                     }
391                     if ( !(obj instanceof PdfInteger) ) {
392                         throw new PdfFormatException(
393                             "Valid Length value not found in stream dictionary.",
394                             cbuf.position());
395                     }
396                     int len = ((PdfInteger)obj).getInt();
397                     PdfDictionary d = PdfDictionary.wrap(h);
398 
399                     ByteBuffer bbuf = _pdfInput.readBytes(start, end);
400                     ByteBuffer bb = ByteBuffer.allocateDirect(len);
401                     bbuf.position(cbuf.position());
402                     bbuf.limit(cbuf.position() + len);
403                     bb.put(bbuf);
404                     bbuf.limit(bbuf.capacity());
405                     
406                     return PdfStream.wrap(d, bb);
407                 }
408             }
409             
410             if (m.group(94) != null) {
411                 cbuf.position(cbuf.position() + m.end());
412                 return new DictionaryEndStream();
413             }
414             
415             if (m.group(105) != null) {
416                 cbuf.position(cbuf.position() + m.end() - 1);
417                 return new DictionaryEnd();
418             }
419             
420             if (m.group(110) != null) {
421                 cbuf.position(cbuf.position() + m.end() - 1);
422                 ArrayList a = new ArrayList();
423                 boolean done = false;
424                 do {
425                     Object   value;
426                     value = parseObject(start, end, cbuf, xt);
427                     if (value instanceof ArrayEnd) {
428                         done = true;
429                         break;
430                     }
431                     if (value != null) {
432                         a.add(value);
433                     }
434                 } while (!done);
435                 return PdfArray.wrap(a);
436             }
437             
438             if (m.group(115) != null) {
439                 cbuf.position(cbuf.position() + m.end() - 1);
440                 return new ArrayEnd();
441             }
442             
443         }
444         throw new PdfFormatException("Object not recognized.", cbuf.position());
445     }
446 
447         /**
448            Reads an individual (partial) cross-reference table and
449            trailer dictionary from the PDF document.  The trailer
450            dictionary is filtered through
451            <code>PdfReaderFilter</code>.  <b>This method should be
452            made public.</b>
453        @param xrefTrailer an existing xrefTrailer object to add
454        data to; assumed to be the "subsequent" to the new
455        XrefTrailer that is to be read.  Only non-existing entries
456        are modified.  The trailer is not modified.
457        @param startxref the xref start position.
458        @param filter the filter.
459        @param prev the current Prev offset.
460            @return the cross-reference table and trailer.
461            @throws IOException
462            @throws PdfFormatException
463         */
464         protected XrefTable readPartialXrefTable(XrefTable xt, long startxref,
465                          long[] prev) throws IOException, PdfFormatException {
466 
467         Matcher m;
468 
469         // there is no way to determine how large a block to
470         // read that will contain the entire xref section; so
471         // we must try progressively larger blocks until we
472         // can match the whole section
473         ByteBuffer bbuf;
474         CharBuffer cbuf;
475         int blockSize;
476         if (xt != null) {
477             // we can use the xref table size as a hint
478             blockSize = (xt.size() * 20) + 8192;
479         } else {
480             blockSize = 65536;
481         }
482         long inputLength = _pdfInput.getLength();
483         long endtrailer;
484         boolean done = false;
485         do {
486             endtrailer = startxref + blockSize;
487             if ( endtrailer > inputLength ) {
488                 endtrailer = inputLength;
489             }
490             cbuf = _pdfInput.readChars(startxref, endtrailer);
491             if (endtrailer == inputLength) {
492                 done = true;
493                 break;
494             }
495             m = _patXrefEof.matcher(cbuf);
496             blockSize = blockSize * 2;
497             if (m.lookingAt()) {
498                 done = true;
499                 break;
500             }
501         } while ( !done );
502         
503         XrefTable r;
504 
505         if (xt != null) {
506             
507             r = xt;
508             
509         } else {
510             
511             // first read past xref table to get trailer
512             m = _patXrefTable.matcher(cbuf);
513             if ( !(m.lookingAt()) ) {
514                 throw new PdfFormatException(
515                     "Cross-reference table or trailer not found at correct position.", startxref);
516             }
517             
518             int trailer_offset = cbuf.position() + m.end();
519             
520             // read trailer
521             PdfObject pobj = readObject(startxref + trailer_offset, endtrailer,
522                             false, null);
523             if ( !(pobj instanceof PdfDictionary) ) {
524                 throw new PdfFormatException(
525                     "Trailer dictionary not found.", trailer_offset);
526             }
527             
528             // get Prev value
529             PdfDictionary trailerDictionary = (PdfDictionary)pobj;
530             Map trailerMap = trailerDictionary.getMap();
531             Object   obj = trailerMap.get(PDFNAME_PREV);
532             if (obj == null) {
533                 prev[0] = -1;
534             } else {
535                 if ( (!(obj instanceof PdfInteger)) &&
536                      (!(obj instanceof PdfFloat)) ) {
537                     throw new PdfFormatException(
538                         "Valid Prev value not found in trailer dictionary.",
539                         trailer_offset);
540                 }
541                 prev[0] = ((PdfNumber)obj).getLong();
542             }
543             
544             // get xref size
545             obj = trailerMap.get(PDFNAME_SIZE);
546             if ( !(obj instanceof PdfInteger) ) {
547                 throw new PdfFormatException(
548                     "Valid xref size not found in trailer dictionary.", trailer_offset);
549             }
550             int xrefSize = ((PdfInteger)obj).getInt();
551             // initialize XrefTable
552             long[] rindex = new long[xrefSize];
553             int[] rgeneration = new int[xrefSize];
554             byte[] rusage = new byte[xrefSize];
555             r = XrefTable.wrap(rindex, rgeneration, rusage, trailerDictionary);
556             
557             // rewind to xref beginning
558             cbuf.position(0);
559         }
560 
561         // added startxref to XrefTable's startxref list
562         r.getStartxrefList().add(new Long  (startxref));
563         
564         m = _patXref.matcher(cbuf);
565         if ( !(m.lookingAt()) ) {
566             throw new PdfFormatException(
567                 "Cross-reference table (xref) not found at correct position.", 0);
568         }
569         cbuf.position(cbuf.position() + m.end());
570 
571         String   s;
572         done = false;
573         do {
574             m = _patXrefSub.matcher(cbuf);
575             if ( !(m.lookingAt()) ) {
576                 throw new PdfFormatException(
577                     "Cross-reference table (subsection) not found.", 0);
578             }
579             s = m.group().trim();
580             if (s.equals("trailer")) {
581                 done = true;
582                 break;
583             }
584             cbuf.position(cbuf.position() + m.end());
585             String  [] sp = s.split(" ");
586             int x = Integer.parseInt(sp[0]);
587             int n = Integer.parseInt(sp[1]);
588             char[] ca = new char[11];
589             
590             long[] index = r.unwrapIndexArray();
591             int[] generation = r.unwrapGenerationArray();
592             byte[] usage = r.unwrapUsageArray();
593             
594             for ( ; n > 0; n--, x++) {
595                 
596                 // check for existing data
597                 if ( (xt != null) && (usage[x] != XrefTable.ENTRY_UNDEFINED) ) {
598                     cbuf.position(cbuf.position() + 20);
599                 } else {
600                     // add the data
601                     cbuf.get(ca, 0, 11);
602                     index[x] = Long.parseLong(new String  (ca, 0, 10));
603                     cbuf.get(ca, 0, 6);
604                     generation[x] = Integer.parseInt(new String  (ca, 0, 5));
605                     cbuf.get(ca, 0, 3);
606                     usage[x] = (ca[0] == 'n') ?
607                         XrefTable.ENTRY_IN_USE :
608                         XrefTable.ENTRY_FREE;
609                 }
610             }
611         } while (!done);
612         
613         // if this is not the most recently updated
614         // xref, then we didn't need to read the
615         // trailer beforehand (to get the table size);
616         // so now we do it here.
617         if (xt != null) {
618             int trailer_offset = cbuf.position() + m.end();
619             // read trailer
620             PdfObject pobj = readObject(startxref + trailer_offset, endtrailer, false, null);
621             if ( !(pobj instanceof PdfDictionary) ) {
622                 throw new PdfFormatException(
623                     "Trailer dictionary not found.", trailer_offset);
624             }
625             
626             // get Prev value
627             PdfDictionary trailerDictionary = (PdfDictionary)pobj;
628             Map trailerMap = trailerDictionary.getMap();
629             Object   obj = trailerMap.get(PDFNAME_PREV);
630             if (obj == null) {
631                 prev[0] = -1;
632             } else {
633                 if ( (!(obj instanceof PdfInteger)) &&
634                      (!(obj instanceof PdfLong)) ) {
635                     throw new PdfFormatException(
636                         "Valid Prev value not found in trailer dictionary.",
637                         trailer_offset);
638                 }
639                 prev[0] = ((PdfNumber)obj).getInt();
640             }
641         }
642         
643         return r;
644         }
645 
646         /**
647            Reads the header of the PDF document.
648            @return the PDF document header.
649            @throws IOException
650            @throws PdfException
651         */
652         public String   readHeader() throws IOException, PdfException {
653         synchronized (this) {
654             // searches within the first 1024 bytes for a header
655             // in the form "%PDF-M.m" or "%!PS-Adobe-N.n PDF-M.m"
656             // where N.n is an Adobe Document Structuring
657             // Conventions version number and M.m is a PDF version
658             // number.
659             CharBuffer cbuf = _pdfInput.readChars(0, Math.min(1024, _pdfInput.getLength()));
660             Matcher m = _patHeader.matcher(cbuf);
661             if (m.find()) {
662                 return m.group().trim();
663             }
664             throw new PdfFormatException("PDF document header not found.", 0);
665         }
666         }
667 
668         /**
669            Reads a PDF object from the document.  The object is
670        filtered through <code>PdfReaderFilter</code>.  It is
671        possible for this method to return <code>null</code> if the
672        filtering method discards all objects.
673        @param start the offset where the object starts.
674        @param end the offset where the object ends.
675        @param indirect true if the object is preceded by the object
676        number, generation, and "obj".
677        @param xt the PDF document's cross-reference table.
678        @param filter the object filter.
679            @return the PDF object.
680            @throws IOException
681            @throws PdfFormatException
682         */
683         public PdfObject readObject(long start, long end, boolean indirect,
684                     XrefTable xt) throws IOException, PdfFormatException {
685         synchronized (this) {
686 
687             CharBuffer cbuf = _pdfInput.readChars(start, end);
688 
689             if (indirect) {
690                 // move past the introduction
691                 Matcher m = _patObjIntro.matcher(cbuf);
692                 if ( !(m.lookingAt()) ) {
693                     throw new PdfFormatException(
694                         "Object not found.", start);
695                 }
696                 cbuf.position(m.end() - 1);
697             }
698             
699             return parseObject(start, end, cbuf, xt);
700             
701         }
702         }
703 
704         /**
705            Reads the startxref value from the PDF document.
706            @return the startxref value.
707        @throws IOException
708            @throws PdfFormatException
709         */
710         public long readStartxref() throws IOException, PdfFormatException {
711         synchronized (this) {
712 
713             long bufLength = _pdfInput.getLength();
714             CharBuffer cbuf = _pdfInput.readChars(
715                 Math.max(bufLength - (STARTXREF_RETRY_COUNT * STARTXREF_RETRY_SCAN), 0),
716                 bufLength);
717 
718             Matcher m = _patStartxref.matcher(cbuf);
719             int start = cbuf.capacity();
720             for (int retry = PdfReader.STARTXREF_RETRY_COUNT; retry > 0; retry--) {
721                 start -= PdfReader.STARTXREF_RETRY_SCAN;
722                 if (start >= 0) {
723                     if (m.find(start)) {
724                         String   s = m.group().trim();
725                         String  [] sp = s.split(PdfReader.REGEX_WHITESPACE);
726                         return Long.parseLong(sp[sp.length - 1]);
727                     }
728                 } else break;
729             }
730             throw new PdfFormatException("PDF startxref not found.", 0);
731         }
732         }
733 
734         /**
735            Reads and compiles all cross-reference tables and trailer
736            dictionaries from the PDF document beginning at a specified
737            position.  The most recent trailer dictionary is filtered
738            through <code>PdfReaderFilter</code>.
739        @param startxref the xref start position.
740        @param filter the filter.
741            @return the cross-reference table and trailer.
742            @throws IOException
743            @throws PdfFormatException
744         */
745         public XrefTable readXrefTable(long startxref) throws IOException, PdfFormatException {
746         synchronized (this) {
747             XrefTable xt = null;
748             long start = startxref;
749             long[] prev = new long[1];
750             do {
751                 xt = readPartialXrefTable(xt, start, prev);
752                 start = prev[0];
753                 
754             } while (start != -1);
755             xt.createSortedIndexArray();
756             return xt;
757         }
758     }
759     
760 }
761
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags