KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > etymon > pjx > PdfReader


1 /*
2   Copyright (C) Etymon Systems, Inc. <http://www.etymon.com/>
3 */

4
5 package com.etymon.pjx;
6
7 import java.io.*;
8 import java.nio.*;
9 import java.util.*;
10 import java.util.regex.*;
11
12 /**
13    Reads a PDF document. Most applications do not need to access
14    methods in this class but should instead go through {@link
15    PdfManager PdfManager}. This class is synchronized.
16    @author Nassib Nassar
17 */

18 public final class PdfReader {
19
20     protected PdfInput _pdfInput;
21
22     /**
23        Returns the <code>PdfInput</code> instance associated with
24        this document.
25     */

26     public PdfInput getInput() {
27         synchronized (this) {
28
29             return _pdfInput;
30
31         }
32     }
33     
34     protected PdfInput getPdfInput() {
35         return _pdfInput;
36     }
37     
38     /**
39        A placeholder used by the PDF parser to mark the end of an
40        array.
41     */

42     protected class ArrayEnd extends ParserObject { }
43     
44     /**
45        A placeholder used by the PDF parser to mark the end of a
46        dictionary.
47      */

48     protected class DictionaryEnd extends ParserObject { }
49         
50     /**
51        A placeholder used by the PDF parser to mark the end of a
52        dictionary that is also followed by a stream.
53      */

54     protected class DictionaryEndStream extends ParserObject { }
55         
56     /**
57        The superclass of inner classes used by this
58        <code>PdfReader</code> to mark positions while parsing PDF
59        objects.
60     */

61     protected class ParserObject extends PdfObject {
62
63         protected int writePdf(PdfWriter w, boolean spacing) throws IOException {
64             return 0;
65         }
66         
67     }
68     
69     /**
70        The regular expression that matches a PDF header.
71     */

72     protected static Pattern _patHeader = Pattern.compile("^%(!PS-Adobe-\\d\\.\\d )?PDF-\\d\\.\\d" +
73                                   PdfReader.REGEX_EOL);
74
75     /**
76        The regular expression that matches the begining of an
77        indirect object (specifically, the object number and
78        generation number followed by "obj").
79     */

80     protected static final Pattern _patObjIntro = Pattern.compile(
81         PdfReader.REGEX_WHITESPACE + "*\\d+" +
82         PdfReader.REGEX_WHITESPACE + "+\\d+" +
83         PdfReader.REGEX_WHITESPACE + "+obj" +
84         PdfReader.REGEX_STOP);
85
86     /**
87        The regular expression that matches a PDF (direct) object.
88      */

89     protected static final Pattern _patPdfObject = Pattern.compile(
90
91         "(" +
92
93         // Null 2
94
"(" + PdfReader.REGEX_WHITESPACE + "*null" + PdfReader.REGEX_STOP + ")|" +
95
96         // Reference 12
97
"(" + PdfReader.REGEX_WHITESPACE + "*\\d+" + PdfReader.REGEX_WHITESPACE + "\\d+" + PdfReader.REGEX_WHITESPACE + "R" + PdfReader.REGEX_STOP + ")|" +
98
99         // Boolean 30 (35 = true)
100
"(" + PdfReader.REGEX_WHITESPACE + "*((true)|(false))" + PdfReader.REGEX_STOP + ")|" +
101
102         // Integer 43
103
"(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?\\d+" + PdfReader.REGEX_STOP + ")|" +
104
105         // Float 54
106
"(" + PdfReader.REGEX_WHITESPACE + "*(\\+|\\-)?((\\d*\\.\\d+)|(\\d+\\.))" + PdfReader.REGEX_STOP + ")|" +
107
108         // String 68
109
"(" + PdfReader.REGEX_WHITESPACE + "*((\\()|(<[^<]))" + PdfReader.REGEX_ANY_CHAR + ")|" +
110         
111         // Name 76
112
"(" + PdfReader.REGEX_WHITESPACE +
113                               "*/((#\\d\\d)|(" + PdfReader.REGEX_REGULAR + "))*" +
114                               PdfReader.REGEX_STOP + ")|" +
115
116         // Dictionary begin 89
117
"(" + PdfReader.REGEX_WHITESPACE + "*<<" + PdfReader.REGEX_ANY_CHAR + ")|" +
118
119         // Dictionary end / Stream begin 94
120
"(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_WHITESPACE + "*stream((\\r\\n)|\\n|\\r)" + ")|" +
121         // We accept a solitary '\r' after the "stream"
122
// keyword even though the PDF specification does not
123
// allow it, because of a sample file << /Creator
124
// (Adobe Illustrator_TM_ 7.0) /Producer (Acrobat PDF
125
// File Format 1.1 for Macintosh) >> that used it.
126
// The only harm in accepting it is that output from
127
// such a non-compliant program might be read
128
// incorrectly by PJX in the rare case that the stream
129
// being with '\n'. That is no harm since without
130
// recognizing '\r' we couldn't read it anyway
131
// (causing an exception to be thrown at some point
132
// later during processing of the PDF document).
133

134         // Dictionary end 105
135
"(" + PdfReader.REGEX_WHITESPACE + "*>>" + PdfReader.REGEX_ANY_CHAR + ")|" +
136
137         // Array begin 110
138
"(" + PdfReader.REGEX_WHITESPACE + "*\\[" + PdfReader.REGEX_ANY_CHAR + ")|" +
139
140         // Array end 115
141
"(" + PdfReader.REGEX_WHITESPACE + "*\\]" + PdfReader.REGEX_ANY_CHAR + ")" +
142
143         ")"
144         
145         );
146
147     /**
148        The regular expression that matches a startxref section.
149     */

150     protected static final Pattern _patStartxref = Pattern.compile(
151         PdfReader.REGEX_EOL + "startxref" +
152         PdfReader.REGEX_WHITESPACE +
153         "+\\d+" + PdfReader.REGEX_WHITESPACE);
154     
155     /**
156        The regular expression that matches the beginning of an
157        xref section (specifically, the "xref" key word).
158     */

159     protected static final Pattern _patXref = Pattern.compile("xref" + PdfReader.REGEX_WHITESPACE + "+");
160
161     /**
162        The regular expression that matches the introduction to a
163        subsection of an xref section (specifically, an integer
164        pair) or the "trailer" key word.
165     */

166     protected static final Pattern _patXrefSub = Pattern.compile(
167         PdfReader.REGEX_WHITESPACE + "*((\\d+ \\d+)|(trailer))" + PdfReader.REGEX_WHITESPACE + "+");
168
169     /**
170        The regular expression that matches an entire xref table
171        section, including the "trailer" key word.
172     */

173     protected static final Pattern _patXrefTable = Pattern.compile(
174         "xref" + PdfReader.REGEX_WHITESPACE + "*" +
175         PdfReader.REGEX_EOL + "[^t]*" + "trailer" +
176         PdfReader.REGEX_WHITESPACE + "+");
177
178     /**
179        The regular expression that matches an entire xref table
180        section, including the "trailer" key word.
181     */

182     protected static final Pattern _patXrefEof = Pattern.compile(
183         PdfReader.REGEX_ANY_CHAR + "*" + PdfReader.REGEX_WHITESPACE + "startxref" + PdfReader.REGEX_WHITESPACE);
184
185     /**
186            A <code>PdfName</code> object representing the name
187            <code>Length</code>.
188     */

189     protected static final PdfName PDFNAME_LENGTH = new PdfName("Length");
190
191     /**
192            A <code>PdfName</code> object representing the name
193            <code>Prev</code>.
194     */

195     protected static final PdfName PDFNAME_PREV = new PdfName("Prev");
196
197     /**
198            A <code>PdfName</code> object representing the name
199            <code>Size</code>.
200     */

201     protected static final PdfName PDFNAME_SIZE = new PdfName("Size");
202
203     /**
204        The regular expression that matches literally any character.
205     */

206     protected static final String JavaDoc REGEX_ANY_CHAR = "[\\x00-\\xFF]";
207     
208     /**
209        The regular expression that matches a comment in PDF.
210     */

211     protected static final String JavaDoc REGEX_COMMENT = "(%[^" + PdfReader.REGEX_EOL + "]*" + PdfReader.REGEX_EOL + ")";
212
213     /**
214        The regular expression that matches a delimiter in PDF.
215     */

216     protected static final String JavaDoc REGEX_DELIMITER = "[\\(\\)<>\\[\\]\\{\\}/%]";
217
218     /**
219        The regular expression that matches an end-of-line (EOL)
220        marker in PDF.
221     */

222     protected static final String JavaDoc REGEX_EOL = "(\\r|\\n|(\\r\\n))";
223
224     /**
225        The regular expression that matches a regular character in PDF.
226     */

227     protected static final String JavaDoc REGEX_REGULAR = "[^\\x00\\t\\n\\f\\r \\(\\)<>\\[\\]\\{\\}/%]";
228     
229     /**
230        The regular expression that matches a white-space or
231        delimiter (stopping syntactic entities) in PDF.
232     */

233     protected static final String JavaDoc REGEX_STOP = "(" + PdfReader.REGEX_WHITESPACE + "|[\\(\\)<>\\[\\]\\{\\}/])";
234
235     /**
236        The regular expression that matches general white-space in PDF.
237     */

238     protected static final String JavaDoc REGEX_WHITESPACE = "([\\x00\\t\\n\\f\\r ]|" + PdfReader.REGEX_COMMENT + ")";
239
240     /**
241        Number of times to try scanning for startxref. Each time
242        the parser will back up to a point (STARTXREF_RETRY_SCAN)
243        bytes before the previous time.
244     */

245     protected static final int STARTXREF_RETRY_COUNT = 25;
246
247     /**
248            The number of bytes from the end of a PDF document at which to
249            start scanning for startxref.
250     */

251     protected static final int STARTXREF_RETRY_SCAN = 40;
252
253         /**
254            Creates a reader for a PDF document to be read from a
255            <code>PdfInput</code> source.
256            @param pdfInput the source to read the PDF document from.
257         */

258     public PdfReader(PdfInput pdfInput) {
259
260         _pdfInput = pdfInput;
261
262     }
263     
264         /**
265            Closes the PDF document and releases any system resources
266            associated with it.
267            @throws IOException
268         */

269         public void close() throws IOException {
270         synchronized (this) {
271
272             _pdfInput = null;
273
274         }
275         }
276
277     /**
278        Parses and returns a PDF object from the input source. The
279        object is filtered through <code>PdfReaderFilter</code>.
280        It is possible for this method to return <code>null</code>
281        if the filtering method discards all objects. This method
282        is intended to be called from <code>readObject()</code>
283        which advanced the buffer position past introduction if the
284        object is indirect.
285        @param start the offset where the object starts.
286        @param end the offset where the object ends.
287        @param cbuf the character buffer cached from
288        <code>readObject()</code>.
289        @param xt the cross-reference table; used for resolving
290        indirect references.
291        @throws PdfFormatException
292         */

293         protected PdfObject parseObject(long start, long end, CharBuffer cbuf,
294                     XrefTable xt) throws IOException, PdfFormatException {
295         Matcher m;
296         
297         m = _patPdfObject.matcher(cbuf);
298         if (m.lookingAt()) {
299
300             if (m.group(2) != null) {
301                 cbuf.position(cbuf.position() + m.end() - 1);
302                 return PdfNull.valueOf();
303             }
304             
305             if (m.group(12) != null) {
306                 String JavaDoc s = m.group();
307                 s = s.substring(0, s.length() - 1).trim();
308                 cbuf.position(cbuf.position() + m.end() - 1);
309                 String JavaDoc[] sp = s.split(PdfReader.REGEX_WHITESPACE);
310                 return new PdfReference(
311                     Integer.parseInt(sp[0]),
312                     Integer.parseInt(sp[1]) );
313             }
314             
315             if (m.group(30) != null) {
316                 PdfBoolean bool = PdfBoolean.valueOf(m.group(35) != null);
317                 cbuf.position(cbuf.position() + m.end() - 1);
318                 return bool;
319             }
320             
321             if (m.group(43) != null) {
322                 String JavaDoc s = m.group();
323                 s = s.substring(0, s.length() - 1).trim();
324                 cbuf.position(cbuf.position() + m.end() - 1);
325                     long n = Long.parseLong(s);
326                     if ( (n >= Integer.MIN_VALUE) &&
327                          (n <= Integer.MAX_VALUE) ) {
328                         return new PdfInteger( (int)n );
329                     } else {
330                         return new PdfLong(n);
331                     }
332             }
333
334             if (m.group(54) != null) {
335                 String JavaDoc s = m.group();
336                 s = s.substring(0, s.length() - 1).trim();
337                 cbuf.position(cbuf.position() + m.end() - 1);
338                 return new PdfFloat(Float.parseFloat(s));
339             }
340             
341             if (m.group(68) != null) {
342                 cbuf.position( cbuf.position() + m.start() );
343                 return new PdfString( PdfString.pdfToString(cbuf) );
344             }
345             
346             if (m.group(76) != null) {
347                 String JavaDoc s = m.group();
348                 s = s.substring(0, s.length() - 1).trim();
349                 cbuf.position(cbuf.position() + m.end() - 1);
350                 return new PdfName(PdfName.pdfToString(s));
351             }
352             
353             if (m.group(89) != null) {
354                 cbuf.position(cbuf.position() + m.end() - 1);
355                 HashMap h = new HashMap();
356                 int done = 0;
357                 PdfObject streamLength = null;
358                 do {
359                     PdfObject key, value;
360                     key = parseObject(start, end, cbuf, xt);
361                     if (key instanceof DictionaryEnd) {
362                         done = 1;
363                         break;
364                     }
365                     if (key instanceof DictionaryEndStream) {
366                         done = 2;
367                         break;
368                     }
369                     if (key.equals(PDFNAME_LENGTH)) {
370                         streamLength = parseObject(start, end, cbuf, xt);
371                         value = streamLength;
372                     } else {
373                         value = parseObject(start, end, cbuf, xt);
374                     }
375                     if ( (key != null) && (value != null) ) {
376                         h.put(key, value);
377                     }
378                 } while (done == 0);
379                 if (done == 1) { // DictionaryEnd
380
return PdfDictionary.wrap(h);
381                 } else { // DictionaryEndStream
382
PdfObject obj = streamLength;
383                     if (obj instanceof PdfReference) { // get indirect reference
384
int save = cbuf.position();
385                         int streamLengthId = ((PdfReference)obj).getObjectNumber();
386                         long s = xt.getIndex(streamLengthId);
387                         long e = xt.estimateObjectEnd(streamLengthId);
388                         obj = readObject(s, e, true, xt);
389                         cbuf.position(save);
390                     }
391                     if ( !(obj instanceof PdfInteger) ) {
392                         throw new PdfFormatException(
393                             "Valid Length value not found in stream dictionary.",
394                             cbuf.position());
395                     }
396                     int len = ((PdfInteger)obj).getInt();
397                     PdfDictionary d = PdfDictionary.wrap(h);
398
399                     ByteBuffer bbuf = _pdfInput.readBytes(start, end);
400                     ByteBuffer bb = ByteBuffer.allocateDirect(len);
401                     bbuf.position(cbuf.position());
402                     bbuf.limit(cbuf.position() + len);
403                     bb.put(bbuf);
404                     bbuf.limit(bbuf.capacity());
405                     
406                     return PdfStream.wrap(d, bb);
407                 }
408             }
409             
410             if (m.group(94) != null) {
411                 cbuf.position(cbuf.position() + m.end());
412                 return new DictionaryEndStream();
413             }
414             
415             if (m.group(105) != null) {
416                 cbuf.position(cbuf.position() + m.end() - 1);
417                 return new DictionaryEnd();
418             }
419             
420             if (m.group(110) != null) {
421                 cbuf.position(cbuf.position() + m.end() - 1);
422                 ArrayList a = new ArrayList();
423                 boolean done = false;
424                 do {
425                     Object JavaDoc value;
426                     value = parseObject(start, end, cbuf, xt);
427                     if (value instanceof ArrayEnd) {
428                         done = true;
429                         break;
430                     }
431                     if (value != null) {
432                         a.add(value);
433                     }
434                 } while (!done);
435                 return PdfArray.wrap(a);
436             }
437             
438             if (m.group(115) != null) {
439                 cbuf.position(cbuf.position() + m.end() - 1);
440                 return new ArrayEnd();
441             }
442             
443         }
444         throw new PdfFormatException("Object not recognized.", cbuf.position());
445     }
446
447         /**
448            Reads an individual (partial) cross-reference table and
449            trailer dictionary from the PDF document. The trailer
450            dictionary is filtered through
451            <code>PdfReaderFilter</code>. <b>This method should be
452            made public.</b>
453        @param xrefTrailer an existing xrefTrailer object to add
454        data to; assumed to be the "subsequent" to the new
455        XrefTrailer that is to be read. Only non-existing entries
456        are modified. The trailer is not modified.
457        @param startxref the xref start position.
458        @param filter the filter.
459        @param prev the current Prev offset.
460            @return the cross-reference table and trailer.
461            @throws IOException
462            @throws PdfFormatException
463         */

464         protected XrefTable readPartialXrefTable(XrefTable xt, long startxref,
465                          long[] prev) throws IOException, PdfFormatException {
466
467         Matcher m;
468
469         // there is no way to determine how large a block to
470
// read that will contain the entire xref section; so
471
// we must try progressively larger blocks until we
472
// can match the whole section
473
ByteBuffer bbuf;
474         CharBuffer cbuf;
475         int blockSize;
476         if (xt != null) {
477             // we can use the xref table size as a hint
478
blockSize = (xt.size() * 20) + 8192;
479         } else {
480             blockSize = 65536;
481         }
482         long inputLength = _pdfInput.getLength();
483         long endtrailer;
484         boolean done = false;
485         do {
486             endtrailer = startxref + blockSize;
487             if ( endtrailer > inputLength ) {
488                 endtrailer = inputLength;
489             }
490             cbuf = _pdfInput.readChars(startxref, endtrailer);
491             if (endtrailer == inputLength) {
492                 done = true;
493                 break;
494             }
495             m = _patXrefEof.matcher(cbuf);
496             blockSize = blockSize * 2;
497             if (m.lookingAt()) {
498                 done = true;
499                 break;
500             }
501         } while ( !done );
502         
503         XrefTable r;
504
505         if (xt != null) {
506             
507             r = xt;
508             
509         } else {
510             
511             // first read past xref table to get trailer
512
m = _patXrefTable.matcher(cbuf);
513             if ( !(m.lookingAt()) ) {
514                 throw new PdfFormatException(
515                     "Cross-reference table or trailer not found at correct position.", startxref);
516             }
517             
518             int trailer_offset = cbuf.position() + m.end();
519             
520             // read trailer
521
PdfObject pobj = readObject(startxref + trailer_offset, endtrailer,
522                             false, null);
523             if ( !(pobj instanceof PdfDictionary) ) {
524                 throw new PdfFormatException(
525                     "Trailer dictionary not found.", trailer_offset);
526             }
527             
528             // get Prev value
529
PdfDictionary trailerDictionary = (PdfDictionary)pobj;
530             Map trailerMap = trailerDictionary.getMap();
531             Object JavaDoc obj = trailerMap.get(PDFNAME_PREV);
532             if (obj == null) {
533                 prev[0] = -1;
534             } else {
535                 if ( (!(obj instanceof PdfInteger)) &&
536                      (!(obj instanceof PdfFloat)) ) {
537                     throw new PdfFormatException(
538                         "Valid Prev value not found in trailer dictionary.",
539                         trailer_offset);
540                 }
541                 prev[0] = ((PdfNumber)obj).getLong();
542             }
543             
544             // get xref size
545
obj = trailerMap.get(PDFNAME_SIZE);
546             if ( !(obj instanceof PdfInteger) ) {
547                 throw new PdfFormatException(
548                     "Valid xref size not found in trailer dictionary.", trailer_offset);
549             }
550             int xrefSize = ((PdfInteger)obj).getInt();
551             // initialize XrefTable
552
long[] rindex = new long[xrefSize];
553             int[] rgeneration = new int[xrefSize];
554             byte[] rusage = new byte[xrefSize];
555             r = XrefTable.wrap(rindex, rgeneration, rusage, trailerDictionary);
556             
557             // rewind to xref beginning
558
cbuf.position(0);
559         }
560
561         // added startxref to XrefTable's startxref list
562
r.getStartxrefList().add(new Long JavaDoc(startxref));
563         
564         m = _patXref.matcher(cbuf);
565         if ( !(m.lookingAt()) ) {
566             throw new PdfFormatException(
567                 "Cross-reference table (xref) not found at correct position.", 0);
568         }
569         cbuf.position(cbuf.position() + m.end());
570
571         String JavaDoc s;
572         done = false;
573         do {
574             m = _patXrefSub.matcher(cbuf);
575             if ( !(m.lookingAt()) ) {
576                 throw new PdfFormatException(
577                     "Cross-reference table (subsection) not found.", 0);
578             }
579             s = m.group().trim();
580             if (s.equals("trailer")) {
581                 done = true;
582                 break;
583             }
584             cbuf.position(cbuf.position() + m.end());
585             String JavaDoc[] sp = s.split(" ");
586             int x = Integer.parseInt(sp[0]);
587             int n = Integer.parseInt(sp[1]);
588             char[] ca = new char[11];
589             
590             long[] index = r.unwrapIndexArray();
591             int[] generation = r.unwrapGenerationArray();
592             byte[] usage = r.unwrapUsageArray();
593             
594             for ( ; n > 0; n--, x++) {
595                 
596                 // check for existing data
597
if ( (xt != null) && (usage[x] != XrefTable.ENTRY_UNDEFINED) ) {
598                     cbuf.position(cbuf.position() + 20);
599                 } else {
600                     // add the data
601
cbuf.get(ca, 0, 11);
602                     index[x] = Long.parseLong(new String JavaDoc(ca, 0, 10));
603                     cbuf.get(ca, 0, 6);
604                     generation[x] = Integer.parseInt(new String JavaDoc(ca, 0, 5));
605                     cbuf.get(ca, 0, 3);
606                     usage[x] = (ca[0] == 'n') ?
607                         XrefTable.ENTRY_IN_USE :
608                         XrefTable.ENTRY_FREE;
609                 }
610             }
611         } while (!done);
612         
613         // if this is not the most recently updated
614
// xref, then we didn't need to read the
615
// trailer beforehand (to get the table size);
616
// so now we do it here.
617
if (xt != null) {
618             int trailer_offset = cbuf.position() + m.end();
619             // read trailer
620
PdfObject pobj = readObject(startxref + trailer_offset, endtrailer, false, null);
621             if ( !(pobj instanceof PdfDictionary) ) {
622                 throw new PdfFormatException(
623                     "Trailer dictionary not found.", trailer_offset);
624             }
625             
626             // get Prev value
627
PdfDictionary trailerDictionary = (PdfDictionary)pobj;
628             Map trailerMap = trailerDictionary.getMap();
629             Object JavaDoc obj = trailerMap.get(PDFNAME_PREV);
630             if (obj == null) {
631                 prev[0] = -1;
632             } else {
633                 if ( (!(obj instanceof PdfInteger)) &&
634                      (!(obj instanceof PdfLong)) ) {
635                     throw new PdfFormatException(
636                         "Valid Prev value not found in trailer dictionary.",
637                         trailer_offset);
638                 }
639                 prev[0] = ((PdfNumber)obj).getInt();
640             }
641         }
642         
643         return r;
644         }
645
646         /**
647            Reads the header of the PDF document.
648            @return the PDF document header.
649            @throws IOException
650            @throws PdfException
651         */

652         public String JavaDoc readHeader() throws IOException, PdfException {
653         synchronized (this) {
654             // searches within the first 1024 bytes for a header
655
// in the form "%PDF-M.m" or "%!PS-Adobe-N.n PDF-M.m"
656
// where N.n is an Adobe Document Structuring
657
// Conventions version number and M.m is a PDF version
658
// number.
659
CharBuffer cbuf = _pdfInput.readChars(0, Math.min(1024, _pdfInput.getLength()));
660             Matcher m = _patHeader.matcher(cbuf);
661             if (m.find()) {
662                 return m.group().trim();
663             }
664             throw new PdfFormatException("PDF document header not found.", 0);
665         }
666         }
667
668         /**
669            Reads a PDF object from the document. The object is
670        filtered through <code>PdfReaderFilter</code>. It is
671        possible for this method to return <code>null</code> if the
672        filtering method discards all objects.
673        @param start the offset where the object starts.
674        @param end the offset where the object ends.
675        @param indirect true if the object is preceded by the object
676        number, generation, and "obj".
677        @param xt the PDF document's cross-reference table.
678        @param filter the object filter.
679            @return the PDF object.
680            @throws IOException
681            @throws PdfFormatException
682         */

683         public PdfObject readObject(long start, long end, boolean indirect,
684                     XrefTable xt) throws IOException, PdfFormatException {
685         synchronized (this) {
686
687             CharBuffer cbuf = _pdfInput.readChars(start, end);
688
689             if (indirect) {
690                 // move past the introduction
691
Matcher m = _patObjIntro.matcher(cbuf);
692                 if ( !(m.lookingAt()) ) {
693                     throw new PdfFormatException(
694                         "Object not found.", start);
695                 }
696                 cbuf.position(m.end() - 1);
697             }
698             
699             return parseObject(start, end, cbuf, xt);
700             
701         }
702         }
703
704         /**
705            Reads the startxref value from the PDF document.
706            @return the startxref value.
707        @throws IOException
708            @throws PdfFormatException
709         */

710         public long readStartxref() throws IOException, PdfFormatException {
711         synchronized (this) {
712
713             long bufLength = _pdfInput.getLength();
714             CharBuffer cbuf = _pdfInput.readChars(
715                 Math.max(bufLength - (STARTXREF_RETRY_COUNT * STARTXREF_RETRY_SCAN), 0),
716                 bufLength);
717
718             Matcher m = _patStartxref.matcher(cbuf);
719             int start = cbuf.capacity();
720             for (int retry = PdfReader.STARTXREF_RETRY_COUNT; retry > 0; retry--) {
721                 start -= PdfReader.STARTXREF_RETRY_SCAN;
722                 if (start >= 0) {
723                     if (m.find(start)) {
724                         String JavaDoc s = m.group().trim();
725                         String JavaDoc[] sp = s.split(PdfReader.REGEX_WHITESPACE);
726                         return Long.parseLong(sp[sp.length - 1]);
727                     }
728                 } else break;
729             }
730             throw new PdfFormatException("PDF startxref not found.", 0);
731         }
732         }
733
734         /**
735            Reads and compiles all cross-reference tables and trailer
736            dictionaries from the PDF document beginning at a specified
737            position. The most recent trailer dictionary is filtered
738            through <code>PdfReaderFilter</code>.
739        @param startxref the xref start position.
740        @param filter the filter.
741            @return the cross-reference table and trailer.
742            @throws IOException
743            @throws PdfFormatException
744         */

745         public XrefTable readXrefTable(long startxref) throws IOException, PdfFormatException {
746         synchronized (this) {
747             XrefTable xt = null;
748             long start = startxref;
749             long[] prev = new long[1];
750             do {
751                 xt = readPartialXrefTable(xt, start, prev);
752                 start = prev[0];
753                 
754             } while (start != -1);
755             xt.createSortedIndexArray();
756             return xt;
757         }
758     }
759     
760 }
761
Popular Tags