PdfAppender


1   package com.etymon.pjx.util;
2   
3   import java.io.*;
4   import java.nio.*;
5   import java.util.*;
6   import com.etymon.pjx.*;
7   
8   /**
9      Appends multiple PDF documents together, forming a new PDF
10     document.
11     @author Nassib Nassar
12  */
13  public class PdfAppender {
14  
15      /**
16         A flag used to indicate whether file names should be
17         printed during the appending process.  This is temporary
18         and will be superceded when logging is implemented.
19       */
20      protected boolean _printFileNames = false;
21      
22      protected static PdfObject renumber(PdfObject obj, int offset) throws PdfFormatException {
23  
24          if (obj == null) {
25              return null;
26          }
27          
28          if (obj instanceof PdfArray) {
29              List list = ((PdfArray)obj).getList();
30              ArrayList nlist = new ArrayList(list.size());
31              for (Iterator t = list.iterator(); t.hasNext(); ) {
32                  nlist.add( renumber((PdfObject)t.next(), offset) );
33              }
34              return new PdfArray(nlist);
35          }           
36          
37          if (obj instanceof PdfDictionary) {
38              Map map = ((PdfDictionary)obj).getMap();
39              HashMap nmap = new HashMap(map.size());
40              for (Iterator t = map.keySet().iterator(); t.hasNext(); ) {
41                  PdfName key = (PdfName)t.next();
42                  nmap.put( key, renumber((PdfObject)map.get(key), offset) );
43              }
44              return new PdfDictionary(nmap);
45          }
46  
47          if (obj instanceof PdfStream) {
48              PdfStream s = (PdfStream)obj;
49              ByteBuffer bb = s.getBuffer();
50              bb.position(0);
51              return new PdfStream( (PdfDictionary)renumber(s.getDictionary(), offset), bb );
52          }
53              
54          if (obj instanceof PdfReference) {
55              PdfReference r = (PdfReference)obj;
56              return new PdfReference(r.getObjectNumber() + offset, 0);
57          }
58          
59          return obj;
60          
61      }
62      
63      /**
64         The array of PDF managers.
65      */
66      protected PdfManager[] _m;
67  
68      /**
69         The current amount to offset (increase) all object numbers
70         by.  The pdfReaderFilter(PdfObject) method renumbers
71         indirect references by adding this value to their object
72         number.
73       */
74      protected int _renumber_offset;
75  
76      /**
77         Stores whether the {@link #append() append()} method has
78         been used.
79       */
80      protected boolean _used;
81      
82      /**
83         The PDF writer.
84      */
85      protected PdfWriter _w;
86  
87      /**
88         The class is initialized to read a list of PDF documents
89         (<code>PdfManager</code> objects) in order and to write the
90         resultant document to a specified <code>PdfWriter</code>.
91         The <code>PdfWriter</code> should be newly created (i.e. it
92         should not have been previously used for anything); and
93         after {@link #append() append()} has been called, the
94         <code>PdfWriter</code> should be closed and discarded, and
95         this <code>PdfAppender</code> should be discarded.
96         @param managers the documents to read.
97         @param writer the document to write to.
98         @deprecated Use {@link #PdfAppender(List,
99         PdfWriter) PdfAppender(List, PdfWriter)}.
100      */
101     public PdfAppender(PdfManager[] managers, PdfWriter writer) {
102 
103         _m = new PdfManager[managers.length];
104         System.arraycopy(managers, 0, _m, 0, managers.length);
105         
106         _w = writer;
107 
108         _used = false;
109 
110     }
111 
112     /**
113        The class is initialized to read a list of PDF documents
114        (<code>PdfManager</code> objects) in order and to write the
115        resultant document to a specified <code>PdfWriter</code>.
116        The <code>PdfWriter</code> should be newly created (i.e. it
117        should not have been previously used for anything); and
118        after {@link #append() append()} has been called, the
119        <code>PdfWriter</code> should be closed and discarded, and
120        this <code>PdfAppender</code> should be discarded.
121        @param managers the documents to read.  This must be a list
122        of <code>PdfManager</code> objects.
123        @param writer the document to write to.
124        @throws PdfFormatException
125      */
126     public PdfAppender(List managers, PdfWriter writer) throws PdfFormatException {
127 
128         _m = new PdfManager[managers.size()];
129         int x = 0;
130         for (Iterator t = managers.iterator(); t.hasNext(); ) {
131             Object   obj = t.next();
132             if ( !(obj instanceof PdfManager) ) {
133                 throw new PdfFormatException("List element is not a PdfManager instance.");
134             }
135             _m[x++] = (PdfManager)obj;
136         }
137         
138         _w = writer;
139 
140         _used = false;
141 
142     }
143 
144     // needs to be synchronized on the managers
145     /**
146        Performs the append operation.  This method can be called
147        only once per instance of this class.
148        @throws IOException
149        @throws PdfFormatException
150      */
151     public void append() throws IOException, PdfFormatException {
152 
153         if (_used) {
154             throw new PdfFormatException("PdfAppender.append() called more than once per instance.");
155         }
156 
157         _used = true;
158         
159         PdfManager[] ma = _m;
160         PdfWriter w = _w;
161 
162         if (ma.length == 0) {
163             return;
164         }
165 
166         int[] pageTreeRootId = new int[ma.length];
167         int[] pageTreeRootGen = new int[ma.length];
168         PdfDictionary[] pageTreeRoot = new PdfDictionary[ma.length];
169         List[] fieldsRef = new List[ma.length];
170         List[] fields = new List[ma.length];
171         Map newAcroFormMap = null;
172 
173         if (_printFileNames) {
174             System.out.println(ma[0].getReader().getInput().getName());
175         }
176 
177         // first copy ma[0] to the output
178         long pos = ma[0].writeDocument(w);
179 
180         if (ma.length == 1) {
181             return;
182         }
183         
184         long prev = ma[0].getStartxref();
185 
186         // get the page tree root object
187         PdfManager manager = ma[0];
188         PdfModifier modifier = new PdfModifier(manager);
189         PdfReference pageTreeRootRef = modifier.getPageTreeRootReference();
190         pageTreeRootId[0] = pageTreeRootRef.getObjectNumber();
191         pageTreeRootGen[0] = pageTreeRootRef.getGenerationNumber();
192         pageTreeRoot[0] = modifier.getPageTreeRoot();
193 
194         // get the interactive form dictionary
195         PdfDictionary catalog = modifier.getCatalog();
196         PdfObject acroFormObj = (PdfObject)catalog.getMap().get(new PdfName("AcroForm"));
197         PdfDictionary acroForm = (PdfDictionary)ma[0].getObjectIndirect(acroFormObj);
198         if (acroForm != null) {
199             Map acroFormMap = acroForm.getMap();
200             int acroFormMapSize = acroFormMap.size();
201             // we only copy the dictionary as a whole if
202             // we don't have one yet
203             if (newAcroFormMap == null) {
204                 newAcroFormMap = new HashMap(acroFormMap);
205             }
206             // now add fields to our running list
207             PdfObject fieldsObj = (PdfObject)acroFormMap.get(new PdfName("Fields"));
208             PdfArray fa = (PdfArray)ma[0].getObjectIndirect(fieldsObj);
209             List fr = new ArrayList();
210             List ff = new ArrayList();
211             if (fa != null) {
212                 for (Iterator t = fa.getList().iterator(); t.hasNext(); ) {
213                     PdfReference f = (PdfReference)t.next();
214                     fr.add( f );
215                     ff.add( ma[0].getObjectIndirect(f) );
216                 }
217             }
218             fieldsRef[0] = fr;
219             fields[0] = ff;
220             
221         }
222 
223         int pageCount;
224         PdfInteger countObj =
225             (PdfInteger)manager.getObjectIndirect((PdfObject)(
226                                       pageTreeRoot[0].getMap().get(new PdfName("Count"))));
227         if (countObj != null) {
228             pageCount = countObj.getInt();
229         } else {
230             pageCount = 0;
231         }
232         
233         _renumber_offset = manager.getXrefTableSize();
234         
235         // next append the remaining documents
236         for (int mx = 1; mx < ma.length; mx++) {
237 
238             PdfManager m = ma[mx];
239 
240             if (_printFileNames) {
241                 System.out.println(m.getReader().getInput().getName());
242             }
243             
244             // first extract needed information, before we
245             // renumber all of the objects.  we need the
246             // object number of the root of the page tree.
247             
248             manager = m;
249             modifier = new PdfModifier(manager);
250             pageTreeRootRef = modifier.getPageTreeRootReference();
251             pageTreeRootId[mx] = pageTreeRootRef.getObjectNumber() +
252                 _renumber_offset;
253             pageTreeRootGen[mx] = pageTreeRootRef.getGenerationNumber();
254             // we delay setting pageTreeRoot[rax] and
255             // using it to get the number of pages until
256             // later when we can get the renumbered
257             // version of the page tree root
258             
259             // get the interactive form dictionary
260             catalog = modifier.getCatalog();
261             acroFormObj = (PdfObject)catalog.getMap().get(new PdfName("AcroForm"));
262             acroForm = (PdfDictionary)m.getObjectIndirect(acroFormObj);
263             if (acroForm != null) {
264                 Map acroFormMap = acroForm.getMap();
265                 int acroFormMapSize = acroFormMap.size();
266                 // we only copy the dictionary as a whole if
267                 // we don't have one yet
268                 if (newAcroFormMap == null) {
269                     newAcroFormMap = new HashMap(acroFormMapSize);
270                     for (Iterator t = acroFormMap.keySet().iterator(); t.hasNext(); ) {
271                         PdfName key = (PdfName)t.next();
272                         newAcroFormMap.put( key,
273                                     renumber((PdfObject)acroFormMap.get(key),
274                                          _renumber_offset) );
275                     }
276                 }
277                 // now add fields to our running list
278                 PdfObject fieldsObj = (PdfObject)acroFormMap.get(new PdfName("Fields"));
279                 PdfArray fa = (PdfArray)m.getObjectIndirect(fieldsObj);
280                 List fr = new ArrayList();
281                 List ff = new ArrayList();
282                 if (fa != null) {
283                     for (Iterator t = fa.getList().iterator(); t.hasNext(); ) {
284                         PdfReference f = (PdfReference)t.next();
285                         fr.add( renumber(f, _renumber_offset) );
286                         ff.add( renumber(m.getObjectIndirect(f), _renumber_offset) );
287                     }
288                 }
289                 fieldsRef[mx] = fr;
290                 fields[mx] = ff;
291             }
292             
293             // next read all the objects, renumber them,
294             // and write them to the output
295 
296             int xtSize = m.getXrefTableSize();
297             int nxtSize = xtSize + _renumber_offset;
298 
299             long[] index = new long[nxtSize];
300             int[] generation = new int[nxtSize];
301             byte[] usage = new byte[nxtSize];
302             index[0] = XrefTable.ENTRY_FREE;
303             generation[0] = 65535;
304             usage[0] = XrefTable.ENTRY_FREE;
305             
306             for (int x = 1; x < xtSize; x++) {
307                 
308                 PdfObject obj = m.getObject(x);
309                 
310                 if (obj != null) {
311 
312                     obj = renumber(obj, _renumber_offset);
313                     
314                     index[_renumber_offset + x] = pos;
315                     generation[_renumber_offset + x] = 0;
316                     usage[_renumber_offset + x] = XrefTable.ENTRY_IN_USE;
317 
318                     
319                     pos += w.writeObjectIndirect(obj, x + _renumber_offset, 0);
320 
321                     if ((x + _renumber_offset) == pageTreeRootId[mx]) {
322                         pageTreeRoot[mx] = (PdfDictionary)obj;
323                         
324                         // now we can get the number of pages
325                         countObj =
326                             (PdfInteger)manager.getObjectIndirect((PdfObject)(
327                                                       pageTreeRoot[mx].getMap().get(new PdfName("Count"))));
328                         if (countObj != null) {
329                             pageCount += countObj.getInt();
330                         }
331                         
332                     }
333                 } else {
334                     
335                     generation[_renumber_offset + x] = 0;
336                     usage[_renumber_offset + x] = XrefTable.ENTRY_FREE;
337                     
338                 }
339 
340             }
341             
342             // finally, write the xref table and trailer
343 
344             PdfDictionary trailer = m.getTrailerDictionary();
345             Map trailerMap = trailer.getMap();
346 
347             HashMap ntrailerMap = new HashMap(trailerMap);
348             
349             ntrailerMap.put(new PdfName("Size"), new PdfInteger(_renumber_offset + nxtSize));
350             ntrailerMap.put(new PdfName("Prev"), new PdfLong(prev));
351 
352             prev = pos;
353             
354             PdfDictionary ntrailer = new PdfDictionary(ntrailerMap);
355             
356             XrefTable nxt = new XrefTable(index, generation, usage, ntrailer);
357 
358             pos += w.writeXrefTable(nxt, pos);
359 
360             _renumber_offset = nxtSize;
361             
362         }
363 
364         // write the old page tree roots and field
365         // dictionaries with new parent values
366 
367         int newPageTreeRootId = _renumber_offset;
368         int newFieldsId = _renumber_offset + 1;
369         int newCatalogId = _renumber_offset + 1 + fields.length;
370         int xtSize = _renumber_offset + 2 + fields.length;
371         
372         long[] index = new long[xtSize];
373         int[] generation = new int[xtSize];
374         byte[] usage = new byte[xtSize];
375         Arrays.fill(usage, XrefTable.ENTRY_UNDEFINED);
376         index[0] = 0;
377         generation[0] = 65535;
378         usage[0] = XrefTable.ENTRY_FREE;
379 
380         for (int x = 0; x < pageTreeRoot.length; x++) {
381             index[pageTreeRootId[x]] = pos;
382             generation[pageTreeRootId[x]] = pageTreeRootGen[x];
383             usage[pageTreeRootId[x]] = XrefTable.ENTRY_IN_USE;
384 
385             // update parent value
386             PdfDictionary d = pageTreeRoot[x];
387             Map map = d.getMap();
388             HashMap nmap = new HashMap(map);
389             nmap.put(new PdfName("Parent"), new PdfReference(newPageTreeRootId, 0));
390 
391             pos += w.writeObjectIndirect(new PdfDictionary(nmap),
392                              pageTreeRootId[x], pageTreeRootGen[x]);
393         }
394 
395         for (int y = 0; y < fields.length; y++) {
396             if (fields[y] != null) {
397                 int fieldsSize = fields[y].size();
398                 for (int x = 0; x < fieldsSize; x++) {
399                     PdfReference ref = (PdfReference)fieldsRef[y].get(x);
400                     int id = ref.getObjectNumber();
401                     int gen = ref.getGenerationNumber();
402                     index[id] = pos;
403                     generation[id] = gen;
404                     usage[id] = XrefTable.ENTRY_IN_USE;
405                     
406                     // update parent value
407                     PdfDictionary d = (PdfDictionary)fields[y].get(x);
408                     Map map = d.getMap();
409                     HashMap nmap = new HashMap(map);
410                     nmap.put(new PdfName("Parent"), new PdfReference(newFieldsId + y, 0));
411                     
412                     pos += w.writeObjectIndirect(new PdfDictionary(nmap),
413                                      id, gen);
414                 }
415             }
416         }
417         
418         // write the new page tree root, which contains the
419         // root from each document
420 
421         HashMap rootMap = new HashMap();
422         rootMap.put(new PdfName("Type"), new PdfName("Pages"));
423         rootMap.put(new PdfName("Count"), new PdfInteger(pageCount));
424         ArrayList kids = new ArrayList(pageTreeRoot.length);
425         for (int x = 0; x < pageTreeRoot.length; x++) {
426             kids.add( new PdfReference(pageTreeRootId[x],
427                            pageTreeRootGen[x]) );
428         }
429         rootMap.put(new PdfName("Kids"), new PdfArray(kids));
430 
431         index[newPageTreeRootId] = pos;
432         generation[newPageTreeRootId] = 0;
433         usage[newPageTreeRootId] = XrefTable.ENTRY_IN_USE;
434         
435         pos += w.writeObjectIndirect(new PdfDictionary(rootMap), newPageTreeRootId, 0);
436 
437         // write the new fields roots, which contain all the
438         // fields
439 
440         List fieldRootList = new ArrayList(fields.length);
441         
442         for (int x = 0; x < fields.length; x++) {
443 
444             if (fields[x] != null) {
445             
446                 rootMap = new HashMap();
447                 kids = new ArrayList(fields[x].size());
448                 for (Iterator t = fieldsRef[x].iterator(); t.hasNext(); ) {
449                     PdfReference ref = (PdfReference)t.next();
450                     kids.add(ref);
451                 }
452                 rootMap.put(new PdfName("Kids"), new PdfArray(kids));
453                 
454                 rootMap.put(new PdfName("T"), new PdfString("A" + x));
455                 
456                 int n = newFieldsId + x;
457                 index[n] = pos;
458                 generation[n] = 0;
459                 usage[n] = XrefTable.ENTRY_IN_USE;
460                 
461                 pos += w.writeObjectIndirect(new PdfDictionary(rootMap), n, 0);
462 
463                 fieldRootList.add(new PdfReference(n, 0));
464             }
465 
466         }
467 
468         // build the interactive form dictionary for the new
469         // catalog
470 
471         Map buildAcroFormMap;
472         if (newAcroFormMap != null) {
473             buildAcroFormMap = new HashMap(newAcroFormMap);
474             buildAcroFormMap.put(new PdfName("Fields"), new PdfArray(fieldRootList));
475         } else {
476             buildAcroFormMap = null;
477         }
478         
479         // write the new catalog
480 
481         HashMap catalogMap = new HashMap();
482         catalogMap.put(new PdfName("Type"), new PdfName("Catalog"));
483         catalogMap.put(new PdfName("Pages"), new PdfReference(newPageTreeRootId, 0));
484         if (buildAcroFormMap != null) {
485             catalogMap.put(new PdfName("AcroForm"), new PdfDictionary(buildAcroFormMap));
486         }
487                    
488         index[newCatalogId] = pos;
489         generation[newCatalogId] = 0;
490         usage[newCatalogId] = XrefTable.ENTRY_IN_USE;
491         
492         pos += w.writeObjectIndirect(new PdfDictionary(catalogMap), newCatalogId, 0);
493 
494         // write the final xref table and trailer
495         
496         HashMap ntrailerMap = new HashMap();
497         
498         ntrailerMap.put(new PdfName("Size"), new PdfInteger(xtSize));
499         ntrailerMap.put(new PdfName("Prev"), new PdfLong(prev));
500         ntrailerMap.put(new PdfName("Root"), new PdfReference(newCatalogId, 0));
501         
502         PdfDictionary ntrailer = new PdfDictionary(ntrailerMap);
503         
504         XrefTable nxt = new XrefTable(index, generation, usage, ntrailer);
505         
506         pos += w.writeXrefTable(nxt, pos);
507 
508     }
509 
510     /**
511        Appends multiple PDF documents together using this class.
512        The documents are specified with a list of file names; the
513        last indicating the output file and the others indicating
514        the input files.  The input files are appended in the order
515        they are specified within the list.
516        @param args the list of file names.  <b>Note that the last
517        file in this list (<code>args[args.length - 1]</code>) is
518        overwritten with the resultant PDF document.</b>
519        @throws IOException
520        @throws PdfFormatException
521      */
522         public static void main(String  [] args) throws IOException, PdfFormatException {
523 
524         if (args.length < 2) {
525             System.err.println(
526                 "Usage:  java com.etymon.pjx.util.PdfAppender [input1.pdf] [input2.pdf] [...] [output.pdf]");
527             return;
528         }
529 
530         List m = new ArrayList(args.length - 1);
531 
532         for (int x = 0; x < args.length - 1; x++) {
533             try {
534                 m.add( new PdfManager(new PdfReader(new PdfInputFile(new File(args[x])))) );
535             } catch (PdfFormatException e) {
536                 throw new PdfFormatException(args[x] + ": " + e.getMessage(), e.getOffset());
537             }
538         }
539         
540         PdfWriter w = new PdfWriter(new File(args[args.length - 1]));
541 
542         PdfAppender a = new PdfAppender(m, w);
543         a._printFileNames = true;
544         a.append();
545 
546         w.close();
547 
548     }   
549 
550 }
551
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags