KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > etymon > pjx > util > PdfPageTree


1 package com.etymon.pjx.util;
2
3 import java.io.*;
4 import java.util.*;
5 import com.etymon.pjx.*;
6
7 /**
8    Provides methods for retrieving and modifying the page tree of a
9    PDF document. This class is synchronized.
10    @author Nassib Nassar
11 */

12 public class PdfPageTree {
13
14     /**
15        The page tree root of the document.
16     */

17     protected PdfReference _pageTreeRoot;
18
19     /**
20        The manager associated with this document.
21     */

22     protected PdfManager _m;
23
24     /**
25        The catalog associated with this document.
26     */

27     protected PdfCatalog _catalog;
28
29     /**
30        Defines the set of inheritable field attributes.
31     */

32     protected static Set _inheritable;
33
34     protected static final PdfName PDFNAME_COUNT = new PdfName("Count");
35     protected static final PdfName PDFNAME_KIDS = new PdfName("Kids");
36     protected static final PdfName PDFNAME_PAGE = new PdfName("Page");
37     protected static final PdfName PDFNAME_PAGES = new PdfName("Pages");
38     protected static final PdfName PDFNAME_PARENT = new PdfName("Parent");
39     protected static final PdfName PDFNAME_TYPE = new PdfName("Type");
40
41     /**
42        Constructs a <code>PdfPageTree</code> instance based on a
43        specified <code>PdfManager</code>.
44      */

45     public PdfPageTree(PdfManager manager) {
46
47         _m = manager;
48         _catalog = new PdfCatalog(manager);
49
50         _inheritable = new HashSet(4);
51         _inheritable.add(new PdfName("Resources"));
52         _inheritable.add(new PdfName("MediaBox"));
53         _inheritable.add(new PdfName("CropBox"));
54         _inheritable.add(new PdfName("Rotate"));
55         
56     }
57
58     /**
59        Returns an indirect reference to a page object specified by
60        page number. Note that page objects do not include
61        inherited attributes; {@link
62        #inheritAttributes(PdfDictionary)
63        inheritAttributes(PdfDictionary)} should be used to obtain
64        inherited attributes.
65        @param pageNumber the page number. The numbering starts
66        with <code>0</code>.
67        @return the indirect reference.
68        @throws IOException
69        @throws PdfFormatException
70      */

71         public PdfReference getPage(int pageNumber) throws IOException, PdfFormatException {
72                 synchronized (this) {
73             synchronized (_m) {
74
75                 if (pageNumber < 0) {
76                     throw new IndexOutOfBoundsException JavaDoc(
77                         "Requested page number is less than 0");
78                 }
79
80                 // keep a running list of all page
81
// nodes visited so that we can detect
82
// a cycle and avoid getting caught in
83
// an infinite loop
84
Set visited = new HashSet();
85
86                 // get the root of the page tree
87
PdfReference nodeR = getRoot();
88                 visited.add(nodeR);
89                 Object JavaDoc obj = _m.getObjectIndirect(nodeR);
90                 if ( !(obj instanceof PdfDictionary) ) {
91                     throw new PdfFormatException(
92                         "Page tree (Pages) is not a dictionary.");
93                 }
94                 Map node = ((PdfDictionary)obj).getMap();
95
96                 // descend the page tree; each
97
// iteration through this loop
98
// descends one level
99
boolean first = true;
100                 int numberOfPages;
101                 int pageSum = 0;
102                 while ( true ) {
103
104                     // if this is the first node,
105
// it should contain the
106
// total number of pages;
107
// check that the requested
108
// page is within that range
109
if (first) {
110
111                         first = false;
112                         
113                         obj = node.get(PDFNAME_COUNT);
114                         if ( ( !(obj instanceof PdfInteger) ) &&
115                              ( !(obj instanceof PdfReference) ) ) {
116                             throw new PdfFormatException(
117                                 "Page count is not an integer or reference.");
118                         }
119                         if (obj instanceof PdfReference) {
120                             obj = _m.getObjectIndirect((PdfReference)obj);
121                         }
122                         if ( !(obj instanceof PdfInteger) ) {
123                             throw new PdfFormatException(
124                                 "Page count is not an integer.");
125                         }
126                         numberOfPages = ((PdfInteger)obj).getInt();
127
128                         if (pageNumber >= numberOfPages) {
129                             throw new IndexOutOfBoundsException JavaDoc(
130                                 "Requested page number is too large");
131                         }
132                         
133                     }
134
135                     // at this point we have a
136
// node that is not a page
137
// object; therefore we assume
138
// it is a pages object and
139
// proceed to determine the
140
// next node to examine
141

142                     // get the list of kids
143
obj = node.get(PDFNAME_KIDS);
144                     if ( ( !(obj instanceof PdfArray) ) &&
145                          ( !(obj instanceof PdfReference) ) ) {
146                         throw new PdfFormatException(
147                             "Kids object is not an array or reference.");
148                     }
149                     if (obj instanceof PdfReference) {
150                         obj = _m.getObjectIndirect((PdfReference)obj);
151                     }
152                     if ( !(obj instanceof PdfArray) ) {
153                         throw new PdfFormatException(
154                             "Kids object is not an array.");
155                     }
156                     List kids = ((PdfArray)obj).getList();
157 //System.out.println( (PdfArray)obj );
158

159                     // iterate through the list of
160
// kids, examining the number
161
// of pages in each, and
162
// stopping when we reach the
163
// one that must contain the
164
// page we are looking for
165
boolean descend = false;
166                     for (Iterator t = kids.iterator(); ( (t.hasNext()) && (!descend) ); ) {
167
168                         // get the "kid",
169
// i.e. the referenced
170
// page or pages
171
// object
172
obj = t.next();
173                         if ( !(obj instanceof PdfReference) ) {
174                             throw new PdfFormatException(
175                                 "Kids element is not a reference.");
176                         }
177                         PdfReference kidR = ((PdfReference)obj);
178                         if (visited.contains(kidR)) {
179                             throw new PdfFormatException(
180                                 "Page tree contains a cycle (must be acyclic).");
181                         }
182                         visited.add(kidR);
183                         obj = _m.getObjectIndirect(kidR);
184                         if ( !(obj instanceof PdfDictionary) ) {
185                             throw new PdfFormatException(
186                                 "Kids element is not a dictionary.");
187                         }
188                         Map kid = ((PdfDictionary)obj).getMap();
189
190                         // determine whether
191
// it is a page object
192
// or a pages object
193
obj = kid.get(PDFNAME_TYPE);
194                         if ( ( !(obj instanceof PdfName) ) &&
195                              ( !(obj instanceof PdfReference) ) ) {
196                             throw new PdfFormatException(
197                                 "Page node type is not a name or reference.");
198                         }
199                         if (obj instanceof PdfReference) {
200                             obj = _m.getObjectIndirect((PdfReference)obj);
201                         }
202                         if ( !(obj instanceof PdfName) ) {
203                             throw new PdfFormatException(
204                                 "Page node type is not a name.");
205                         }
206                         PdfName nodeType = (PdfName)obj;
207                         boolean singlePage = nodeType.equals(PDFNAME_PAGE);
208                         
209                         // determine how many
210
// pages are
211
// represented by this
212
// node
213
int count;
214                         if (singlePage) {
215                             // this is a
216
// page
217
// object, so
218
// it
219
// represents
220
// exactly one
221
// page
222
count = 1;
223                         } else {
224                             // otherwise
225
// we assume
226
// this is a
227
// pages
228
// object, and
229
// we examine
230
// the Count
231
// value
232
obj = kid.get(PDFNAME_COUNT);
233                             if ( ( !(obj instanceof PdfInteger) ) &&
234                                  ( !(obj instanceof PdfReference) ) ) {
235                                 throw new PdfFormatException(
236                                     "Page count is not an integer or reference.");
237                             }
238                             if (obj instanceof PdfReference) {
239                                 obj = _m.getObjectIndirect((PdfReference)obj);
240                             }
241                             if ( !(obj instanceof PdfInteger) ) {
242                                 throw new PdfFormatException(
243                                     "Page count is not an integer.");
244                             }
245                             count = ((PdfInteger)obj).getInt();
246                         }
247
248                         if ( (pageSum + count) > pageNumber ) {
249
250                             if (singlePage) {
251                                 // this is the page we are looking for
252
return kidR;
253                             } else {
254                                 // descend this node; don't bother with the rest of the
255
// kids in the list
256
node = kid;
257                                 descend = true;
258                             }
259                             
260                         } else {
261
262                             // we will
263
// keep
264
// iterating
265
// the kid
266
// list, so we
267
// add the
268
// number of
269
// pages to
270
// the left to
271
// our running
272
// sum
273
pageSum += count;
274
275                         }
276                         
277                     } // for()
278

279                     // if descend was not set to
280
// true, then the for() loop
281
// completed normally, meaning
282
// that the kids do not
283
// contains enough pages, and
284
// something is wrong with the
285
// document
286
if ( !descend ) {
287                         throw new PdfFormatException(
288                             "Requested page not found.");
289                     }
290                     
291                 } // while()
292
}
293         }
294     }
295     
296         /**
297            Returns the number of pages in the document.
298            @return the number of pages.
299            @throws IOException
300            @throws PdfFormatException
301          */

302         public int getNumberOfPages() throws IOException, PdfFormatException {
303                 synchronized (this) {
304             synchronized (_m) {
305
306                 Object JavaDoc obj = _m.getObjectIndirect(getRoot());
307
308                 if ( !(obj instanceof PdfDictionary) ) {
309                     throw new PdfFormatException(
310                         "Page tree root (Pages) is not a dictionary.");
311                 }
312
313                 Map root = ((PdfDictionary)obj).getMap();
314
315                 obj = root.get(PDFNAME_COUNT);
316                 
317                 if ( ( !(obj instanceof PdfInteger) ) &&
318                      ( !(obj instanceof PdfReference) ) ) {
319                     throw new PdfFormatException(
320                         "Page count is not an integer or reference.");
321                 }
322                 
323                 if (obj instanceof PdfReference) {
324                     obj = _m.getObjectIndirect((PdfReference)obj);
325                 }
326                         
327                 if ( !(obj instanceof PdfInteger) ) {
328                     throw new PdfFormatException(
329                         "Page count is not an integer.");
330                 }
331
332                 return ((PdfInteger)obj).getInt();
333                 
334             }
335                 }
336         }
337
338     /**
339        Returns an indirect reference to the root node of the
340        document's page tree.
341        @return the indirect reference.
342        @throws IOException
343        @throws PdfFormatException
344      */

345     public PdfReference getRoot() throws IOException, PdfFormatException {
346         synchronized (this) {
347             synchronized (_m) {
348
349                 Object JavaDoc obj = _m.getObjectIndirect(_catalog.getCatalog());
350                 if ( !(obj instanceof PdfDictionary) ) {
351                     throw new PdfFormatException(
352                         "Catalog is not a dictionary.");
353                 }
354                 PdfDictionary catalog = (PdfDictionary)obj;
355
356                 obj = catalog.getMap().get(PDFNAME_PAGES);
357                 if ( !(obj instanceof PdfReference) ) {
358                     throw new PdfFormatException(
359                         "Page tree root (Pages) is not an indirect reference.");
360                 }
361                 return (PdfReference)obj;
362
363             }
364         }
365     }
366
367     /**
368        Adds inherited attributes to a specified page dictionary
369        object. The page object is cloned and the inherited
370        attributes are made explicit in the cloned object's
371        dictionary. The inherited attributes are retrieved by
372        ascending the page tree and looking for inheritable
373        attributes (if any) that are missing from the specified
374        page dictionary.
375        @param page the page dictionary to be filled in with
376        inherited attributes.
377        @return a clone of the specified page dictionary, with all
378        inherited attributes filled in.
379        @throws IOException
380        @throws PdfFormatException
381      */

382     public PdfDictionary inheritAttributes(PdfDictionary page) throws IOException, PdfFormatException {
383         synchronized (this) {
384             synchronized (_m) {
385
386                 Map pageM = page.getMap();
387                 
388                 // define new dictionary map
389
Map newMap = new HashMap(page.getMap());
390         
391                 // start out looking for all inheritable attributes
392
// that are not present in this page
393
Set unused = new HashSet(_inheritable.size());
394                 for (Iterator t = _inheritable.iterator(); t.hasNext(); ) {
395                     
396                     PdfName attr = (PdfName)t.next();
397                     Object JavaDoc obj = pageM.get(attr);
398                     
399                     if ( (obj == null) || (obj instanceof PdfNull) ) {
400                         unused.add(attr);
401                     }
402                     
403                 }
404                 
405                 boolean done = false;
406                 
407                 do {
408                     
409                     // if all the inheritable attributes have been
410
// filled, there is no need to continue
411
// ascending the tree
412
if (unused.isEmpty()) {
413                         done = true;
414                         break;
415                     }
416                     
417                     // get the Parent node
418
Object JavaDoc obj = pageM.get(PDFNAME_PARENT);
419                     if (obj == null) {
420                         // we are done
421
done = true;
422                         break;
423                     }
424                     if ( !(obj instanceof PdfObject) ) {
425                         throw new PdfFormatException(
426                             "Parent object is not a PDF object.");
427                     }
428                     obj = _m.getObjectIndirect((PdfObject)obj);
429                     if ( !(obj instanceof PdfDictionary) ) {
430                         throw new PdfFormatException(
431                             "Parent object is not a dictionary.");
432                     }
433                     pageM = ((PdfDictionary)obj).getMap();
434                     
435                     // now examine the parent node
436
for (Iterator t = unused.iterator(); t.hasNext(); ) {
437                         
438                         PdfName attr = (PdfName)t.next();
439                         
440                         // check if the attribute is present
441
obj = pageM.get(attr);
442                         if ( (obj != null) && ( !(obj instanceof PdfNull) ) ) {
443                             t.remove();
444                             newMap.put(attr, obj);
445                         }
446                         
447                     }
448                     
449                 } while ( !done );
450
451                 return new PdfDictionary(newMap);
452
453             }
454         }
455     }
456     
457 }
458
Popular Tags