KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > it > stefanochizzolini > clown > samples > ParsingSample


1 package it.stefanochizzolini.clown.samples;
2
3 import it.stefanochizzolini.clown.bytes.FileInputStream;
4 import it.stefanochizzolini.clown.bytes.IBuffer;
5 import it.stefanochizzolini.clown.documents.Document;
6 import it.stefanochizzolini.clown.documents.Page;
7 import it.stefanochizzolini.clown.documents.Pages;
8 import it.stefanochizzolini.clown.documents.contents.ContentStream;
9 import it.stefanochizzolini.clown.documents.contents.Resources;
10 import it.stefanochizzolini.clown.documents.contents.objects.Operation;
11 import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
12 import it.stefanochizzolini.clown.files.File;
13 import it.stefanochizzolini.clown.objects.PdfDictionary;
14 import it.stefanochizzolini.clown.objects.PdfIndirectObject;
15 import it.stefanochizzolini.clown.objects.PdfName;
16 import it.stefanochizzolini.clown.objects.PdfReference;
17 import it.stefanochizzolini.clown.tokens.FileFormatException;
18
19 import java.io.RandomAccessFile JavaDoc;
20 import java.util.HashMap JavaDoc;
21 import java.util.Map JavaDoc;
22
23 /**
24   This sample demonstrates how to inspect the structure of a PDF document.
25   <h3>Remarks</h3>
26   <p>This implementation is just a limited exercise: see the API documentation
27   to perform all the possible access functionalities.</p>
28 */

29 public class ParsingSample
30   implements ISample
31 {
32   public void run(
33     PDFClownSampleLoader loader
34     )
35   {
36     /*
37       NOTE: This procedure is made up of this sequence of actions:
38       1. User choice.
39       2. Document parsing.
40     */

41
42     // 1. User choice.
43
String JavaDoc filePath = loader.getPdfFileChoice("Please select a PDF file");
44
45     // 2. Document parsing.
46
File file;
47     try
48     {
49       // Open the PDF file!
50
file = new File(
51         new FileInputStream(
52           new RandomAccessFile JavaDoc(filePath,"r")
53           )
54         );
55     }
56     catch(FileFormatException e)
57     {throw new RuntimeException JavaDoc(filePath + " file has a bad file format.",e);}
58     catch(Exception JavaDoc e)
59     {throw new RuntimeException JavaDoc(filePath + " file access error.",e);}
60
61     // Get the PDF document!
62
Document document = file.getDocument();
63
64     System.out.println("\nDocument information:");
65     Information info = document.getInformation();
66     if(info == null)
67     {
68       System.out.println("No information available (Info dictionary doesn't exist).");
69     }
70     else
71     {
72       System.out.println("Author: " + info.getAuthor());
73       System.out.println("Title: " + info.getTitle());
74       System.out.println("Subject: " + info.getSubject());
75       System.out.println("CreationDate: " + info.getCreationDate());
76     }
77
78     System.out.println("\nIterating through the indirect-object collection (please wait)...");
79
80     // Count the indirect objects, grouping them by type.
81
HashMap JavaDoc<String JavaDoc,Integer JavaDoc> objCounters = new HashMap JavaDoc<String JavaDoc,Integer JavaDoc>();
82     objCounters.put("xref free entry",0);
83     for(PdfIndirectObject object : file.getIndirectObjects())
84     {
85       if(object.isInUse())
86       {
87         String JavaDoc typeName = object.getDataObject().getClass().getName();
88         if(objCounters.containsKey(typeName))
89         {objCounters.put(typeName, objCounters.get(typeName) + 1);}
90         else
91         {objCounters.put(typeName, 1);}
92       }
93       else
94       {
95         objCounters.put("xref free entry", objCounters.get("xref free entry") + 1);
96       }
97     }
98
99     System.out.println("\nIndirect objects partial counts (grouped by PDF object type):");
100     for(Map.Entry JavaDoc<String JavaDoc,Integer JavaDoc> entry : objCounters.entrySet())
101     {System.out.println(" " + entry.getKey() + ": " + entry.getValue());}
102     System.out.println("Indirect objects total count: " + file.getIndirectObjects().size());
103
104     Pages pages = document.getPages();
105     int pageCount = pages.size();
106     System.out.println("\nPage count: " + pageCount);
107     int pageIndex = (int)Math.floor((float)pageCount / 2);
108     Page page = pages.get(pageIndex);
109     System.out.println("Mid page:");
110     printPageInfo(page,pageIndex);
111
112     pageIndex++;
113     if(pageIndex < pageCount)
114     {
115       System.out.println("Next page:");
116       printPageInfo(page.getNext(),pageIndex);
117     }
118   }
119
120   private void printPageInfo(
121     Page page,
122     int index
123     )
124   {
125     System.out.println(" Index (calculated): " + page.getIndex() + " (should be " + index + ")");
126     System.out.println(" ID: " + ((PdfReference)page.getBaseObject()).getID());
127     PdfDictionary pageDictionary = page.getBaseDataObject();
128     System.out.println(" Dictionary entries:");
129     for(PdfName key : pageDictionary.keySet())
130     {System.out.println(" " + key.getValue());}
131
132     ContentStream content = page.getContents().get(0);
133     IBuffer contentBody = content.getBaseDataObject().getBody();
134     System.out.println(" Content hashcode: " + contentBody.hashCode());
135     System.out.println(" Content size: " + contentBody.getLength());
136     System.out.println(" Content head (operations):");
137     it.stefanochizzolini.clown.documents.contents.tokens.Parser contentParser = content.getParser();
138     try
139     {
140       for(int i = 0; i < 10; i++)
141       {
142         contentParser.moveNext();
143         Operation operation = contentParser.parseOperation();
144         System.out.println(" " + i + ": " + operation.toString());
145       }
146     }
147     catch(Exception JavaDoc e)
148     {throw new RuntimeException JavaDoc(e);}
149     System.out.println(" Content head (source):");
150     System.out.println(contentBody.getString(0,(int)contentParser.getPosition()));
151
152     {
153       Resources resources = page.getResources();
154       System.out.println(" Resources:");
155       Map JavaDoc subResources = resources.getFonts();
156       if(subResources != null)
157       {System.out.println(" Font count: " + subResources.size());}
158
159       subResources = resources.getXObjects();
160       if(subResources != null)
161       {System.out.println(" XObjects count: " + subResources.size());}
162
163       subResources = resources.getColorSpaces();
164       if(subResources != null)
165       {System.out.println(" ColorSpaces count: " + subResources.size());}
166     }
167   }
168 }
Popular Tags