ParsingSample


1   package it.stefanochizzolini.clown.samples;
2   
3   import it.stefanochizzolini.clown.bytes.FileInputStream;
4   import it.stefanochizzolini.clown.bytes.IBuffer;
5   import it.stefanochizzolini.clown.documents.Document;
6   import it.stefanochizzolini.clown.documents.Page;
7   import it.stefanochizzolini.clown.documents.Pages;
8   import it.stefanochizzolini.clown.documents.contents.ContentStream;
9   import it.stefanochizzolini.clown.documents.contents.Resources;
10  import it.stefanochizzolini.clown.documents.contents.objects.Operation;
11  import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
12  import it.stefanochizzolini.clown.files.File;
13  import it.stefanochizzolini.clown.objects.PdfDictionary;
14  import it.stefanochizzolini.clown.objects.PdfIndirectObject;
15  import it.stefanochizzolini.clown.objects.PdfName;
16  import it.stefanochizzolini.clown.objects.PdfReference;
17  import it.stefanochizzolini.clown.tokens.FileFormatException;
18  
19  import java.io.RandomAccessFile  ;
20  import java.util.HashMap  ;
21  import java.util.Map  ;
22  
23  /**
24    This sample demonstrates how to inspect the structure of a PDF document.
25    <h3>Remarks</h3>
26    <p>This implementation is just a limited exercise: see the API documentation
27    to perform all the possible access functionalities.</p>
28  */
29  public class ParsingSample
30    implements ISample
31  {
32    public void run(
33      PDFClownSampleLoader loader
34      )
35    {
36      /*
37        NOTE: This procedure is made up of this sequence of actions:
38        1. User choice.
39        2. Document parsing.
40      */
41  
42      // 1. User choice.
43      String   filePath = loader.getPdfFileChoice("Please select a PDF file");
44  
45      // 2. Document parsing.
46      File file;
47      try
48      {
49        // Open the PDF file!
50        file = new File(
51          new FileInputStream(
52            new RandomAccessFile  (filePath,"r")
53            )
54          );
55      }
56      catch(FileFormatException e)
57      {throw new RuntimeException  (filePath + " file has a bad file format.",e);}
58      catch(Exception   e)
59      {throw new RuntimeException  (filePath + " file access error.",e);}
60  
61      // Get the PDF document!
62      Document document = file.getDocument();
63  
64      System.out.println("\nDocument information:");
65      Information info = document.getInformation();
66      if(info == null)
67      {
68        System.out.println("No information available (Info dictionary doesn't exist).");
69      }
70      else
71      {
72        System.out.println("Author: " + info.getAuthor());
73        System.out.println("Title: " + info.getTitle());
74        System.out.println("Subject: " + info.getSubject());
75        System.out.println("CreationDate: " + info.getCreationDate());
76      }
77  
78      System.out.println("\nIterating through the indirect-object collection (please wait)...");
79  
80      // Count the indirect objects, grouping them by type.
81      HashMap  <String  ,Integer  > objCounters = new HashMap  <String  ,Integer  >();
82      objCounters.put("xref free entry",0);
83      for(PdfIndirectObject object : file.getIndirectObjects())
84      {
85        if(object.isInUse())
86        {
87          String   typeName = object.getDataObject().getClass().getName();
88          if(objCounters.containsKey(typeName))
89          {objCounters.put(typeName, objCounters.get(typeName) + 1);}
90          else
91          {objCounters.put(typeName, 1);}
92        }
93        else
94        {
95          objCounters.put("xref free entry", objCounters.get("xref free entry") + 1);
96        }
97      }
98  
99      System.out.println("\nIndirect objects partial counts (grouped by PDF object type):");
100     for(Map.Entry  <String  ,Integer  > entry : objCounters.entrySet())
101     {System.out.println(" " + entry.getKey() + ": " + entry.getValue());}
102     System.out.println("Indirect objects total count: " + file.getIndirectObjects().size());
103 
104     Pages pages = document.getPages();
105     int pageCount = pages.size();
106     System.out.println("\nPage count: " + pageCount);
107     int pageIndex = (int)Math.floor((float)pageCount / 2);
108     Page page = pages.get(pageIndex);
109     System.out.println("Mid page:");
110     printPageInfo(page,pageIndex);
111 
112     pageIndex++;
113     if(pageIndex < pageCount)
114     {
115       System.out.println("Next page:");
116       printPageInfo(page.getNext(),pageIndex);
117     }
118   }
119 
120   private void printPageInfo(
121     Page page,
122     int index
123     )
124   {
125     System.out.println(" Index (calculated): " + page.getIndex() + " (should be " + index + ")");
126     System.out.println(" ID: " + ((PdfReference)page.getBaseObject()).getID());
127     PdfDictionary pageDictionary = page.getBaseDataObject();
128     System.out.println(" Dictionary entries:");
129     for(PdfName key : pageDictionary.keySet())
130     {System.out.println("  " + key.getValue());}
131 
132     ContentStream content = page.getContents().get(0);
133     IBuffer contentBody = content.getBaseDataObject().getBody();
134     System.out.println(" Content hashcode: " + contentBody.hashCode());
135     System.out.println(" Content size: " + contentBody.getLength());
136     System.out.println(" Content head (operations):");
137     it.stefanochizzolini.clown.documents.contents.tokens.Parser contentParser = content.getParser();
138     try
139     {
140       for(int i = 0; i < 10; i++)
141       {
142         contentParser.moveNext();
143         Operation operation = contentParser.parseOperation();
144         System.out.println("  " + i + ": " + operation.toString());
145       }
146     }
147     catch(Exception   e)
148     {throw new RuntimeException  (e);}
149     System.out.println(" Content head (source):");
150     System.out.println(contentBody.getString(0,(int)contentParser.getPosition()));
151 
152     {
153       Resources resources = page.getResources();
154       System.out.println(" Resources:");
155       Map   subResources = resources.getFonts();
156       if(subResources != null)
157       {System.out.println("  Font count: " + subResources.size());}
158 
159       subResources = resources.getXObjects();
160       if(subResources != null)
161       {System.out.println("  XObjects count: " + subResources.size());}
162 
163       subResources = resources.getColorSpaces();
164       if(subResources != null)
165       {System.out.println("  ColorSpaces count: " + subResources.size());}
166     }
167   }
168 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags