package it.stefanochizzolini.clown.samples;

import it.stefanochizzolini.clown.bytes.FileInputStream;
import it.stefanochizzolini.clown.bytes.IBuffer;
import it.stefanochizzolini.clown.documents.Document;
import it.stefanochizzolini.clown.documents.Page;
import it.stefanochizzolini.clown.documents.Pages;
import it.stefanochizzolini.clown.documents.contents.ContentStream;
import it.stefanochizzolini.clown.documents.contents.Resources;
import it.stefanochizzolini.clown.documents.contents.objects.Operation;
import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
import it.stefanochizzolini.clown.files.File;
import it.stefanochizzolini.clown.objects.PdfDictionary;
import it.stefanochizzolini.clown.objects.PdfIndirectObject;
import it.stefanochizzolini.clown.objects.PdfName;
import it.stefanochizzolini.clown.objects.PdfReference;
import it.stefanochizzolini.clown.tokens.FileFormatException;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Map;

/**
  Sample that opens a user-selected PDF file and prints to standard output:
  the document information entries, the indirect-object counts grouped by
  data-object class, and details about the middle page and the page after it.
*/
public class ParsingSample
  implements ISample
{
  /** Counter key under which indirect objects that are not in use are tallied. */
  private static final String FREE_ENTRY_KEY = "xref free entry";

  /** Number of leading content operations printed for each inspected page. */
  private static final int CONTENT_HEAD_OPERATION_COUNT = 10;

  /**
    Asks the loader for a PDF file, parses it and prints its report.

    @param loader Sample loader used to let the user choose the PDF file.
    @throws RuntimeException if the file cannot be accessed or has a bad file
      format (the original exception is preserved as the cause).
  */
  public void run(
    PDFClownSampleLoader loader
    )
  {
    String filePath = loader.getPdfFileChoice("Please select a PDF file");

    // Kept outside the parsing 'try' so that the handle can be released in
    // 'finally' whatever happens while parsing or reporting.
    RandomAccessFile rawFile = null;
    try
    {
      File file;
      try
      {
        rawFile = new RandomAccessFile(filePath,"r");
        file = new File(
          new FileInputStream(rawFile)
          );
      }
      catch(FileFormatException e)
      {throw new RuntimeException(filePath + " file has a bad file format.",e);}
      catch(Exception e)
      {throw new RuntimeException(filePath + " file access error.",e);}

      Document document = file.getDocument();
      printDocumentInfo(document);
      printIndirectObjectCounts(file);
      printPagesInfo(document.getPages());
    }
    finally
    {closeQuietly(rawFile);}
  }

  /**
    Releases the underlying file handle, if one was opened.

    @param rawFile Handle to close; may be null when opening failed.
  */
  private void closeQuietly(
    RandomAccessFile rawFile
    )
  {
    if(rawFile == null)
      return;

    try
    {rawFile.close();}
    catch(IOException ignored)
    {
      // The handle was opened read-only and the report has already been
      // produced (or a more meaningful exception is already propagating),
      // so a failure to close is deliberately not escalated.
    }
  }

  /**
    Prints the author, title, subject and creation date of the document, or a
    notice when the document has no information object.

    @param document Document whose information entries are printed.
  */
  private void printDocumentInfo(
    Document document
    )
  {
    System.out.println("\nDocument information:");
    Information info = document.getInformation();
    if(info == null)
    {
      System.out.println("No information available (Info dictionary doesn't exist).");
      return;
    }

    System.out.println("Author: " + info.getAuthor());
    System.out.println("Title: " + info.getTitle());
    System.out.println("Subject: " + info.getSubject());
    System.out.println("CreationDate: " + info.getCreationDate());
  }

  /**
    Iterates over the indirect objects of the file and prints how many of them
    there are per data-object class name, plus the number of entries not in
    use and the total count.

    @param file File whose indirect objects are counted.
  */
  private void printIndirectObjectCounts(
    File file
    )
  {
    System.out.println("\nIterating through the indirect-object collection (please wait)...");

    Map<String,Integer> objCounters = new HashMap<String,Integer>();
    // The free-entry counter is always reported, even when it stays at zero.
    objCounters.put(FREE_ENTRY_KEY,0);
    for(PdfIndirectObject object : file.getIndirectObjects())
    {
      String counterKey = object.isInUse()
        ? object.getDataObject().getClass().getName()
        : FREE_ENTRY_KEY;
      Integer count = objCounters.get(counterKey);
      objCounters.put(counterKey, count == null ? 1 : count + 1);
    }

    System.out.println("\nIndirect objects partial counts (grouped by PDF object type):");
    for(Map.Entry<String,Integer> entry : objCounters.entrySet())
    {System.out.println(" " + entry.getKey() + ": " + entry.getValue());}
    System.out.println("Indirect objects total count: " + file.getIndirectObjects().size());
  }

  /**
    Prints the page count and the details of the middle page and, when it
    exists, of the page that follows it.

    @param pages Page collection of the document.
  */
  private void printPagesInfo(
    Pages pages
    )
  {
    int pageCount = pages.size();
    System.out.println("\nPage count: " + pageCount);
    if(pageCount == 0)
    {
      // Without this guard the middle-page lookup below would be attempted
      // at index 0 of an empty collection.
      System.out.println("No pages available.");
      return;
    }

    // Integer division yields the floor of the half for non-negative counts.
    int pageIndex = pageCount / 2;
    Page page = pages.get(pageIndex);
    System.out.println("Mid page:");
    printPageInfo(page,pageIndex);

    pageIndex++;
    if(pageIndex < pageCount)
    {
      System.out.println("Next page:");
      printPageInfo(page.getNext(),pageIndex);
    }
  }

  /**
    Prints the index, reference ID, dictionary keys, content head and resource
    counts of a page.

    @param page Page to describe.
    @param index Index the page is expected to have; printed next to the index
      reported by the page itself so that the two can be compared.
    @throws RuntimeException if reading the leading content operations fails.
  */
  private void printPageInfo(
    Page page,
    int index
    )
  {
    System.out.println(" Index (calculated): " + page.getIndex() + " (should be " + index + ")");
    System.out.println(" ID: " + ((PdfReference)page.getBaseObject()).getID());
    PdfDictionary pageDictionary = page.getBaseDataObject();
    System.out.println(" Dictionary entries:");
    for(PdfName key : pageDictionary.keySet())
    {System.out.println(" " + key.getValue());}

    printContentHead(page);
    printResourceCounts(page);
  }

  /**
    Prints the hash code and size of the first content stream of a page,
    followed by its leading operations and the source text they were parsed
    from.

    @param page Page whose first content stream is inspected.
    @throws RuntimeException if reading the leading content operations fails.
  */
  private void printContentHead(
    Page page
    )
  {
    ContentStream content = page.getContents().get(0);
    IBuffer contentBody = content.getBaseDataObject().getBody();
    System.out.println(" Content hashcode: " + contentBody.hashCode());
    System.out.println(" Content size: " + contentBody.getLength());
    System.out.println(" Content head (operations):");
    it.stefanochizzolini.clown.documents.contents.tokens.Parser contentParser = content.getParser();
    try
    {
      // NOTE(review): the loop always attempts a fixed number of operations;
      // how the parser behaves on shorter content streams is not visible
      // here -- any failure surfaces as the RuntimeException below.
      for(int i = 0; i < CONTENT_HEAD_OPERATION_COUNT; i++)
      {
        contentParser.moveNext();
        Operation operation = contentParser.parseOperation();
        System.out.println(" " + i + ": " + operation.toString());
      }
    }
    catch(Exception e)
    {throw new RuntimeException(e);}
    System.out.println(" Content head (source):");
    // The parser position marks how far the operations above reached.
    System.out.println(contentBody.getString(0,(int)contentParser.getPosition()));
  }

  /**
    Prints how many fonts, XObjects and color spaces the resources of a page
    hold; a resource category that is absent (null) is skipped.

    @param page Page whose resources are inspected.
  */
  private void printResourceCounts(
    Page page
    )
  {
    Resources resources = page.getResources();
    System.out.println(" Resources:");
    // Only the size is needed, so the entry types are left unbounded.
    Map<?,?> subResources = resources.getFonts();
    if(subResources != null)
    {System.out.println(" Font count: " + subResources.size());}

    subResources = resources.getXObjects();
    if(subResources != null)
    {System.out.println(" XObjects count: " + subResources.size());}

    subResources = resources.getColorSpaces();
    if(subResources != null)
    {System.out.println(" ColorSpaces count: " + subResources.size());}
  }
}