1 31 package org.pdfbox.cmapparser; 32 33 import java.io.FileInputStream ; 34 import java.io.InputStream ; 35 import java.io.IOException ; 36 import java.io.RandomAccessFile ; 37 38 import java.util.List ; 39 40 import org.pdfbox.cmaptypes.CMap; 41 import org.pdfbox.cmaptypes.CodespaceRange; 42 43 import org.pdfbox.cos.COSArray; 44 import org.pdfbox.cos.COSName; 45 import org.pdfbox.cos.COSNumber; 46 import org.pdfbox.cos.COSString; 47 48 import org.pdfbox.pdfparser.PDFStreamParser; 49 50 import org.pdfbox.util.PDFOperator; 51 52 58 public class CMapParser 59 { 60 private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange"; 61 private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar"; 62 private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange"; 63 64 private InputStream input; 65 private CMap result; 66 private RandomAccessFile file; 67 68 74 public CMapParser( InputStream in, RandomAccessFile raf ) 75 { 76 input = in; 77 file = raf; 78 } 79 80 85 public CMap getResult() 86 { 87 return result; 88 } 89 90 95 public void parse() throws IOException 96 { 97 result = new CMap(); 98 PDFStreamParser parser = new PDFStreamParser( input, file ); 99 parser.parse(); 100 List tokens = parser.getTokens(); 101 for( int i=0; i<tokens.size(); i++ ) 102 { 103 Object token = tokens.get( i ); 104 if( token instanceof PDFOperator ) 105 { 106 PDFOperator op = (PDFOperator)token; 107 if( op.getOperation().equals( BEGIN_CODESPACE_RANGE ) ) 108 { 109 COSNumber cosCount = (COSNumber)tokens.get( i-1 ); 110 for( int j=0; j<cosCount.intValue(); j++ ) 111 { 112 i++; 113 COSString startRange = (COSString)tokens.get( i ); 114 i++; 115 COSString endRange = (COSString)tokens.get( i ); 116 CodespaceRange range = new CodespaceRange(); 117 range.setStart( startRange.getBytes() ); 118 range.setEnd( endRange.getBytes() ); 119 result.addCodespaceRange( range ); 120 } 121 } 122 else if( op.getOperation().equals( BEGIN_BASE_FONT_CHAR ) ) 123 { 124 COSNumber cosCount = (COSNumber)tokens.get( i-1 ); 125 for( int j=0; j<cosCount.intValue(); j++ ) 126 { 127 i++; 128 COSString inputCode = (COSString)tokens.get( i ); 129 i++; 130 Object nextToken = tokens.get( i ); 131 if( nextToken instanceof COSString ) 132 { 133 byte[] bytes = ((COSString)nextToken).getBytes(); 134 String value = createStringFromBytes( bytes ); 135 result.addMapping( inputCode.getBytes(), value ); 136 } 137 else if( nextToken instanceof COSName ) 138 { 139 result.addMapping( inputCode.getBytes(), ((COSName)nextToken).getName() ); 140 } 141 else 142 { 143 throw new IOException ( "Error parsing CMap beginbfchar, expected{COSString " + 144 "or COSName} and not " + nextToken ); 145 } 146 } 147 } 148 else if( op.getOperation().equals( BEGIN_BASE_FONT_RANGE ) ) 149 { 150 COSNumber cosCount = (COSNumber)tokens.get( i-1 ); 151 152 for( int j=0; j<cosCount.intValue(); j++ ) 153 { 154 i++; 155 COSString startCode = (COSString)tokens.get( i ); 156 i++; 157 COSString endCode = (COSString)tokens.get( i ); 158 i++; 159 Object nextToken = tokens.get( i ); 160 COSArray array = null; 161 if( nextToken instanceof COSArray ) 162 { 163 array = (COSArray)nextToken; 164 } 165 166 byte[] startBytes = startCode.getBytes(); 167 byte[] endBytes = endCode.getBytes(); 168 byte[] tokenBytes = null; 169 if( array == null ) 170 { 171 tokenBytes = ((COSString)nextToken).getBytes(); 172 } 173 else 174 { 175 tokenBytes = ((COSString)array.getObject( 0 )).getBytes(); 176 } 177 178 String value = null; 179 180 int arrayIndex = 0; 181 boolean done = false; 182 while( !done ) 183 { 184 if( compare( startBytes, endBytes ) >= 0 ) 185 { 186 done = true; 187 } 188 value = createStringFromBytes( tokenBytes ); 189 result.addMapping( startBytes, value ); 190 increment( startBytes ); 191 192 if( array == null ) 193 { 194 increment( tokenBytes ); 195 } 196 else 197 { 198 if( arrayIndex < array.size() ) 199 { 200 tokenBytes = ((COSString)array.getObject( arrayIndex++ )).getBytes(); 201 } 202 } 203 } 204 } 205 } 206 } 207 } 208 } 209 210 private void increment( byte[] data ) 211 { 212 increment( data, data.length-1 ); 213 } 214 215 private void increment( byte[] data, int position ) 216 { 217 if( position > 0 && (data[position]+256)%256 == 255 ) 218 { 219 data[position]=0; 220 increment( data, position-1); 221 } 222 else 223 { 224 data[position] = (byte)(data[position]+1); 225 } 226 } 227 228 private String createStringFromBytes( byte[] bytes ) throws IOException 229 { 230 String retval = null; 231 if( bytes.length == 1 ) 232 { 233 retval = new String ( bytes ); 234 } 235 else 236 { 237 retval = new String ( bytes, "UTF-16BE" ); 238 } 239 return retval; 240 } 241 242 private int compare( byte[] first, byte[] second ) 243 { 244 int retval = 1; 245 boolean done = false; 246 for( int i=0; i<first.length && !done; i++ ) 247 { 248 if( first[i] == second[i] ) 249 { 250 } 252 else if( ((first[i]+256)%256) < ((second[i]+256)%256) ) 253 { 254 done = true; 255 retval = -1; 256 } 257 else 258 { 259 done = true; 260 retval = 1; 261 } 262 } 263 return retval; 264 } 265 266 273 public static void main( String [] args ) throws Exception 274 { 275 if( args.length != 1 ) 276 { 277 System.err.println( "usage: java org.pdfbox.cmapparser.CMapParser <CMAP File>" ); 278 System.exit( -1 ); 279 } 280 CMapParser parser = new CMapParser( new FileInputStream ( args[0] ), null ); 281 parser.parse(); 282 CMap result = parser.getResult(); 283 System.out.println( "Result:" + result ); 284 } 285 } | Popular Tags |