1 31 package org.pdfbox.pdfparser; 32 33 import java.io.ByteArrayOutputStream ; 34 import java.io.InputStream ; 35 import java.io.IOException ; 36 37 import java.util.ArrayList ; 38 import java.util.List ; 39 40 import org.pdfbox.cos.COSBase; 41 import org.pdfbox.cos.COSBoolean; 42 import org.pdfbox.cos.COSDictionary; 43 import org.pdfbox.cos.COSName; 44 import org.pdfbox.cos.COSNull; 45 import org.pdfbox.cos.COSNumber; 46 import org.pdfbox.cos.COSObject; 47 import org.pdfbox.cos.COSStream; 48 import org.pdfbox.io.RandomAccess; 49 50 import org.pdfbox.pdmodel.common.PDStream; 51 import org.pdfbox.util.PDFOperator; 52 import org.pdfbox.util.ImageParameters; 53 54 60 public class PDFStreamParser extends BaseParser 61 { 62 private List streamObjects = new ArrayList ( 100 ); 63 private RandomAccess file; 64 private PDFOperator lastBIToken = null; 65 66 74 public PDFStreamParser( InputStream stream, RandomAccess raf ) throws IOException 75 { 76 super( stream ); 77 file = raf; 78 } 79 80 87 public PDFStreamParser( PDStream stream ) throws IOException 88 { 89 this( stream.createInputStream(), stream.getStream().getScratchFile() ); 90 } 91 92 99 public PDFStreamParser( COSStream stream ) throws IOException 100 { 101 this( stream.getUnfilteredStream(), stream.getScratchFile() ); 102 } 103 104 110 public void parse() throws IOException 111 { 112 try 113 { 114 Object token = null; 115 while( (token = parseNextToken()) != null ) 116 { 117 streamObjects.add( token ); 118 } 119 } 120 finally 121 { 122 pdfSource.close(); 123 } 124 } 125 126 131 public List getTokens() 132 { 133 return streamObjects; 134 } 135 136 143 private Object parseNextToken() throws IOException 144 { 145 Object retval = null; 146 147 skipSpaces(); 148 int nextByte = pdfSource.peek(); 149 if( ((byte)nextByte) == -1 ) 150 { 151 return null; 152 } 153 char c = (char)nextByte; 154 switch(c) 155 { 156 case '<': 157 { 158 int leftBracket = pdfSource.read(); c = (char)pdfSource.peek(); pdfSource.unread( leftBracket ); if(c == '<') 162 { 163 164 COSDictionary pod = parseCOSDictionary(); 165 skipSpaces(); 166 if((char)pdfSource.peek() == 's') 167 { 168 retval = parseCOSStream( pod, file ); 169 } 170 else 171 { 172 retval = pod; 173 } 174 } 175 else 176 { 177 retval = parseCOSString(); 178 } 179 break; 180 } 181 case '[': { 183 retval = parseCOSArray(); 184 break; 185 } 186 case '(': retval = parseCOSString(); 188 break; 189 case '/': retval = parseCOSName(); 191 break; 192 case 'n': { 194 String nullString = readString(); 195 if( nullString.equals( "null") ) 196 { 197 retval = COSNull.NULL; 198 } 199 else 200 { 201 retval = PDFOperator.getOperator( nullString ); 202 } 203 break; 204 } 205 case 't': 206 case 'f': 207 { 208 String next = readString(); 209 if( next.equals( "true" ) ) 210 { 211 retval = COSBoolean.TRUE; 212 break; 213 } 214 else if( next.equals( "false" ) ) 215 { 216 retval = COSBoolean.FALSE; 217 } 218 else 219 { 220 retval = PDFOperator.getOperator( next ); 221 } 222 break; 223 } 224 case 'R': 225 { 226 String line = readString(); 227 if( line.equals( "R" ) ) 228 { 229 retval = new COSObject( null ); 230 } 231 else 232 { 233 retval = PDFOperator.getOperator( line ); 234 } 235 break; 236 } 237 case '0': 238 case '1': 239 case '2': 240 case '3': 241 case '4': 242 case '5': 243 case '6': 244 case '7': 245 case '8': 246 case '9': 247 case '-': 248 case '+': 249 case '.': 250 { 251 if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') 252 { 253 StringBuffer buf = new StringBuffer (); 254 while( Character.isDigit(( c = (char)pdfSource.peek()) )|| c== '-' || c== '+' || c =='.' ) 255 { 256 buf.append( c ); 257 pdfSource.read(); 258 } 259 retval = COSNumber.get( buf.toString() ); 260 } 261 else 262 { 263 throw new IOException ( "Unknown dir object c='" + c + 264 "' peek='" + (char)pdfSource.peek() + "' " + pdfSource ); 265 } 266 break; 267 } 268 case 'B': 269 { 270 String next = readString(); 271 retval = PDFOperator.getOperator( next ); 272 273 if( next.equals( "BI" ) ) 274 { 275 lastBIToken = (PDFOperator)retval; 276 COSDictionary imageParams = new COSDictionary(); 277 lastBIToken.setImageParameters( new ImageParameters( imageParams ) ); 278 Object nextToken = null; 279 while( (nextToken = parseNextToken()) instanceof COSName ) 280 { 281 Object value = parseNextToken(); 282 imageParams.setItem( (COSName)nextToken, (COSBase)value ); 283 } 284 PDFOperator imageData = (PDFOperator)nextToken; 286 lastBIToken.setImageData( imageData.getImageData() ); 287 } 288 break; 289 } 290 case 'I': 291 { 292 294 String id = "" + (char)pdfSource.read() + (char)pdfSource.read(); 298 if( !id.equals( "ID" ) ) 299 { 300 throw new IOException ( "Error: Expected operator 'ID' actual='" + id + "'" ); 301 } 302 ByteArrayOutputStream imageData = new ByteArrayOutputStream (); 303 if( this.isWhitespace() ) 305 { 306 pdfSource.read(); 308 } 309 int twoBytesAgo = 0; 310 int lastByte = pdfSource.read(); 311 int currentByte = pdfSource.read(); 312 int count = 0; 313 while( !(isWhitespace( twoBytesAgo ) && 319 lastByte == 'E' && 320 currentByte == 'I' && 321 isWhitespace() ) && 326 !pdfSource.isEOF() ) 327 { 328 imageData.write( lastByte ); 329 twoBytesAgo = lastByte; 330 lastByte = currentByte; 331 currentByte = pdfSource.read(); 332 count++; 333 } 334 pdfSource.unread( 'I' ); pdfSource.unread( 'E' ); 336 retval = PDFOperator.getOperator( "ID" ); 337 ((PDFOperator)retval).setImageData( imageData.toByteArray() ); 338 break; 339 } 340 case ']': 341 { 342 pdfSource.read(); 345 retval = COSNull.NULL; break; 347 } 348 default: 349 { 350 String operator = readOperator(); 352 if( operator.trim().length() == 0 ) 353 { 354 retval = null; 356 } 357 else 358 { 359 retval = PDFOperator.getOperator( operator ); 360 } 361 } 362 363 } 364 365 return retval; 366 } 367 368 375 protected String readOperator() throws IOException 376 { 377 skipSpaces(); 378 379 StringBuffer buffer = new StringBuffer (4); 382 while( 383 !isWhitespace() && 384 !isClosing() && 385 !pdfSource.isEOF() && 386 pdfSource.peek() != (int)'[' && 387 pdfSource.peek() != (int)'<' && 388 pdfSource.peek() != (int)'(' && 389 pdfSource.peek() != (int)'/' && 390 (pdfSource.peek() < (int)'0' || 391 pdfSource.peek() > (int)'9' ) ) 392 { 393 buffer.append( (char)pdfSource.read() ); 394 } 395 return buffer.toString(); 396 } 397 } | Popular Tags |