package it.stefanochizzolini.clown.documents.contents.tokens;

import it.stefanochizzolini.clown.bytes.IInputStream;
import it.stefanochizzolini.clown.documents.contents.objects.Operation;
import it.stefanochizzolini.clown.objects.PdfArray;
import it.stefanochizzolini.clown.objects.PdfBoolean;
import it.stefanochizzolini.clown.objects.PdfDate;
import it.stefanochizzolini.clown.objects.PdfDictionary;
import it.stefanochizzolini.clown.objects.PdfDirectObject;
import it.stefanochizzolini.clown.objects.PdfHex;
import it.stefanochizzolini.clown.objects.PdfInteger;
import it.stefanochizzolini.clown.objects.PdfLiteral;
import it.stefanochizzolini.clown.objects.PdfName;
import it.stefanochizzolini.clown.objects.PdfNull;
import it.stefanochizzolini.clown.objects.PdfReal;
import it.stefanochizzolini.clown.objects.PdfReference;
import it.stefanochizzolini.clown.objects.PdfStream;
import it.stefanochizzolini.clown.tokens.FileFormatException;
import it.stefanochizzolini.clown.tokens.TokenTypeEnum;

import java.io.EOFException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
  Tokenizer and object builder for a byte stream.
  <p>{@link #moveNext()} scans the next token and exposes it through
  {@link #getToken()} and {@link #getTokenType()}; {@link #parseOperation()}
  assembles the tokens into an operation (operands followed by a keyword).</p>
*/
public class Parser
{
  /**
    Converts a hexadecimal digit character into its numeric value.

    @param c Character code to convert.
    @return Value in the range 0..15, or -1 if <code>c</code> is not a hexadecimal digit.
  */
  protected static int getHex(
    int c
    )
  {
    if(c >= '0' && c <= '9')
      return (c - '0');
    if(c >= 'A' && c <= 'F')
      return (c - 'A' + 10);
    if(c >= 'a' && c <= 'f')
      return (c - 'a' + 10);
    return -1;
  }

  /**
    Tells whether the character is one of the delimiters recognized by this
    parser: <code>( ) &lt; &gt; [ ] / %</code>.

    @param c Character code to test.
  */
  protected static boolean isDelimiter(
    int c
    )
  {
    return (c == '('
      || c == ')'
      || c == '<'
      || c == '>'
      || c == '['
      || c == ']'
      || c == '/'
      || c == '%');
  }

  /**
    Tells whether the character is an end-of-line marker, that is a line feed
    (10) or a carriage return (13).

    @param c Character code to test.
  */
  protected static boolean isEOL(
    int c
    )
  {
    // Decimal codes: the previous values (12 and 15) were the octal spellings
    // of LF and CR, which made comments run past their line end.
    return (c == 10 || c == 13);
  }

  /**
    Tells whether the character is white space: null (0), tab (9), line feed
    (10), form feed (12), carriage return (13) or space (32).

    @param c Character code to test.
  */
  protected static boolean isWhitespace(
    int c
    )
  {
    return (c == 0
      || c == 9
      || c == 10
      || c == 12
      || c == 13
      || c == 32);
  }

  /** Byte source being tokenized. */
  private final IInputStream stream;
  /** Value of the current token (null for purely structural tokens). */
  private Object token;
  /** Kind of the current token. */
  private TokenTypeEnum tokenType;

  /**
    @param stream Byte source to tokenize.
  */
  public Parser(
    IInputStream stream
    )
  {
    this.stream = stream;
  }

  /** Gets the length reported by the underlying stream. */
  public long getLength(
    )
  {return stream.getLength();}

  /** Gets the position reported by the underlying stream. */
  public long getPosition(
    )
  {return stream.getPosition();}

  /** Gets the underlying stream. */
  public IInputStream getStream(
    )
  {return stream;}

  /**
    Gets the value of the token read by the latest {@link #moveNext()} call.
    <p>Depending on {@link #getTokenType()}, it is a String (keyword, name,
    literal, hex), an Integer, a Float, a Boolean, a Date, or null (array and
    dictionary markers, comments, the null object).</p>
  */
  public Object getToken(
    )
  {return token;}

  /** Gets the kind of the token read by the latest {@link #moveNext()} call. */
  public TokenTypeEnum getTokenType(
    )
  {return tokenType;}

  /**
    Moves forward by the given number of tokens.

    @param offset Number of tokens to read.
    @return false if the stream ended before <code>offset</code> tokens were read.
    @throws FileFormatException If a token is malformed.
  */
  public boolean moveNext(
    int offset
    ) throws FileFormatException
  {
    for(int count = 0; count < offset; count++)
    {
      if(!moveNext())
        return false;
    }
    return true;
  }

  /**
    Scans the next token, making it available through {@link #getToken()} and
    {@link #getTokenType()}.

    @return false if only white space (or nothing) was left in the stream; in
      that case the token value is reset to null while the token type keeps
      its previous value.
    @throws FileFormatException If the token is malformed or truncated.
  */
  public boolean moveNext(
    ) throws FileFormatException
  {
    token = null;

    // Skip leading white space; reaching EOF here simply means "no more tokens".
    int c;
    try
    {
      do
      {c = stream.readUnsignedByte();}
      while(isWhitespace(c));
    }
    catch(EOFException e)
    {return false;}

    // Raw text of the token, for the kinds of token which carry a value.
    String text = null;
    switch(c)
    {
      case '/':
        tokenType = TokenTypeEnum.Name;
        text = readName();
        break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
      case '.':
      case '-':
      case '+':
        text = readNumber(c);
        break;
      case '[':
        tokenType = TokenTypeEnum.ArrayBegin;
        break;
      case ']':
        tokenType = TokenTypeEnum.ArrayEnd;
        break;
      case '<':
        text = readHexOrDictionaryBegin();
        break;
      case '>':
        readDictionaryEnd();
        break;
      case '%':
        tokenType = TokenTypeEnum.Comment;
        skipComment();
        break;
      case '(':
        tokenType = TokenTypeEnum.Literal;
        text = readLiteral();
        break;
      default:
        tokenType = TokenTypeEnum.Keyword;
        text = readKeyword(c);
        break;
    }

    if(text != null)
    {convertToken(text);}

    return true;
  }

  /**
    Builds an operation from the current token onwards: every token up to the
    next keyword is collected as an operand, the keyword itself becomes the
    operator. The current token must already have been loaded through
    {@link #moveNext()}.
    <p>Tokens that {@link #parsePdfObject()} cannot map to an object are
    collected as null operands.</p>

    @throws FileFormatException If a token is malformed or the stream ends
      before the operator keyword is found.
  */
  public Operation parseOperation(
    ) throws FileFormatException
  {
    List<PdfDirectObject> operands = new ArrayList<PdfDirectObject>();
    while(tokenType != TokenTypeEnum.Keyword)
    {
      operands.add(parsePdfObject());
      // Without this check a truncated stream would loop forever, as the
      // token type never changes once the stream is exhausted.
      moveNextOrFail("operation");
    }

    return new Operation(
      (String)token,
      operands
      );
  }

  /**
    Builds the object corresponding to the current token; arrays and
    dictionaries are parsed recursively up to their closing marker.

    @return The parsed object, or null if the current token type doesn't
      correspond to any object (for example a keyword or a closing marker).
    @throws FileFormatException If a token is malformed or the stream ends
      inside an array or a dictionary.
  */
  protected PdfDirectObject parsePdfObject(
    ) throws FileFormatException
  {
    switch(tokenType)
    {
      case Integer:
        return new PdfInteger((Integer)token);
      case Name:
        return new PdfName((String)token,true);
      case Literal:
        return new PdfLiteral((String)token);
      case DictionaryBegin:
      {
        PdfDictionary dictionary = new PdfDictionary();
        while(true)
        {
          // Key (or dictionary end).
          moveNextOrFail("dictionary");
          if(tokenType == TokenTypeEnum.DictionaryEnd)
            break;
          PdfName key = (PdfName)parsePdfObject();

          // Value.
          moveNextOrFail("dictionary");
          PdfDirectObject value = parsePdfObject();

          dictionary.put(key,value);
        }
        return dictionary;
      }
      case ArrayBegin:
      {
        PdfArray array = new PdfArray();
        while(true)
        {
          // Item (or array end).
          moveNextOrFail("array");
          if(tokenType == TokenTypeEnum.ArrayEnd)
            break;

          array.add(parsePdfObject());
        }
        return array;
      }
      case Real:
        return new PdfReal((Float)token);
      case Boolean:
        return new PdfBoolean((Boolean)token);
      case Date:
        return new PdfDate((Date)token);
      case Hex:
        return new PdfHex((String)token);
      case Null:
        return PdfNull.Null;
      default:
        return null;
    }
  }

  /** Delegates to the <code>seek</code> method of the underlying stream. */
  public void seek(
    long offset
    )
  {stream.seek(offset);}

  /** Delegates to the <code>skip</code> method of the underlying stream. */
  public void skip(
    long offset
    )
  {stream.skip(offset);}

  /**
    Moves past any white space, stopping right before the first non-white-space
    byte.

    @return false if the stream ended before a non-white-space byte was found.
  */
  public boolean skipWhitespace(
    )
  {
    try
    {
      int b;
      do
      {b = stream.readUnsignedByte();}
      while(isWhitespace(b));
    }
    catch(EOFException e)
    {return false;}

    // Step back so that the non-white-space byte is read again by the next call.
    stream.skip(-1);
    return true;
  }

  /** Moves to the next token, failing if the stream is exhausted. */
  private void moveNextOrFail(
    String context
    ) throws FileFormatException
  {
    if(!moveNext())
      throw new FileFormatException("Unexpected EOF (malformed " + context + ").",stream.getPosition());
  }

  /** Reads the text of a name object (the leading slash has already been consumed). */
  private String readName(
    ) throws FileFormatException
  {
    StringBuilder buffer = new StringBuilder();
    try
    {
      while(true)
      {
        int c = stream.readUnsignedByte();
        if(isDelimiter(c) || isWhitespace(c))
          break;

        // '#' introduces a 2-digit hexadecimal character code.
        if(c == '#')
        {
          try
          {c = (getHex(stream.readUnsignedByte()) << 4) + getHex(stream.readUnsignedByte());}
          catch(EOFException e)
          {throw new FileFormatException("Unexpected EOF (malformed hexadecimal code in name object).",e,stream.getPosition());}
        }

        buffer.append((char)c);
      }
    }
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed name object).",e,stream.getPosition());}

    // Give back the byte which terminated the name.
    stream.skip(-1);
    return buffer.toString();
  }

  /**
    Reads the text of a number starting with the given character, setting the
    token type to Real as soon as a decimal point is met, Integer otherwise.
  */
  private String readNumber(
    int c
    ) throws FileFormatException
  {
    tokenType = (c == '.' ? TokenTypeEnum.Real : TokenTypeEnum.Integer);

    StringBuilder buffer = new StringBuilder();
    try
    {
      while(true)
      {
        buffer.append((char)c);
        c = stream.readUnsignedByte();
        if(c == '.')
        {tokenType = TokenTypeEnum.Real;}
        else if(c < '0' || c > '9')
          break;
      }
    }
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed number object).",e,stream.getPosition());}

    // Give back the byte which terminated the number.
    stream.skip(-1);
    return buffer.toString();
  }

  /**
    Handles a token starting with an opening angle bracket: either a dictionary
    opening (double bracket) or a hexadecimal string (single bracket).

    @return The text of the hexadecimal string, or null for a dictionary opening.
  */
  private String readHexOrDictionaryBegin(
    ) throws FileFormatException
  {
    int c;
    try
    {c = stream.readUnsignedByte();}
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).",e,stream.getPosition());}

    if(c == '<')
    {
      tokenType = TokenTypeEnum.DictionaryBegin;
      return null;
    }

    tokenType = TokenTypeEnum.Hex;

    // The byte just read is already the first character of the string (or its
    // terminator, for an empty string), so it must be examined before reading on.
    StringBuilder buffer = new StringBuilder();
    try
    {
      while(c != '>')
      {
        buffer.append((char)c);
        c = stream.readUnsignedByte();
      }
    }
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed hex string).",e,stream.getPosition());}

    return buffer.toString();
  }

  /** Handles a dictionary closing (the first closing angle bracket has already been consumed). */
  private void readDictionaryEnd(
    ) throws FileFormatException
  {
    int c;
    try
    {c = stream.readUnsignedByte();}
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed dictionary).",e,stream.getPosition());}
    if(c != '>')
      throw new FileFormatException("Malformed dictionary.",stream.getPosition());

    tokenType = TokenTypeEnum.DictionaryEnd;
  }

  /** Consumes a comment up to (and including) its end-of-line marker; its text is discarded. */
  private void skipComment(
    )
  {
    try
    {
      int c;
      do
      {c = stream.readUnsignedByte();}
      while(!isEOL(c));
    }
    catch(EOFException ignored)
    {/* A comment may run up to the end of the stream. */}
  }

  /**
    Reads the text of a literal string (the opening parenthesis has already been
    consumed), resolving escape sequences and normalizing unescaped line ends
    (CR and CR-LF) to a line feed.
  */
  private String readLiteral(
    ) throws FileFormatException
  {
    StringBuilder buffer = new StringBuilder();
    // Nesting depth of the unescaped parentheses; -1 means the string is closed.
    int level = 0;
    try
    {
      while(true)
      {
        int c = stream.readUnsignedByte();
        if(c == '(')
        {level++;}
        else if(c == ')')
        {level--;}
        else if(c == '\\')
        {
          boolean lineBreak = false;
          c = stream.readUnsignedByte();
          switch(c)
          {
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case '(':
            case ')':
            case '\\':
              break;
            case '\r':
              // Escaped line end (CR or CR-LF): line continuation, nothing to store.
              lineBreak = true;
              c = stream.readUnsignedByte();
              if(c != '\n')
              {stream.skip(-1);}
              break;
            case '\n':
              // Escaped line end (LF): line continuation, nothing to store.
              lineBreak = true;
              break;
            default:
              // Octal character code; any other character is kept as is.
              if(c >= '0' && c <= '7')
              {c = readOctalEscape(c);}
              break;
          }
          if(lineBreak)
            continue;
        }
        else if(c == '\r')
        {
          c = stream.readUnsignedByte();
          if(c != '\n')
          {
            c = '\n';
            stream.skip(-1);
          }
        }

        if(level == -1)
          break;

        buffer.append((char)c);
      }
    }
    catch(EOFException e)
    {throw new FileFormatException("Unexpected EOF (malformed literal string).",e,stream.getPosition());}

    return buffer.toString();
  }

  /**
    Reads the remaining digits (2 at most) of an octal escape sequence whose
    first digit has already been consumed.

    @param firstDigit First octal digit character.
    @return Character code; a 3-digit code is truncated to its low-order 8 bits.
  */
  private int readOctalEscape(
    int firstDigit
    ) throws EOFException
  {
    int octal = firstDigit - '0';
    for(int digitCount = 1; digitCount < 3; digitCount++)
    {
      int c = stream.readUnsignedByte();
      if(c < '0' || c > '7')
      {
        // Not part of the escape sequence: give it back.
        stream.skip(-1);
        return octal;
      }
      octal = (octal << 3) + c - '0';
    }
    return octal & 0xff;
  }

  /** Reads the text of a keyword starting with the given character. */
  private String readKeyword(
    int c
    )
  {
    StringBuilder buffer = new StringBuilder();
    try
    {
      do
      {
        buffer.append((char)c);
        c = stream.readUnsignedByte();
      } while(!isDelimiter(c) && !isWhitespace(c));
    }
    catch(EOFException ignored)
    {/* A keyword may be the last token of the stream. */}

    // Give back the byte which terminated the keyword.
    // NOTE(review): this also runs after EOF, so it relies on how the stream
    // moves its position on a failed read -- confirm against IInputStream.
    stream.skip(-1);
    return buffer.toString();
  }

  /**
    Converts the raw text of the current token into its value, refining the
    token type where needed (Keyword to Boolean/Null, Literal to Date).
  */
  private void convertToken(
    String text
    ) throws FileFormatException
  {
    switch(tokenType)
    {
      case Keyword:
        if(text.equals("false")
          || text.equals("true"))
        {
          tokenType = TokenTypeEnum.Boolean;
          token = Boolean.parseBoolean(text);
        }
        else if(text.equals("null"))
        {
          tokenType = TokenTypeEnum.Null;
          token = null;
        }
        else
        {token = text;}
        break;
      case Comment:
      case Hex:
      case Name:
        token = text;
        break;
      case Literal:
        if(text.startsWith("D:"))
        {
          tokenType = TokenTypeEnum.Date;
          token = PdfDate.toDate(text);
        }
        else
        {token = text;}
        break;
      case Integer:
        try
        {token = Integer.parseInt(text);}
        catch(NumberFormatException e)
        {throw new FileFormatException("Malformed integer object (" + text + ").",e,stream.getPosition());}
        break;
      case Real:
        try
        {token = Float.parseFloat(text);}
        catch(NumberFormatException e)
        {throw new FileFormatException("Malformed real object (" + text + ").",e,stream.getPosition());}
        break;
    }
  }
}