Parser


1   /*
2     Copyright © 2006,2007 Stefano Chizzolini. http://clown.stefanochizzolini.it
3   
4     Contributors:
5       * Stefano Chizzolini (original code developer, info@stefanochizzolini.it):
6         contributed code is Copyright © 2006,2007 by Stefano Chizzolini.
7   
8     This file should be part of the source code distribution of "PDF Clown library"
9     (the Program): see the accompanying README files for more info.
10  
11    This Program is free software; you can redistribute it and/or modify it under
12    the terms of the GNU General Public License as published by the Free Software
13    Foundation; either version 2 of the License, or (at your option) any later version.
14  
15    This Program is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY, either expressed or implied; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the License for more details.
18  
19    You should have received a copy of the GNU General Public License along with this
20    Program (see README files); if not, go to the GNU website (http://www.gnu.org/).
21  
22    Redistribution and use, with or without modification, are permitted provided that such
23    redistributions retain the above copyright notice, license and disclaimer, along with
24    this list of conditions.
25  */
26  
27  package it.stefanochizzolini.clown.tokens;
28  
29  import it.stefanochizzolini.clown.bytes.Buffer;
30  import it.stefanochizzolini.clown.bytes.IInputStream;
31  import it.stefanochizzolini.clown.files.File;
32  import it.stefanochizzolini.clown.objects.PdfArray;
33  import it.stefanochizzolini.clown.objects.PdfBoolean;
34  import it.stefanochizzolini.clown.objects.PdfDataObject;
35  import it.stefanochizzolini.clown.objects.PdfDate;
36  import it.stefanochizzolini.clown.objects.PdfDictionary;
37  import it.stefanochizzolini.clown.objects.PdfDirectObject;
38  import it.stefanochizzolini.clown.objects.PdfHex;
39  import it.stefanochizzolini.clown.objects.PdfInteger;
40  import it.stefanochizzolini.clown.objects.PdfLiteral;
41  import it.stefanochizzolini.clown.objects.PdfName;
42  import it.stefanochizzolini.clown.objects.PdfNull;
43  import it.stefanochizzolini.clown.objects.PdfReal;
44  import it.stefanochizzolini.clown.objects.PdfReference;
45  import it.stefanochizzolini.clown.objects.PdfStream;
46  import java.io.EOFException  ;
47  import java.util.Date  ;
48  
49  /**
50    Token parser.
51    <h3>Contract</h3>
52    <ul>
53      <li>Preconditions:
54      <ol>
55        <li>(none).</li>
56      </ol>
57      </li>
58      <li>Postconditions:
59      <ol>
60        <li>(none).</li>
61      </ol>
62      </li>
63      <li>Invariants:
64      <ol>
65        <li>Stream data IS kept untouched.</li>
66      </ol>
67      </li>
68      <li>Side-effects:
69      <ol>
70        <li>(none).</li>
71      </ol>
72      </li>
73    </ul>
74  */
75  public class Parser
76  {
77    // <class>
78    // <classes>
79    public class Reference
80    {
81      // <class>
82      // <fields>
83      private final int generationNumber;
84      private final int objectNumber;
85      // </fields>
86  
87      // <constructors>
88      private Reference(
89        int objectNumber,
90        int generationNumber
91        )
92      {
93        this.objectNumber = objectNumber;
94        this.generationNumber = generationNumber;
95      }
96      // </constructors>
97  
98      // <interface>
99      // <public>
100     public int getGenerationNumber(
101       )
102     {return generationNumber;}
103 
104     public int getObjectNumber(
105       )
106     {return objectNumber;}
107     // </public>
108     // </interface>
109     // </class>
110   }
111   // </classes>
112 
113   // <static>
114   // <fields>
115   private static final String   PdfHeader = "%PDF-";
116   // </fields>
117 
118   // <interface>
119   // <protected>
120   /**
121     Evaluates whether a character is a delimiter [PDF:1.6:3.1.1].
122   */
123   protected static boolean isDelimiter(
124     int c
125     )
126   {return (c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || c == ']' || c == '/' || c == '%');}
127 
128   /**
129     Evaluates whether a character is an EOL marker [PDF:1.6:3.1.1].
130   */
131   protected static boolean isEOL(
132     int c
133     )
134   {return (c == 12 || c == 15);}
135 
136   /**
137     Evaluates whether a character is a white-space [PDF:1.6:3.1.1].
138   */
139   protected static boolean isWhitespace(
140     int c
141     )
142   {return (c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32);}
143   // </protected>
144   // </interface>
145   // </static>
146 
147   // <dynamic>
148   // <fields>
149   private File file;
150   private IInputStream stream;
151   private Object   token;
152   private TokenTypeEnum tokenType;
153 
154   private boolean multipleTokenParsing;
155   // </fields>
156 
157   // <constructors>
158   Parser(
159     IInputStream stream,
160     File file
161     )
162   {
163     this.stream = stream;
164     this.file = file;
165   }
166   // </constructors>
167 
168   // <interface>
169   // <public>
170   public long getLength(
171     )
172   {return stream.getLength();}
173 
174   public long getPosition(
175     )
176   {return stream.getPosition();}
177 
178   public IInputStream getStream(
179     )
180   {return stream;}
181 
182   /**
183     Gets the currently-parsed token.
184     @return The current token.
185   */
186   public Object   getToken(
187     )
188   {return token;}
189 
190   /**
191     Gets the currently-parsed token type.
192     @return The current token type.
193   */
194   public TokenTypeEnum getTokenType(
195     )
196   {return tokenType;}
197 
198   public int hashCode(
199     )
200   {return stream.hashCode();}
201 
202   /**
203     @param offset Number of tokens to be skipped before reaching the intended one.
204   */
205   public boolean moveNext(
206     int offset
207     ) throws FileFormatException
208   {
209     for(
210       int index = 0;
211       index < offset;
212       index++
213       )
214     {
215       if(!moveNext())
216         return false;
217     }
218 
219     return true;
220   }
221 
222   /**
223     Parse the next token [PDF:1.6:3.1].
224     <h3>Contract</h3>
225     <ul>
226      <li>Preconditions:
227       <ol>
228        <li>To properly parse the current token, the pointer MUST be just before its starting (leading whitespaces are ignored).</li>
229       </ol>
230      </li>
231      <li>Postconditions:
232       <ol>
233        <li id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</li>
234       </ol>
235      </li>
236      <li>Invariants:
237       <ol>
238        <li>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</li>
239       </ol>
240      </li>
241      <li>Side-effects:
242       <ol>
243        <li>See <a HREF="#moveNext_contract_post[0]">Postconditions</a>.</li>
244       </ol>
245      </li>
246     </ul>
247     @return Whether a new token was found.
248   */
249   public boolean moveNext(
250     ) throws FileFormatException
251   {
252     /*
253       NOTE: It'd be interesting to evaluate an alternative regular-expression-based
254       implementation...
255     */
256     StringBuilder   buffer = null;
257     token = null;
258     int c = 0;
259 
260     // Skip leading white-space characters [PDF:1.6:3.1.1].
261     try
262     {
263       do
264       {
265         c = stream.readUnsignedByte();
266       } while(isWhitespace(c)); // Keep goin' till there's a white-space character...
267     }
268     catch(EOFException   e)
269     {return false;}
270 
271     // Which character is it?
272     switch(c)
273     {
274       case '/': // Name [PDF:1.6:3.2.4].
275         tokenType = TokenTypeEnum.Name;
276 
277         /*
278           NOTE: As name objects are atomic symbols uniquely defined by sequences of characters,
279           the bytes making up the name are never treated as text, so here they are just
280           passed through without unescaping.
281         */
282         buffer = new StringBuilder  ();
283         try
284         {
285           while(true)
286           {
287             c = stream.readUnsignedByte();
288             if(isDelimiter(c) || isWhitespace(c))
289               break;
290 
291             buffer.append((char)c);
292           }
293         }
294         catch(EOFException   e)
295         {throw new FileFormatException("Unexpected EOF (malformed name object).",e,stream.getPosition());}
296 
297         stream.skip(-1); // Recover the first byte after the current token.
298         break;
299       case '0':
300       case '1':
301       case '2':
302       case '3':
303       case '4':
304       case '5':
305       case '6':
306       case '7':
307       case '8':
308       case '9':
309       case '.':
310       case '-':
311       case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
312         switch(c)
313         {
314           case '.': // Decimal point.
315             tokenType = TokenTypeEnum.Real;
316             break;
317           case '-':
318           case '+': // Signum.
319             tokenType = TokenTypeEnum.Integer; // By default (it may be real).
320             break;
321           default: // Digit.
322             if(multipleTokenParsing) // Plain number (multiple token parsing -- see indirect reference search).
323             {
324               tokenType = TokenTypeEnum.Integer; // By default (it may be real).
325             }
326             else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
327             {
328               /*
329                 NOTE: We need to identify this pattern:
330                 ref :=  { int int 'R' }
331               */
332               // Enable multiple token parsing!
333               // NOTE: This state MUST be disabled before returning.
334               multipleTokenParsing = true;
335 
336               // 1. Object number.
337               // Try the possible object number!
338               stream.skip(-1); moveNext();
339               // Isn't it a valid object number?
340               if(tokenType != TokenTypeEnum.Integer)
341               {
342                 // Disable multiple token parsing!
343                 multipleTokenParsing = false;
344                 return true;
345               }
346               // Assign object number!
347               int objectNumber = (Integer  )token;
348               // Backup the recovery position!
349               long oldOffset = stream.getPosition();
350 
351               // 2. Generation number.
352               // Try the possible generation number!
353               moveNext();
354               // Isn't it a valid generation number?
355               if(tokenType != TokenTypeEnum.Integer)
356               {
357                 // Rollback!
358                 stream.seek(oldOffset);
359                 token = objectNumber; tokenType = TokenTypeEnum.Integer;
360                 // Disable multiple token parsing!
361                 multipleTokenParsing = false;
362                 return true;
363               }
364               // Assign generation number!
365               int generationNumber = (Integer  )token;
366 
367               // 3. Reference keyword.
368               // Try the possible reference keyword!
369               moveNext();
370               // Isn't it a valid reference keyword?
371               if(tokenType != TokenTypeEnum.Reference)
372               {
373                 // Rollback!
374                 stream.seek(oldOffset);
375                 token = objectNumber; tokenType = TokenTypeEnum.Integer;
376                 // Disable multiple token parsing!
377                 multipleTokenParsing = false;
378                 return true;
379               }
380               token = new Reference(objectNumber,generationNumber);
381               // Disable multiple token parsing!
382               multipleTokenParsing = false;
383               return true;
384             }
385             break;
386         }
387 
388         // Building the number...
389         buffer = new StringBuilder  ();
390         try
391         {
392           do
393           {
394             buffer.append((char)c);
395             c = stream.readUnsignedByte();
396             if(c == '.')
397               tokenType = TokenTypeEnum.Real;
398             else if(c < '0' || c > '9')
399               break;
400           } while(true);
401         }
402         catch(EOFException   e)
403         {throw new FileFormatException("Unexpected EOF (malformed number object).",e,stream.getPosition());}
404 
405         stream.skip(-1); // Recover the first byte after the current token.
406         break;
407       case '[': // Array (begin).
408         tokenType = TokenTypeEnum.ArrayBegin;
409         break;
410       case ']': // Array (end).
411         tokenType = TokenTypeEnum.ArrayEnd;
412         break;
413       case '<': // Dictionary (begin) | Hexadecimal string.
414         try
415         {c = stream.readUnsignedByte();}
416         catch(EOFException   e)
417         {throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).",e,stream.getPosition());}
418         // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
419         if(c == '<')
420         {
421           tokenType = TokenTypeEnum.DictionaryBegin;
422           break;
423         }
424 
425         // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
426         tokenType = TokenTypeEnum.Hex;
427 
428         buffer = new StringBuilder  ();
429         try
430         {
431           while(true)
432           {
433             c = stream.readUnsignedByte();
434             // String end?
435             if(c == '>')
436               break;
437 
438             buffer.append((char)c);
439           }
440         }
441         catch(EOFException   e)
442         {throw new FileFormatException("Unexpected EOF (malformed hex string).",e,stream.getPosition());}
443 
444         break;
445       case '>': // Dictionary (end).
446         try
447         {c = stream.readUnsignedByte();}
448         catch(EOFException   e)
449         {throw new FileFormatException("Unexpected EOF (malformed dictionary).",e,stream.getPosition());}
450         if(c != '>')
451           throw new FileFormatException("Malformed dictionary.",stream.getPosition());
452 
453         tokenType = TokenTypeEnum.DictionaryEnd;
454 
455         break;
456       case '%': // Comment [PDF:1.6:3.1.2].
457         tokenType = TokenTypeEnum.Comment;
458         // Skipping comment content...
459         try
460         {
461           do
462           {c = stream.readUnsignedByte();} while(!isEOL(c));
463         }
464         catch(EOFException   e)
465         {/* Let it go. */}
466 
467         break;
468       case '(': // Literal string [PDF:1.6:3.2.3].
469         tokenType = TokenTypeEnum.Literal;
470 
471         /*
472           NOTE: As literal objects are textual, their characters are unescaped when deserialized.
473         */
474         buffer = new StringBuilder  ();
475         int level = 0;
476         try
477         {
478           while(true)
479           {
480             c = stream.readUnsignedByte();
481             if(c == '(')
482               level++;
483             else if(c == ')')
484               level--;
485             else if(c == '\\')
486             {
487               boolean lineBreak = false;
488               c = stream.readUnsignedByte();
489               switch(c)
490               {
491                 case 'n':
492                   c = '\n';
493                   break;
494                 case 'r':
495                   c = '\r';
496                   break;
497                 case 't':
498                   c = '\t';
499                   break;
500                 case 'b':
501                   c = '\b';
502                   break;
503                 case 'f':
504                   c = '\f';
505                   break;
506                 case '(':
507                 case ')':
508                 case '\\':
509                   break;
510                 case '\r':
511                   lineBreak = true;
512                   c = stream.readUnsignedByte();
513                   if(c != '\n')
514                     stream.skip(-1);
515                   break;
516                 case '\n':
517                   lineBreak = true;
518                   break;
519                 default:
520                 {
521                   // Is it outside the octal encoding?
522                   if(c < '0' || c > '7')
523                     break;
524 
525                   // Octal.
526                   int octal = c - '0';
527                   c = stream.readUnsignedByte();
528                   // Octal end?
529                   if(c < '0' || c > '7')
530                   {c = octal; stream.skip(-1); break;}
531                   octal = (octal << 3) + c - '0';
532                   c = stream.readUnsignedByte();
533                   // Octal end?
534                   if(c < '0' || c > '7')
535                   {c = octal; stream.skip(-1); break;}
536                   octal = (octal << 3) + c - '0';
537                   c = octal & 0xff;
538                   break;
539                 }
540               }
541               if(lineBreak)
542                 continue;
543             }
544             else if(c == '\r')
545             {
546               c = stream.readUnsignedByte();
547               if(c != '\n')
548               {c = '\n'; stream.skip(-1);}
549             }
550             if(level == -1)
551               break;
552 
553             buffer.append((char)c);
554           }
555         }
556         catch(EOFException   e)
557         {throw new FileFormatException("Unexpected EOF (malformed literal string).",e,stream.getPosition());}
558 
559         break;
560       case 'R': // Indirect reference.
561         tokenType = TokenTypeEnum.Reference;
562 
563         break;
564       default: // Keyword object.
565         tokenType = TokenTypeEnum.Keyword;
566 
567         buffer = new StringBuilder  ();
568         try
569         {
570           do
571           {
572             buffer.append((char)c);
573             c = stream.readUnsignedByte();
574           } while(!isDelimiter(c) && !isWhitespace(c));
575         }
576         catch(EOFException   e)
577         {/* Let it go. */}
578         stream.skip(-1); // Recover the first byte after the current token.
579 
580         break;
581     }
582 
583     if(buffer != null)
584     {
585       /*
586         Here we prepare the current token state.
587       */
588       // Which token type?
589       switch(tokenType)
590       {
591         case Keyword:
592           token = buffer.toString();
593           // Late recognition.
594           if(((String  )token).equals("false")
595             || ((String  )token).equals("true")) // Boolean.
596           {
597             tokenType = TokenTypeEnum.Boolean;
598             token = Boolean.parseBoolean((String  )token);
599           }
600           else if(((String  )token).equals("null")) // Null.
601           {
602             tokenType = TokenTypeEnum.Null;
603             token = null;
604           }
605           break;
606         case Comment:
607         case Hex:
608         case Name:
609           token = buffer.toString();
610           break;
611         case Literal:
612           token = buffer.toString();
613           // Late recognition.
614           if(((String  )token).startsWith("D:")) // Date.
615           {
616             tokenType = TokenTypeEnum.Date;
617             token = PdfDate.toDate((String  )token);
618           }
619           break;
620         case Integer:
621           token = Integer.parseInt(buffer.toString());
622           break;
623         case Real:
624           token = Float.parseFloat(buffer.toString());
625           break;
626       }
627     }
628 
629     return true;
630   }
631 
632   /**
633     Parse the current PDF object [PDF:1.6:3.2].
634     <h3>Contract</h3>
635     <ul>
636      <li>Preconditions:
637       <ol>
638        <li>When this method is invoked, the pointer MUST be at the first
639        token of the requested object.</li>
640       </ol>
641      </li>
642      <li>Postconditions:
643       <ol>
644        <li id="parsePdfObject_contract_post[0]">When this method terminates,
645        the pointer IS at the last token of the requested object.</li>
646       </ol>
647      </li>
648      <li>Invariants:
649       <ol>
650        <li>(none).</li>
651       </ol>
652      </li>
653      <li>Side-effects:
654       <ol>
655        <li>See <a HREF="#parsePdfObject_contract_post[0]">Postconditions</a>.</li>
656       </ol>
657      </li>
658     </ul>
659   */
660   public PdfDataObject parsePdfObject(
661     ) throws FileFormatException
662   {
663     /*
664       NOTE: Object parsing is intrinsically a sequential operation tied to the stream pointer.
665       Calls bound towards other classes are potentially disruptive for the predictability of
666       the position of the stream pointer, so we are forced to carefully keep track of our
667       current position in order to recover its proper state after any outbound call.
668     */
669 
670     // Which token type?
671     switch(tokenType)
672     {
673       case Integer:
674         return new PdfInteger((Integer  )token);
675       case Name:
676         return new PdfName((String  )token,true);
677       case Reference:
678         /*
679           NOTE: Curiously, PDF references are the only primitive objects that require
680           a file reference. That's because they deal with indirect objects, which are strongly
681           coupled with the current state of the file: so, PDF references are the fundamental
682           bridge between the token layer and the file layer.
683         */
684         return new PdfReference(
685           (Reference)token,
686           file
687           );
688       case Literal:
689         return new PdfLiteral((String  )token);
690       case DictionaryBegin:
691         PdfDictionary dictionary = new PdfDictionary();
692         // Populate the dictionary.
693         while(true)
694         {
695           // Key.
696           moveNext();
697           if(tokenType == TokenTypeEnum.DictionaryEnd)
698             break;
699           PdfName key = (PdfName)parsePdfObject();
700 
701           // Value.
702           moveNext();
703           PdfDirectObject value = (PdfDirectObject)parsePdfObject();
704 
705           // Add the current entry to the dictionary!
706           dictionary.put(key,value);
707         }
708 
709         int oldOffset = (int)stream.getPosition();
710         moveNext();
711         // Is this dictionary the header of a stream object [PDF:1.6:3.2.7]?
712         if((tokenType == TokenTypeEnum.Keyword)
713           && token.equals("stream")) // Stream.
714         {
715           // Keep track of current position!
716           long position = stream.getPosition();
717 
718           // Get the stream length!
719           /*
720             NOTE: Indirect reference resolution is an outbound call (stream pointer hazard!),
721             so we need to recover our current position after it returns.
722           */
723           int length = ((PdfInteger)File.resolve(dictionary.get(PdfName.Length))).getValue();
724 
725           // Come back to current position!
726           stream.seek(position);
727 
728           skipWhitespace();
729 
730           // Copy the stream data to the instance!
731           byte[] data = new byte[length];
732           try
733           {stream.read(data);}
734           catch(EOFException   e)
735           {throw new FileFormatException("Unexpected EOF (malformed stream object).",e,stream.getPosition());}
736 
737           moveNext(); // Postcondition (last token should be 'endstream' keyword).
738 
739           return new PdfStream(
740             dictionary,
741             new Buffer(data)
742             );
743         }
744         else // Simple dictionary.
745         {
746           stream.seek(oldOffset); // Restore postcondition (last token should be the dictionary end).
747 
748           return dictionary;
749         }
750       case ArrayBegin:
751         PdfArray array = new PdfArray();
752         // Populate the array.
753         while(true)
754         {
755           // Value.
756           moveNext();
757           if(tokenType == TokenTypeEnum.ArrayEnd)
758             break;
759 
760           // Add the current item to the array!
761           array.add((PdfDirectObject)parsePdfObject());
762         }
763         return array;
764       case Real:
765         return new PdfReal((Float  )token);
766       case Boolean:
767         return new PdfBoolean((Boolean  )token);
768       case Date:
769         return new PdfDate((Date)token);
770       case Hex:
771         return new PdfHex((String  )token);
772       case Null:
773         return PdfNull.Null;
774       default:
775         return null;
776     }
777   }
778 
779   /**
780     Retrieves the PDF version of the file [PDF:1.6:3.4.1].
781     <h3>Contract</h3>
782     <ul>
783      <li>Preconditions:
784       <ol>
785        <li>(none).</li>
786       </ol>
787      </li>
788      <li>Postconditions:
789       <ol>
790        <li>(none).</li>
791       </ol>
792      </li>
793      <li>Invariants:
794       <ol>
795        <li>(none).</li>
796       </ol>
797      </li>
798      <li>Side-effects:
799       <ol>
800        <li>The pointer is released at an undefined location.</li>
801       </ol>
802      </li>
803     </ul>
804   */
805   public String   retrieveVersion(
806     ) throws FileFormatException
807   {
808     stream.seek(0);
809     String   header;
810     try{header = stream.readString(10);}
811     catch(EOFException   e){throw new FileFormatException("Unexpected EOF (malformed version data).",e,stream.getPosition());}
812     if(!header.startsWith(PdfHeader))
813       throw new FileFormatException("PDF header not found.",stream.getPosition());
814 
815     return header.substring(PdfHeader.length(),PdfHeader.length() + 3);
816   }
817 
818   /**
819     Retrieves the starting position of the last xref-table section.
820     @see retrieveXRefOffset(long)
821   */
822   public long retrieveXRefOffset(
823     ) throws FileFormatException
824   {return retrieveXRefOffset(stream.getLength());}
825 
826   /**
827     Retrieves the starting position of an xref-table section [PDF:1.6:3.4.4].
828     <h3>Contract</h3>
829     <ul>
830      <li>Preconditions:
831       <ol>
832        <li>(none).</li>
833       </ol>
834      </li>
835      <li>Postconditions:
836       <ol>
837        <li>(none).</li>
838       </ol>
839      </li>
840      <li>Invariants:
841       <ol>
842        <li>(none).</li>
843       </ol>
844      </li>
845      <li>Side-effects:
846       <ol>
847        <li>The pointer is released at an undefined location.</li>
848       </ol>
849      </li>
850     </ul>
851     @param offset Position of the EOF marker related to the section intended to be parsed.
852   */
853   public long retrieveXRefOffset(
854     long offset
855     ) throws FileFormatException
856   {
857     final int chunkSize = 1024; // [PDF:1.6:H.3.18].
858 
859     // Move back before 'startxref' keyword!
860     long position = offset - chunkSize; stream.seek(position);
861 
862     // Get 'startxref' keyword position!
863     int index;
864     try{index = stream.readString(chunkSize).lastIndexOf("startxref");}
865     catch(EOFException   e){throw new FileFormatException("Unexpected EOF (malformed 'startxref' tag).",e,stream.getPosition());}
866     if(index < 0)
867       throw new FileFormatException("PDF startxref not found.",stream.getPosition());
868     // Go past the 'startxref' keyword!
869     stream.seek(position + index); moveNext();
870 
871     // Get the xref offset!
872     moveNext();
873     if(tokenType != TokenTypeEnum.Integer)
874       throw new FileFormatException("PDF startxref malformed.",stream.getPosition());
875 
876     return (Integer  )token;
877   }
878 
879   public void seek(
880     long offset
881     )
882   {stream.seek(offset);}
883 
884   public void skip(
885     long offset
886     )
887   {stream.skip(offset);}
888 
889   /**
890     Moves to the last whitespace after the current position in order to let read
891     the first non-whitespace.
892   */
893   public boolean skipWhitespace(
894     )
895   {
896     int b;
897     try
898     {
899       do
900       {b = stream.readUnsignedByte();} while(isWhitespace(b)); // Keep goin' till there's a white-space character...
901     }
902     catch(EOFException   e)
903     {return false;}
904     stream.skip(-1); // Recover the last whitespace position.
905 
906     return true;
907   }
908   // </public>
909   // </interface>
910   // </dynamic>
911   // </class>
912 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags