KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > pdf > PRTokeniser


1 /*
2  * $Id: PRTokeniser.java 2517 2006-12-28 19:41:02Z psoares33 $
3  *
4  * Copyright 2001, 2002 by Paulo Soares.
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
21  *
22  * Contributor(s): all the names of the contributors are added in the source code
23  * where applicable.
24  *
25  * Alternatively, the contents of this file may be used under the terms of the
26  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27  * provisions of LGPL are applicable instead of those above. If you wish to
28  * allow use of your version of this file only under the terms of the LGPL
29  * License and not to allow others to use your version of this file under
30  * the MPL, indicate your decision by deleting the provisions above and
31  * replace them with the notice and other provisions required by the LGPL.
32  * If you do not delete the provisions above, a recipient may use your version
33  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34  *
35  * This library is free software; you can redistribute it and/or modify it
36  * under the terms of the MPL as stated above or under the terms of the GNU
37  * Library General Public License as published by the Free Software Foundation;
38  * either version 2 of the License, or any later version.
39  *
40  * This library is distributed in the hope that it will be useful, but WITHOUT
41  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43  * details.
44  *
45  * If you didn't download this code from the following link, you should check if
46  * you aren't using an obsolete version:
47  * http://www.lowagie.com/iText/
48  */

49
package com.lowagie.text.pdf;

import java.io.IOException;
53 /**
54  *
55  * @author Paulo Soares (psoares@consiste.pt)
56  */

57 public class PRTokeniser {
58     
59     public static final int TK_NUMBER = 1;
60     public static final int TK_STRING = 2;
61     public static final int TK_NAME = 3;
62     public static final int TK_COMMENT = 4;
63     public static final int TK_START_ARRAY = 5;
64     public static final int TK_END_ARRAY = 6;
65     public static final int TK_START_DIC = 7;
66     public static final int TK_END_DIC = 8;
67     public static final int TK_REF = 9;
68     public static final int TK_OTHER = 10;
69     public static final boolean delims[] = {
70         true, true, false, false, false, false, false, false, false, false,
71         true, true, false, true, true, false, false, false, false, false,
72         false, false, false, false, false, false, false, false, false, false,
73         false, false, false, true, false, false, false, false, true, false,
74         false, true, true, false, false, false, false, false, true, false,
75         false, false, false, false, false, false, false, false, false, false,
76         false, true, false, true, false, false, false, false, false, false,
77         false, false, false, false, false, false, false, false, false, false,
78         false, false, false, false, false, false, false, false, false, false,
79         false, false, true, false, true, false, false, false, false, false,
80         false, false, false, false, false, false, false, false, false, false,
81         false, false, false, false, false, false, false, false, false, false,
82         false, false, false, false, false, false, false, false, false, false,
83         false, false, false, false, false, false, false, false, false, false,
84         false, false, false, false, false, false, false, false, false, false,
85         false, false, false, false, false, false, false, false, false, false,
86         false, false, false, false, false, false, false, false, false, false,
87         false, false, false, false, false, false, false, false, false, false,
88         false, false, false, false, false, false, false, false, false, false,
89         false, false, false, false, false, false, false, false, false, false,
90         false, false, false, false, false, false, false, false, false, false,
91         false, false, false, false, false, false, false, false, false, false,
92         false, false, false, false, false, false, false, false, false, false,
93         false, false, false, false, false, false, false, false, false, false,
94         false, false, false, false, false, false, false, false, false, false,
95         false, false, false, false, false, false, false};
96     
97     static final String JavaDoc EMPTY = "";
98
99     
100     protected RandomAccessFileOrArray file;
101     protected int type;
102     protected String JavaDoc stringValue;
103     protected int reference;
104     protected int generation;
105     protected boolean hexString;
106        
107     public PRTokeniser(String JavaDoc filename) throws IOException JavaDoc {
108         file = new RandomAccessFileOrArray(filename);
109     }
110
111     public PRTokeniser(byte pdfIn[]) {
112         file = new RandomAccessFileOrArray(pdfIn);
113     }
114     
115     public PRTokeniser(RandomAccessFileOrArray file) {
116         this.file = file;
117     }
118     
119     public void seek(int pos) throws IOException JavaDoc {
120         file.seek(pos);
121     }
122     
123     public int getFilePointer() throws IOException JavaDoc {
124         return file.getFilePointer();
125     }
126
127     public void close() throws IOException JavaDoc {
128         file.close();
129     }
130     
131     public int length() throws IOException JavaDoc {
132         return file.length();
133     }
134
135     public int read() throws IOException JavaDoc {
136         return file.read();
137     }
138     
139     public RandomAccessFileOrArray getSafeFile() {
140         return new RandomAccessFileOrArray(file);
141     }
142     
143     public RandomAccessFileOrArray getFile() {
144         return file;
145     }
146     
147     public String JavaDoc readString(int size) throws IOException JavaDoc {
148         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
149         int ch;
150         while ((size--) > 0) {
151             ch = file.read();
152             if (ch == -1)
153                 break;
154             buf.append((char)ch);
155         }
156         return buf.toString();
157     }
158
159     public static final boolean isWhitespace(int ch) {
160         return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
161     }
162     
163     public static final boolean isDelimiter(int ch) {
164         return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
165     }
166
167     public static final boolean isDelimiterWhitespace(int ch) {
168         return delims[ch + 1];
169     }
170
171     public int getTokenType() {
172         return type;
173     }
174     
175     public String JavaDoc getStringValue() {
176         return stringValue;
177     }
178     
179     public int getReference() {
180         return reference;
181     }
182     
183     public int getGeneration() {
184         return generation;
185     }
186     
187     public void backOnePosition(int ch) {
188         if (ch != -1)
189             file.pushBack((byte)ch);
190     }
191     
192     public void throwError(String JavaDoc error) throws IOException JavaDoc {
193         throw new IOException JavaDoc(error + " at file pointer " + file.getFilePointer());
194     }
195     
196     public char checkPdfHeader() throws IOException JavaDoc {
197         file.setStartOffset(0);
198         String JavaDoc str = readString(1024);
199         int idx = str.indexOf("%PDF-");
200         if (idx < 0)
201             throw new IOException JavaDoc("PDF header signature not found.");
202         file.setStartOffset(idx);
203         return str.charAt(idx + 7);
204     }
205     
206     public void checkFdfHeader() throws IOException JavaDoc {
207         file.setStartOffset(0);
208         String JavaDoc str = readString(1024);
209         int idx = str.indexOf("%FDF-1.2");
210         if (idx < 0)
211             throw new IOException JavaDoc("FDF header signature not found.");
212         file.setStartOffset(idx);
213     }
214
215     public int getStartxref() throws IOException JavaDoc {
216         int size = Math.min(1024, file.length());
217         int pos = file.length() - size;
218         file.seek(pos);
219         String JavaDoc str = readString(1024);
220         int idx = str.lastIndexOf("startxref");
221         if (idx < 0)
222             throw new IOException JavaDoc("PDF startxref not found.");
223         return pos + idx;
224     }
225
226     public static int getHex(int v) {
227         if (v >= '0' && v <= '9')
228             return v - '0';
229         if (v >= 'A' && v <= 'F')
230             return v - 'A' + 10;
231         if (v >= 'a' && v <= 'f')
232             return v - 'a' + 10;
233         return -1;
234     }
235     
236     public void nextValidToken() throws IOException JavaDoc {
237         int level = 0;
238         String JavaDoc n1 = null;
239         String JavaDoc n2 = null;
240         int ptr = 0;
241         while (nextToken()) {
242             if (type == TK_COMMENT)
243                 continue;
244             switch (level) {
245                 case 0:
246                 {
247                     if (type != TK_NUMBER)
248                         return;
249                     ptr = file.getFilePointer();
250                     n1 = stringValue;
251                     ++level;
252                     break;
253                 }
254                 case 1:
255                 {
256                     if (type != TK_NUMBER) {
257                         file.seek(ptr);
258                         type = TK_NUMBER;
259                         stringValue = n1;
260                         return;
261                     }
262                     n2 = stringValue;
263                     ++level;
264                     break;
265                 }
266                 default:
267                 {
268                     if (type != TK_OTHER || !stringValue.equals("R")) {
269                         file.seek(ptr);
270                         type = TK_NUMBER;
271                         stringValue = n1;
272                         return;
273                     }
274                     type = TK_REF;
275                     reference = Integer.parseInt(n1);
276                     generation = Integer.parseInt(n2);
277                     return;
278                 }
279             }
280         }
281         throwError("Unexpected end of file");
282     }
283     
284     public boolean nextToken() throws IOException JavaDoc {
285         StringBuffer JavaDoc outBuf = null;
286         stringValue = EMPTY;
287         int ch = 0;
288         do {
289             ch = file.read();
290         } while (ch != -1 && isWhitespace(ch));
291         if (ch == -1)
292             return false;
293         switch (ch) {
294             case '[':
295                 type = TK_START_ARRAY;
296                 break;
297             case ']':
298                 type = TK_END_ARRAY;
299                 break;
300             case '/':
301             {
302                 outBuf = new StringBuffer JavaDoc();
303                 type = TK_NAME;
304                 while (true) {
305                     ch = file.read();
306                     if (delims[ch + 1])
307                         break;
308                     if (ch == '#') {
309                         ch = (getHex(file.read()) << 4) + getHex(file.read());
310                     }
311                     outBuf.append((char)ch);
312                 }
313                 backOnePosition(ch);
314                 break;
315             }
316             case '>':
317                 ch = file.read();
318                 if (ch != '>')
319                     throwError("'>' not expected");
320                 type = TK_END_DIC;
321                 break;
322             case '<':
323             {
324                 int v1 = file.read();
325                 if (v1 == '<') {
326                     type = TK_START_DIC;
327                     break;
328                 }
329                 outBuf = new StringBuffer JavaDoc();
330                 type = TK_STRING;
331                 hexString = true;
332                 int v2 = 0;
333                 while (true) {
334                     while (isWhitespace(v1))
335                         v1 = file.read();
336                     if (v1 == '>')
337                         break;
338                     v1 = getHex(v1);
339                     if (v1 < 0)
340                         break;
341                     v2 = file.read();
342                     while (isWhitespace(v2))
343                         v2 = file.read();
344                     if (v2 == '>') {
345                         ch = v1 << 4;
346                         outBuf.append((char)ch);
347                         break;
348                     }
349                     v2 = getHex(v2);
350                     if (v2 < 0)
351                         break;
352                     ch = (v1 << 4) + v2;
353                     outBuf.append((char)ch);
354                     v1 = file.read();
355                 }
356                 if (v1 < 0 || v2 < 0)
357                     throwError("Error reading string");
358                 break;
359             }
360             case '%':
361                 type = TK_COMMENT;
362                 do {
363                     ch = file.read();
364                 } while (ch != -1 && ch != '\r' && ch != '\n');
365                 break;
366             case '(':
367             {
368                 outBuf = new StringBuffer JavaDoc();
369                 type = TK_STRING;
370                 hexString = false;
371                 int nesting = 0;
372                 while (true) {
373                     ch = file.read();
374                     if (ch == -1)
375                         break;
376                     if (ch == '(') {
377                         ++nesting;
378                     }
379                     else if (ch == ')') {
380                         --nesting;
381                     }
382                     else if (ch == '\\') {
383                         boolean lineBreak = false;
384                         ch = file.read();
385                         switch (ch) {
386                             case 'n':
387                                 ch = '\n';
388                                 break;
389                             case 'r':
390                                 ch = '\r';
391                                 break;
392                             case 't':
393                                 ch = '\t';
394                                 break;
395                             case 'b':
396                                 ch = '\b';
397                                 break;
398                             case 'f':
399                                 ch = '\f';
400                                 break;
401                             case '(':
402                             case ')':
403                             case '\\':
404                                 break;
405                             case '\r':
406                                 lineBreak = true;
407                                 ch = file.read();
408                                 if (ch != '\n')
409                                     backOnePosition(ch);
410                                 break;
411                             case '\n':
412                                 lineBreak = true;
413                                 break;
414                             default:
415                             {
416                                 if (ch < '0' || ch > '7') {
417                                     break;
418                                 }
419                                 int octal = ch - '0';
420                                 ch = file.read();
421                                 if (ch < '0' || ch > '7') {
422                                     backOnePosition(ch);
423                                     ch = octal;
424                                     break;
425                                 }
426                                 octal = (octal << 3) + ch - '0';
427                                 ch = file.read();
428                                 if (ch < '0' || ch > '7') {
429                                     backOnePosition(ch);
430                                     ch = octal;
431                                     break;
432                                 }
433                                 octal = (octal << 3) + ch - '0';
434                                 ch = octal & 0xff;
435                                 break;
436                             }
437                         }
438                         if (lineBreak)
439                             continue;
440                         if (ch < 0)
441                             break;
442                     }
443                     else if (ch == '\r') {
444                         ch = file.read();
445                         if (ch < 0)
446                             break;
447                         if (ch != '\n') {
448                             backOnePosition(ch);
449                             ch = '\n';
450                         }
451                     }
452                     if (nesting == -1)
453                         break;
454                     outBuf.append((char)ch);
455                 }
456                 if (ch == -1)
457                     throwError("Error reading string");
458                 break;
459             }
460             default:
461             {
462                 outBuf = new StringBuffer JavaDoc();
463                 if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) {
464                     type = TK_NUMBER;
465                     do {
466                         outBuf.append((char)ch);
467                         ch = file.read();
468                     } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
469                 }
470                 else {
471                     type = TK_OTHER;
472                     do {
473                         outBuf.append((char)ch);
474                         ch = file.read();
475                     } while (!delims[ch + 1]);
476                 }
477                 backOnePosition(ch);
478                 break;
479             }
480         }
481         if (outBuf != null)
482             stringValue = outBuf.toString();
483         return true;
484     }
485     
486     public int intValue() {
487         return Integer.parseInt(stringValue);
488     }
489     
490     public boolean readLineSegment(byte input[]) throws IOException JavaDoc {
491         int c = -1;
492         boolean eol = false;
493         int ptr = 0;
494         int len = input.length;
495     // ssteward, pdftk-1.10, 040922:
496
// skip initial whitespace; added this because PdfReader.rebuildXref()
497
// assumes that line provided by readLineSegment does not have init. whitespace;
498
if ( ptr < len ) {
499         while ( isWhitespace( (c = read()) ) );
500     }
501     while ( !eol && ptr < len ) {
502         switch (c) {
503                 case -1:
504                 case '\n':
505                     eol = true;
506                     break;
507                 case '\r':
508                     eol = true;
509                     int cur = getFilePointer();
510                     if ((read()) != '\n') {
511                         seek(cur);
512                     }
513                     break;
514                 default:
515                     input[ptr++] = (byte)c;
516                     break;
517             }
518
519         // break loop? do it before we read() again
520
if( eol || len <= ptr ) {
521         break;
522         }
523         else {
524         c = read();
525         }
526         }
527         if (ptr >= len) {
528             eol = false;
529             while (!eol) {
530                 switch (c = read()) {
531                     case -1:
532                     case '\n':
533                         eol = true;
534                         break;
535                     case '\r':
536                         eol = true;
537                         int cur = getFilePointer();
538                         if ((read()) != '\n') {
539                             seek(cur);
540                         }
541                         break;
542                 }
543             }
544         }
545         
546         if ((c == -1) && (ptr == 0)) {
547             return false;
548         }
549         if (ptr + 2 <= len) {
550             input[ptr++] = (byte)' ';
551             input[ptr] = (byte)'X';
552         }
553         return true;
554     }
555     
556     public static int[] checkObjectStart(byte line[]) {
557         try {
558             PRTokeniser tk = new PRTokeniser(line);
559             int num = 0;
560             int gen = 0;
561             if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
562                 return null;
563             num = tk.intValue();
564             if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
565                 return null;
566             gen = tk.intValue();
567             if (!tk.nextToken())
568                 return null;
569             if (!tk.getStringValue().equals("obj"))
570                 return null;
571             return new int[]{num, gen};
572         }
573         catch (Exception JavaDoc ioe) {
574             // empty on purpose
575
}
576         return null;
577     }
578     
579     public boolean isHexString() {
580         return this.hexString;
581     }
582     
583 }
584
Popular Tags