1 49 50 package com.lowagie.text.pdf; 51 52 import java.io.IOException ; 53 57 public class PRTokeniser { 58 59 public static final int TK_NUMBER = 1; 60 public static final int TK_STRING = 2; 61 public static final int TK_NAME = 3; 62 public static final int TK_COMMENT = 4; 63 public static final int TK_START_ARRAY = 5; 64 public static final int TK_END_ARRAY = 6; 65 public static final int TK_START_DIC = 7; 66 public static final int TK_END_DIC = 8; 67 public static final int TK_REF = 9; 68 public static final int TK_OTHER = 10; 69 public static final boolean delims[] = { 70 true, true, false, false, false, false, false, false, false, false, 71 true, true, false, true, true, false, false, false, false, false, 72 false, false, false, false, false, false, false, false, false, false, 73 false, false, false, true, false, false, false, false, true, false, 74 false, true, true, false, false, false, false, false, true, false, 75 false, false, false, false, false, false, false, false, false, false, 76 false, true, false, true, false, false, false, false, false, false, 77 false, false, false, false, false, false, false, false, false, false, 78 false, false, false, false, false, false, false, false, false, false, 79 false, false, true, false, true, false, false, false, false, false, 80 false, false, false, false, false, false, false, false, false, false, 81 false, false, false, false, false, false, false, false, false, false, 82 false, false, false, false, false, false, false, false, false, false, 83 false, false, false, false, false, false, false, false, false, false, 84 false, false, false, false, false, false, false, false, false, false, 85 false, false, false, false, false, false, false, false, false, false, 86 false, false, false, false, false, false, false, false, false, false, 87 false, false, false, false, false, false, false, false, false, false, 88 false, false, false, false, false, false, false, false, false, false, 89 false, false, false, false, false, false, false, false, false, false, 90 false, false, false, false, false, false, false, false, false, false, 91 false, false, false, false, false, false, false, false, false, false, 92 false, false, false, false, false, false, false, false, false, false, 93 false, false, false, false, false, false, false, false, false, false, 94 false, false, false, false, false, false, false, false, false, false, 95 false, false, false, false, false, false, false}; 96 97 static final String EMPTY = ""; 98 99 100 protected RandomAccessFileOrArray file; 101 protected int type; 102 protected String stringValue; 103 protected int reference; 104 protected int generation; 105 protected boolean hexString; 106 107 public PRTokeniser(String filename) throws IOException { 108 file = new RandomAccessFileOrArray(filename); 109 } 110 111 public PRTokeniser(byte pdfIn[]) { 112 file = new RandomAccessFileOrArray(pdfIn); 113 } 114 115 public PRTokeniser(RandomAccessFileOrArray file) { 116 this.file = file; 117 } 118 119 public void seek(int pos) throws IOException { 120 file.seek(pos); 121 } 122 123 public int getFilePointer() throws IOException { 124 return file.getFilePointer(); 125 } 126 127 public void close() throws IOException { 128 file.close(); 129 } 130 131 public int length() throws IOException { 132 return file.length(); 133 } 134 135 public int read() throws IOException { 136 return file.read(); 137 } 138 139 public RandomAccessFileOrArray getSafeFile() { 140 return new RandomAccessFileOrArray(file); 141 } 142 143 public RandomAccessFileOrArray getFile() { 144 return file; 145 } 146 147 public String readString(int size) throws IOException { 148 StringBuffer buf = new StringBuffer (); 149 int ch; 150 while ((size--) > 0) { 151 ch = file.read(); 152 if (ch == -1) 153 break; 154 buf.append((char)ch); 155 } 156 return buf.toString(); 157 } 158 159 public static final boolean isWhitespace(int ch) { 160 return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32); 161 } 162 163 public static final boolean isDelimiter(int ch) { 164 return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%'); 165 } 166 167 public static final boolean isDelimiterWhitespace(int ch) { 168 return delims[ch + 1]; 169 } 170 171 public int getTokenType() { 172 return type; 173 } 174 175 public String getStringValue() { 176 return stringValue; 177 } 178 179 public int getReference() { 180 return reference; 181 } 182 183 public int getGeneration() { 184 return generation; 185 } 186 187 public void backOnePosition(int ch) { 188 if (ch != -1) 189 file.pushBack((byte)ch); 190 } 191 192 public void throwError(String error) throws IOException { 193 throw new IOException (error + " at file pointer " + file.getFilePointer()); 194 } 195 196 public char checkPdfHeader() throws IOException { 197 file.setStartOffset(0); 198 String str = readString(1024); 199 int idx = str.indexOf("%PDF-"); 200 if (idx < 0) 201 throw new IOException ("PDF header signature not found."); 202 file.setStartOffset(idx); 203 return str.charAt(idx + 7); 204 } 205 206 public void checkFdfHeader() throws IOException { 207 file.setStartOffset(0); 208 String str = readString(1024); 209 int idx = str.indexOf("%FDF-1.2"); 210 if (idx < 0) 211 throw new IOException ("FDF header signature not found."); 212 file.setStartOffset(idx); 213 } 214 215 public int getStartxref() throws IOException { 216 int size = Math.min(1024, file.length()); 217 int pos = file.length() - size; 218 file.seek(pos); 219 String str = readString(1024); 220 int idx = str.lastIndexOf("startxref"); 221 if (idx < 0) 222 throw new IOException ("PDF startxref not found."); 223 return pos + idx; 224 } 225 226 public static int getHex(int v) { 227 if (v >= '0' && v <= '9') 228 return v - '0'; 229 if (v >= 'A' && v <= 'F') 230 return v - 'A' + 10; 231 if (v >= 'a' && v <= 'f') 232 return v - 'a' + 10; 233 return -1; 234 } 235 236 public void nextValidToken() throws IOException { 237 int level = 0; 238 String n1 = null; 239 String n2 = null; 240 int ptr = 0; 241 while (nextToken()) { 242 if (type == TK_COMMENT) 243 continue; 244 switch (level) { 245 case 0: 246 { 247 if (type != TK_NUMBER) 248 return; 249 ptr = file.getFilePointer(); 250 n1 = stringValue; 251 ++level; 252 break; 253 } 254 case 1: 255 { 256 if (type != TK_NUMBER) { 257 file.seek(ptr); 258 type = TK_NUMBER; 259 stringValue = n1; 260 return; 261 } 262 n2 = stringValue; 263 ++level; 264 break; 265 } 266 default: 267 { 268 if (type != TK_OTHER || !stringValue.equals("R")) { 269 file.seek(ptr); 270 type = TK_NUMBER; 271 stringValue = n1; 272 return; 273 } 274 type = TK_REF; 275 reference = Integer.parseInt(n1); 276 generation = Integer.parseInt(n2); 277 return; 278 } 279 } 280 } 281 throwError("Unexpected end of file"); 282 } 283 284 public boolean nextToken() throws IOException { 285 StringBuffer outBuf = null; 286 stringValue = EMPTY; 287 int ch = 0; 288 do { 289 ch = file.read(); 290 } while (ch != -1 && isWhitespace(ch)); 291 if (ch == -1) 292 return false; 293 switch (ch) { 294 case '[': 295 type = TK_START_ARRAY; 296 break; 297 case ']': 298 type = TK_END_ARRAY; 299 break; 300 case '/': 301 { 302 outBuf = new StringBuffer (); 303 type = TK_NAME; 304 while (true) { 305 ch = file.read(); 306 if (delims[ch + 1]) 307 break; 308 if (ch == '#') { 309 ch = (getHex(file.read()) << 4) + getHex(file.read()); 310 } 311 outBuf.append((char)ch); 312 } 313 backOnePosition(ch); 314 break; 315 } 316 case '>': 317 ch = file.read(); 318 if (ch != '>') 319 throwError("'>' not expected"); 320 type = TK_END_DIC; 321 break; 322 case '<': 323 { 324 int v1 = file.read(); 325 if (v1 == '<') { 326 type = TK_START_DIC; 327 break; 328 } 329 outBuf = new StringBuffer (); 330 type = TK_STRING; 331 hexString = true; 332 int v2 = 0; 333 while (true) { 334 while (isWhitespace(v1)) 335 v1 = file.read(); 336 if (v1 == '>') 337 break; 338 v1 = getHex(v1); 339 if (v1 < 0) 340 break; 341 v2 = file.read(); 342 while (isWhitespace(v2)) 343 v2 = file.read(); 344 if (v2 == '>') { 345 ch = v1 << 4; 346 outBuf.append((char)ch); 347 break; 348 } 349 v2 = getHex(v2); 350 if (v2 < 0) 351 break; 352 ch = (v1 << 4) + v2; 353 outBuf.append((char)ch); 354 v1 = file.read(); 355 } 356 if (v1 < 0 || v2 < 0) 357 throwError("Error reading string"); 358 break; 359 } 360 case '%': 361 type = TK_COMMENT; 362 do { 363 ch = file.read(); 364 } while (ch != -1 && ch != '\r' && ch != '\n'); 365 break; 366 case '(': 367 { 368 outBuf = new StringBuffer (); 369 type = TK_STRING; 370 hexString = false; 371 int nesting = 0; 372 while (true) { 373 ch = file.read(); 374 if (ch == -1) 375 break; 376 if (ch == '(') { 377 ++nesting; 378 } 379 else if (ch == ')') { 380 --nesting; 381 } 382 else if (ch == '\\') { 383 boolean lineBreak = false; 384 ch = file.read(); 385 switch (ch) { 386 case 'n': 387 ch = '\n'; 388 break; 389 case 'r': 390 ch = '\r'; 391 break; 392 case 't': 393 ch = '\t'; 394 break; 395 case 'b': 396 ch = '\b'; 397 break; 398 case 'f': 399 ch = '\f'; 400 break; 401 case '(': 402 case ')': 403 case '\\': 404 break; 405 case '\r': 406 lineBreak = true; 407 ch = file.read(); 408 if (ch != '\n') 409 backOnePosition(ch); 410 break; 411 case '\n': 412 lineBreak = true; 413 break; 414 default: 415 { 416 if (ch < '0' || ch > '7') { 417 break; 418 } 419 int octal = ch - '0'; 420 ch = file.read(); 421 if (ch < '0' || ch > '7') { 422 backOnePosition(ch); 423 ch = octal; 424 break; 425 } 426 octal = (octal << 3) + ch - '0'; 427 ch = file.read(); 428 if (ch < '0' || ch > '7') { 429 backOnePosition(ch); 430 ch = octal; 431 break; 432 } 433 octal = (octal << 3) + ch - '0'; 434 ch = octal & 0xff; 435 break; 436 } 437 } 438 if (lineBreak) 439 continue; 440 if (ch < 0) 441 break; 442 } 443 else if (ch == '\r') { 444 ch = file.read(); 445 if (ch < 0) 446 break; 447 if (ch != '\n') { 448 backOnePosition(ch); 449 ch = '\n'; 450 } 451 } 452 if (nesting == -1) 453 break; 454 outBuf.append((char)ch); 455 } 456 if (ch == -1) 457 throwError("Error reading string"); 458 break; 459 } 460 default: 461 { 462 outBuf = new StringBuffer (); 463 if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) { 464 type = TK_NUMBER; 465 do { 466 outBuf.append((char)ch); 467 ch = file.read(); 468 } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.')); 469 } 470 else { 471 type = TK_OTHER; 472 do { 473 outBuf.append((char)ch); 474 ch = file.read(); 475 } while (!delims[ch + 1]); 476 } 477 backOnePosition(ch); 478 break; 479 } 480 } 481 if (outBuf != null) 482 stringValue = outBuf.toString(); 483 return true; 484 } 485 486 public int intValue() { 487 return Integer.parseInt(stringValue); 488 } 489 490 public boolean readLineSegment(byte input[]) throws IOException { 491 int c = -1; 492 boolean eol = false; 493 int ptr = 0; 494 int len = input.length; 495 if ( ptr < len ) { 499 while ( isWhitespace( (c = read()) ) ); 500 } 501 while ( !eol && ptr < len ) { 502 switch (c) { 503 case -1: 504 case '\n': 505 eol = true; 506 break; 507 case '\r': 508 eol = true; 509 int cur = getFilePointer(); 510 if ((read()) != '\n') { 511 seek(cur); 512 } 513 break; 514 default: 515 input[ptr++] = (byte)c; 516 break; 517 } 518 519 if( eol || len <= ptr ) { 521 break; 522 } 523 else { 524 c = read(); 525 } 526 } 527 if (ptr >= len) { 528 eol = false; 529 while (!eol) { 530 switch (c = read()) { 531 case -1: 532 case '\n': 533 eol = true; 534 break; 535 case '\r': 536 eol = true; 537 int cur = getFilePointer(); 538 if ((read()) != '\n') { 539 seek(cur); 540 } 541 break; 542 } 543 } 544 } 545 546 if ((c == -1) && (ptr == 0)) { 547 return false; 548 } 549 if (ptr + 2 <= len) { 550 input[ptr++] = (byte)' '; 551 input[ptr] = (byte)'X'; 552 } 553 return true; 554 } 555 556 public static int[] checkObjectStart(byte line[]) { 557 try { 558 PRTokeniser tk = new PRTokeniser(line); 559 int num = 0; 560 int gen = 0; 561 if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) 562 return null; 563 num = tk.intValue(); 564 if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) 565 return null; 566 gen = tk.intValue(); 567 if (!tk.nextToken()) 568 return null; 569 if (!tk.getStringValue().equals("obj")) 570 return null; 571 return new int[]{num, gen}; 572 } 573 catch (Exception ioe) { 574 } 576 return null; 577 } 578 579 public boolean isHexString() { 580 return this.hexString; 581 } 582 583 } 584 | Popular Tags |