- Timestamp:
- 2010/12/11 12:15:20 (13 years ago)
- Location:
- branches/version-2_5-dev/data/module/fpdf
- Files:
-
- 1 added
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
branches/version-2_5-dev/data/module/fpdf/pdf_parser.php
r18701 r19716 1 1 <?php 2 2 // 3 // FPDI - Version 1. 23 // FPDI - Version 1.4 4 4 // 5 // Copyright 2004-20 07Setasign - Jan Slabon5 // Copyright 2004-2010 Setasign - Jan Slabon 6 6 // 7 7 // Licensed under the Apache License, Version 2.0 (the "License"); … … 40 40 if (!defined ('PDF_TYPE_STREAM')) 41 41 define ('PDF_TYPE_STREAM', 10); 42 if (!defined ('PDF_TYPE_BOOLEAN')) 43 define ('PDF_TYPE_BOOLEAN', 11); 44 if (!defined ('PDF_TYPE_REAL')) 45 define ('PDF_TYPE_REAL', 12); 46 47 require_once('pdf_context.php'); 42 48 43 require_once("pdf_context.php"); 44 require_once("wrapper_functions.php"); 45 46 class pdf_parser { 47 48 /** 49 * Filename 50 * @var string 51 */ 52 var $filename; 53 54 /** 55 * File resource 56 * @var resource 57 */ 58 var $f; 59 60 /** 61 * PDF Context 62 * @var object pdf_context-Instance 63 */ 64 var $c; 65 66 /** 67 * xref-Data 68 * @var array 69 */ 70 var $xref; 71 72 /** 73 * root-Object 74 * @var array 75 */ 76 var $root; 77 78 79 /** 80 * Constructor 81 * 82 * @param string $filename Source-Filename 83 */ 84 function pdf_parser($filename) { 85 $this->filename = $filename; 86 87 $this->f = @fopen($this->filename, "rb"); 88 89 if (!$this->f) 90 $this->error(sprintf("Cannot open %s !", $filename)); 91 92 $this->getPDFVersion(); 93 94 $this->c =& new pdf_context($this->f); 95 // Read xref-Data 96 $this->pdf_read_xref($this->xref, $this->pdf_find_xref()); 97 98 // Check for Encryption 99 $this->getEncryption(); 100 101 // Read root 102 $this->pdf_read_root(); 103 } 104 105 /** 106 * Close the opened file 107 */ 108 function closeFile() { 109 if (isset($this->f)) { 110 fclose($this->f); 111 unset($this->f); 112 } 113 } 114 115 /** 116 * Print Error and die 117 * 118 * @param string $msg Error-Message 119 */ 120 function error($msg) { 121 die("<b>PDF-Parser Error:</b> ".$msg); 122 } 123 124 /** 125 * Check Trailer for Encryption 126 */ 127 function getEncryption() { 128 if (isset($this->xref['trailer'][1]['/Encrypt'])) { 129 $this->error("File is encrypted!"); 130 } 131 } 132 133 /** 134 * Find/Return /Root 135 * 136 * @return array 137 */ 138 function pdf_find_root() { 139 if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) { 140 $this->error("Wrong Type of Root-Element! Must be an indirect reference"); 141 } 142 return $this->xref['trailer'][1]['/Root']; 143 } 144 145 /** 146 * Read the /Root 147 */ 148 function pdf_read_root() { 149 // read root 150 $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root()); 151 } 152 153 /** 154 * Get PDF-Version 155 * 156 * And reset the PDF Version used in FPDI if needed 157 */ 158 function getPDFVersion() { 159 fseek($this->f, 0); 160 preg_match("/\d\.\d/",fread($this->f,16),$m); 161 $this->pdfVersion = $m[0]; 162 } 163 164 /** 165 * Find the xref-Table 166 */ 167 function pdf_find_xref() { 168 fseek ($this->f, -min(filesize($this->filename),1500), SEEK_END); 169 $data = fread($this->f, 1500); 170 171 $pos = strlen($data) - strpos(strrev($data), strrev('startxref')); 172 $data = substr($data, $pos); 173 174 if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) { 175 $this->error("Unable to find pointer to xref table"); 176 } 177 178 return (int) $matches[1]; 179 } 180 181 /** 182 * Read xref-table 183 * 184 * @param array $result Array of xref-table 185 * @param integer $offset of xref-table 186 * @param integer $start start-position in xref-table 187 * @param integer $end end-position in xref-table 188 */ 189 function pdf_read_xref(&$result, $offset, $start = null, $end = null) { 190 if (is_null ($start) || is_null ($end)) { 191 fseek($this->f, $o_pos = $offset); 192 $data = trim(fgets($this->f,1024)); 193 194 if (strlen($data) == 0) 195 $data = trim(fgets($this->f,1024)); 196 197 if ($data !== 'xref') { 198 fseek($this->f, $o_pos); 199 $data = trim(_fgets($this->f, true)); 200 if ($data !== 'xref') { 201 if (preg_match('/(.*xref)(.*)/m', $data, $m)) { // xref 0 128 - in one line 202 fseek($this->f, $o_pos+strlen($m[1])); 203 } elseif (preg_match('/(x|r|e|f)+/', $data, $m)) { // correct invalid xref-pointer 204 $tmpOffset = $offset-4+strlen($m[0]); 205 $this->pdf_read_xref($result, $tmpOffset, $start, $end); 206 return; 207 } else { 208 $this->error("Unable to find xref table - Maybe a Problem with 'auto_detect_line_endings'"); 209 } 210 } 211 } 212 213 $o_pos = ftell($this->f); 214 $data = explode(' ', trim(fgets($this->f,1024))); 215 if (count($data) != 2) { 216 fseek($this->f, $o_pos); 217 $data = explode(' ', trim(_fgets($this->f, true))); 218 219 if (count($data) != 2) { 220 if (count($data) > 2) { // no lineending 221 $n_pos = $o_pos+strlen($data[0])+strlen($data[1])+2; 222 fseek($this->f, $n_pos); 223 } else { 224 $this->error("Unexpected header in xref table"); 225 } 226 } 49 if (!class_exists('pdf_parser', false)) { 50 51 class pdf_parser { 52 53 /** 54 * Filename 55 * @var string 56 */ 57 var $filename; 58 59 /** 60 * File resource 61 * @var resource 62 */ 63 var $f; 64 65 /** 66 * PDF Context 67 * @var object pdf_context-Instance 68 */ 69 var $c; 70 71 /** 72 * xref-Data 73 * @var array 74 */ 75 var $xref; 76 77 /** 78 * root-Object 79 * @var array 80 */ 81 var $root; 82 83 /** 84 * PDF version of the loaded document 85 * @var string 86 */ 87 var $pdfVersion; 88 89 /** 90 * For reading encrypted documents and xref/objectstreams are in use 91 * 92 * @var boolean 93 */ 94 var $readPlain = true; 95 96 /** 97 * Constructor 98 * 99 * @param string $filename Source-Filename 100 */ 101 function pdf_parser($filename) { 102 $this->filename = $filename; 103 104 $this->f = @fopen($this->filename, 'rb'); 105 106 if (!$this->f) 107 $this->error(sprintf('Cannot open %s !', $filename)); 108 109 $this->getPDFVersion(); 110 111 $this->c = new pdf_context($this->f); 112 113 // Read xref-Data 114 $this->xref = array(); 115 $this->pdf_read_xref($this->xref, $this->pdf_find_xref()); 116 117 // Check for Encryption 118 $this->getEncryption(); 119 120 // Read root 121 $this->pdf_read_root(); 122 } 123 124 /** 125 * Close the opened file 126 */ 127 function closeFile() { 128 if (isset($this->f) && is_resource($this->f)) { 129 fclose($this->f); 130 unset($this->f); 131 } 132 } 133 134 /** 135 * Print Error and die 136 * 137 * @param string $msg Error-Message 138 */ 139 function error($msg) { 140 die('<b>PDF-Parser Error:</b> '.$msg); 141 } 142 143 /** 144 * Check Trailer for Encryption 145 */ 146 function getEncryption() { 147 if (isset($this->xref['trailer'][1]['/Encrypt'])) { 148 $this->error('File is encrypted!'); 227 149 } 228 $start = $data[0]; 229 $end = $start + $data[1]; 230 } 231 232 if (!isset($result['xref_location'])) { 233 $result['xref_location'] = $offset; 234 } 235 236 if (!isset($result['max_object']) || $end > $result['max_object']) { 237 $result['max_object'] = $end; 238 } 239 240 for (; $start < $end; $start++) { 241 $data = ltrim(fread($this->f, 20)); // Spezifications says: 20 bytes including newlines 242 $offset = substr($data, 0, 10); 243 $generation = substr($data, 11, 5); 244 245 if (!isset ($result['xref'][$start][(int) $generation])) { 246 $result['xref'][$start][(int) $generation] = (int) $offset; 247 } 248 } 249 250 $o_pos = ftell($this->f); 251 $data = fgets($this->f,1024); 252 if (strlen(trim($data)) == 0) 253 $data = fgets($this->f, 1024); 254 255 if (preg_match("/trailer/",$data)) { 256 if (preg_match("/(.*trailer[ \n\r]*)/",$data,$m)) { 257 fseek($this->f, $o_pos+strlen($m[1])); 258 } 259 260 $c =& new pdf_context($this->f); 150 } 151 152 /** 153 * Find/Return /Root 154 * 155 * @return array 156 */ 157 function pdf_find_root() { 158 if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) { 159 $this->error('Wrong Type of Root-Element! Must be an indirect reference'); 160 } 161 162 return $this->xref['trailer'][1]['/Root']; 163 } 164 165 /** 166 * Read the /Root 167 */ 168 function pdf_read_root() { 169 // read root 170 $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root()); 171 } 172 173 /** 174 * Get PDF-Version 175 * 176 * And reset the PDF Version used in FPDI if needed 177 */ 178 function getPDFVersion() { 179 fseek($this->f, 0); 180 preg_match('/\d\.\d/',fread($this->f,16),$m); 181 if (isset($m[0])) 182 $this->pdfVersion = $m[0]; 183 return $this->pdfVersion; 184 } 185 186 /** 187 * Find the xref-Table 188 */ 189 function pdf_find_xref() { 190 $toRead = 1500; 191 192 $stat = fseek ($this->f, -$toRead, SEEK_END); 193 if ($stat === -1) { 194 fseek ($this->f, 0); 195 } 196 $data = fread($this->f, $toRead); 197 198 $pos = strlen($data) - strpos(strrev($data), strrev('startxref')); 199 $data = substr($data, $pos); 200 201 if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) { 202 $this->error('Unable to find pointer to xref table'); 203 } 204 205 return (int) $matches[1]; 206 } 207 208 /** 209 * Read xref-table 210 * 211 * @param array $result Array of xref-table 212 * @param integer $offset of xref-table 213 */ 214 function pdf_read_xref(&$result, $offset) { 215 $o_pos = $offset-min(20, $offset); 216 fseek($this->f, $o_pos); // set some bytes backwards to fetch errorious docs 217 218 $data = fread($this->f, 100); 219 220 $xrefPos = strrpos($data, 'xref'); 221 222 if ($xrefPos === false) { 223 fseek($this->f, $offset); 224 $c = new pdf_context($this->f); 225 $xrefStreamObjDec = $this->pdf_read_value($c); 226 227 if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) { 228 $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename)); 229 } else { 230 $this->error('Unable to find xref table.'); 231 } 232 } 233 234 if (!isset($result['xref_location'])) { 235 $result['xref_location'] = $o_pos+$xrefPos; 236 $result['max_object'] = 0; 237 } 238 239 $cylces = -1; 240 $bytesPerCycle = 100; 241 242 fseek($this->f, $o_pos = $o_pos+$xrefPos+4); // set the handle directly after the "xref"-keyword 243 $data = fread($this->f, $bytesPerCycle); 244 245 while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle*$cylces++, 0))) === false && !feof($this->f)) { 246 $data .= fread($this->f, $bytesPerCycle); 247 } 248 249 if ($trailerPos === false) { 250 $this->error('Trailer keyword not found after xref table'); 251 } 252 253 $data = substr($data, 0, $trailerPos); 254 255 // get Line-Ending 256 preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks 257 258 $differentLineEndings = count(array_unique($m[0])); 259 if ($differentLineEndings > 1) { 260 $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY); 261 } else { 262 $lines = explode($m[0][1], $data); 263 } 264 265 $data = $differentLineEndings = $m = null; 266 unset($data, $differentLineEndings, $m); 267 268 $linesCount = count($lines); 269 270 $start = 1; 271 272 for ($i = 0; $i < $linesCount; $i++) { 273 $line = trim($lines[$i]); 274 if ($line) { 275 $pieces = explode(' ', $line); 276 $c = count($pieces); 277 switch($c) { 278 case 2: 279 $start = (int)$pieces[0]; 280 $end = $start+(int)$pieces[1]; 281 if ($end > $result['max_object']) 282 $result['max_object'] = $end; 283 break; 284 case 3: 285 if (!isset($result['xref'][$start])) 286 $result['xref'][$start] = array(); 287 288 if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) { 289 $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null; 290 } 291 $start++; 292 break; 293 default: 294 $this->error('Unexpected data in xref table'); 295 } 296 } 297 } 298 299 $lines = $pieces = $line = $start = $end = $gen = null; 300 unset($lines, $pieces, $line, $start, $end, $gen); 301 302 fseek($this->f, $o_pos+$trailerPos+7); 303 304 $c = new pdf_context($this->f); 261 305 $trailer = $this->pdf_read_value($c); 262 306 307 $c = null; 308 unset($c); 309 310 if (!isset($result['trailer'])) { 311 $result['trailer'] = $trailer; 312 } 313 263 314 if (isset($trailer[1]['/Prev'])) { 264 $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]); 265 $result['trailer'][1] = array_merge($result['trailer'][1], $trailer[1]); 266 } else { 267 $result['trailer'] = $trailer; 268 } 269 } else { 270 $data = explode(' ', trim($data)); 271 272 if (count($data) != 2) { 273 fseek($this->f, $o_pos); 274 $data = explode(' ', trim (_fgets ($this->f, true))); 275 276 if (count($data) != 2) { 277 $this->error("Unexpected data in xref table"); 315 $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]); 316 } 317 318 $trailer = null; 319 unset($trailer); 320 321 return true; 322 } 323 324 /** 325 * Reads an Value 326 * 327 * @param object $c pdf_context 328 * @param string $token a Token 329 * @return mixed 330 */ 331 function pdf_read_value(&$c, $token = null) { 332 if (is_null($token)) { 333 $token = $this->pdf_read_token($c); 334 } 335 336 if ($token === false) { 337 return false; 338 } 339 340 switch ($token) { 341 case '<': 342 // This is a hex string. 343 // Read the value, then the terminator 344 345 $pos = $c->offset; 346 347 while(1) { 348 349 $match = strpos ($c->buffer, '>', $pos); 350 351 // If you can't find it, try 352 // reading more data from the stream 353 354 if ($match === false) { 355 if (!$c->increase_length()) { 356 return false; 357 } else { 358 continue; 359 } 360 } 361 362 $result = substr ($c->buffer, $c->offset, $match - $c->offset); 363 $c->offset = $match + 1; 364 365 return array (PDF_TYPE_HEX, $result); 366 } 367 368 break; 369 case '<<': 370 // This is a dictionary. 371 372 $result = array(); 373 374 // Recurse into this function until we reach 375 // the end of the dictionary. 376 while (($key = $this->pdf_read_token($c)) !== '>>') { 377 if ($key === false) { 378 return false; 379 } 380 381 if (($value = $this->pdf_read_value($c)) === false) { 382 return false; 383 } 384 385 // Catch missing value 386 if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') { 387 $result[$key] = array(PDF_TYPE_NULL); 388 break; 389 } 390 391 $result[$key] = $value; 392 } 393 394 return array (PDF_TYPE_DICTIONARY, $result); 395 396 case '[': 397 // This is an array. 398 399 $result = array(); 400 401 // Recurse into this function until we reach 402 // the end of the array. 403 while (($token = $this->pdf_read_token($c)) !== ']') { 404 if ($token === false) { 405 return false; 406 } 407 408 if (($value = $this->pdf_read_value($c, $token)) === false) { 409 return false; 410 } 411 412 $result[] = $value; 413 } 414 415 return array (PDF_TYPE_ARRAY, $result); 416 417 case '(' : 418 // This is a string 419 $pos = $c->offset; 420 421 $openBrackets = 1; 422 do { 423 for (; $openBrackets != 0 && $pos < $c->length; $pos++) { 424 switch (ord($c->buffer[$pos])) { 425 case 0x28: // '(' 426 $openBrackets++; 427 break; 428 case 0x29: // ')' 429 $openBrackets--; 430 break; 431 case 0x5C: // backslash 432 $pos++; 433 } 434 } 435 } while($openBrackets != 0 && $c->increase_length()); 436 437 $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1); 438 $c->offset = $pos; 439 440 return array (PDF_TYPE_STRING, $result); 441 442 case 'stream': 443 $o_pos = ftell($c->file)-strlen($c->buffer); 444 $o_offset = $c->offset; 445 446 $c->reset($startpos = $o_pos + $o_offset); 447 448 $e = 0; // ensure line breaks in front of the stream 449 if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) 450 $e++; 451 if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) 452 $e++; 453 454 if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) { 455 $tmp_c = new pdf_context($this->f); 456 $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']); 457 $length = $tmp_length[1][1]; 458 } else { 459 $length = $this->actual_obj[1][1]['/Length'][1]; 460 } 461 462 if ($length > 0) { 463 $c->reset($startpos+$e,$length); 464 $v = $c->buffer; 465 } else { 466 $v = ''; 467 } 468 $c->reset($startpos+$e+$length+9); // 9 = strlen("endstream") 469 470 return array(PDF_TYPE_STREAM, $v); 471 472 default : 473 if (is_numeric ($token)) { 474 // A numeric token. Make sure that 475 // it is not part of something else. 476 if (($tok2 = $this->pdf_read_token ($c)) !== false) { 477 if (is_numeric ($tok2)) { 478 479 // Two numeric tokens in a row. 480 // In this case, we're probably in 481 // front of either an object reference 482 // or an object specification. 483 // Determine the case and return the data 484 if (($tok3 = $this->pdf_read_token ($c)) !== false) { 485 switch ($tok3) { 486 case 'obj' : 487 return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2); 488 case 'R' : 489 return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2); 490 } 491 // If we get to this point, that numeric value up 492 // there was just a numeric value. Push the extra 493 // tokens back into the stack and return the value. 494 array_push ($c->stack, $tok3); 495 } 496 } 497 498 array_push ($c->stack, $tok2); 499 } 500 501 if ($token === (string)((int)$token)) 502 return array (PDF_TYPE_NUMERIC, (int)$token); 503 else 504 return array (PDF_TYPE_REAL, (float)$token); 505 } else if ($token == 'true' || $token == 'false') { 506 return array (PDF_TYPE_BOOLEAN, $token == 'true'); 507 } else if ($token == 'null') { 508 return array (PDF_TYPE_NULL); 509 } else { 510 // Just a token. Return it. 511 return array (PDF_TYPE_TOKEN, $token); 512 } 513 } 514 } 515 516 /** 517 * Resolve an object 518 * 519 * @param object $c pdf_context 520 * @param array $obj_spec The object-data 521 * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para 522 */ 523 function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) { 524 // Exit if we get invalid data 525 if (!is_array($obj_spec)) { 526 $ret = false; 527 return $ret; 528 } 529 530 if ($obj_spec[0] == PDF_TYPE_OBJREF) { 531 532 // This is a reference, resolve it 533 if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) { 534 535 // Save current file position 536 // This is needed if you want to resolve 537 // references while you're reading another object 538 // (e.g.: if you need to determine the length 539 // of a stream) 540 541 $old_pos = ftell($c->file); 542 543 // Reposition the file pointer and 544 // load the object header. 545 546 $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]); 547 548 $header = $this->pdf_read_value($c); 549 550 if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) { 551 $toSearchFor = $obj_spec[1].' '.$obj_spec[2].' obj'; 552 if (preg_match('/'.$toSearchFor.'/', $c->buffer)) { 553 $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor); 554 // reset stack 555 $c->stack = array(); 556 } else { 557 $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location"); 558 } 559 } 560 561 // If we're being asked to store all the information 562 // about the object, we add the object ID and generation 563 // number for later use 564 $result = array(); 565 $this->actual_obj =& $result; 566 if ($encapsulate) { 567 $result = array ( 568 PDF_TYPE_OBJECT, 569 'obj' => $obj_spec[1], 570 'gen' => $obj_spec[2] 571 ); 572 } 573 574 // Now simply read the object data until 575 // we encounter an end-of-object marker 576 while(1) { 577 $value = $this->pdf_read_value($c); 578 if ($value === false || count($result) > 4) { 579 // in this case the parser coudn't find an endobj so we break here 580 break; 581 } 582 583 if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') { 584 break; 585 } 586 587 $result[] = $value; 588 } 589 590 $c->reset($old_pos); 591 592 if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) { 593 $result[0] = PDF_TYPE_STREAM; 594 } 595 596 return $result; 278 597 } 279 } 280 281 $this->pdf_read_xref($result, null, (int) $data[0], (int) $data[0] + (int) $data[1]); 282 } 598 } else { 599 return $obj_spec; 600 } 601 } 602 603 604 605 /** 606 * Reads a token from the file 607 * 608 * @param object $c pdf_context 609 * @return mixed 610 */ 611 function pdf_read_token(&$c) 612 { 613 // If there is a token available 614 // on the stack, pop it out and 615 // return it. 616 617 if (count($c->stack)) { 618 return array_pop($c->stack); 619 } 620 621 // Strip away any whitespace 622 623 do { 624 if (!$c->ensure_content()) { 625 return false; 626 } 627 $c->offset += strspn($c->buffer, " \n\r\t", $c->offset); 628 } while ($c->offset >= $c->length - 1); 629 630 // Get the first character in the stream 631 632 $char = $c->buffer[$c->offset++]; 633 634 switch ($char) { 635 636 case '[': 637 case ']': 638 case '(': 639 case ')': 640 641 // This is either an array or literal string 642 // delimiter, Return it 643 644 return $char; 645 646 case '<': 647 case '>': 648 649 // This could either be a hex string or 650 // dictionary delimiter. Determine the 651 // appropriate case and return the token 652 653 if ($c->buffer[$c->offset] == $char) { 654 if (!$c->ensure_content()) { 655 return false; 656 } 657 $c->offset++; 658 return $char . $char; 659 } else { 660 return $char; 661 } 662 663 case '%': 664 665 // This is a comment - jump over it! 666 667 $pos = $c->offset; 668 while(1) { 669 $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos); 670 if ($match === 0) { 671 if (!$c->increase_length()) { 672 return false; 673 } else { 674 continue; 675 } 676 } 677 678 $c->offset = $m[0][1]+strlen($m[0][0]); 679 680 return $this->pdf_read_token($c); 681 } 682 683 default: 684 685 // This is "another" type of token (probably 686 // a dictionary entry or a numeric value) 687 // Find the end and return it. 688 689 if (!$c->ensure_content()) { 690 return false; 691 } 692 693 while(1) { 694 695 // Determine the length of the token 696 697 $pos = strcspn($c->buffer, " %[]<>()\r\n\t/", $c->offset); 698 699 if ($c->offset + $pos <= $c->length - 1) { 700 break; 701 } else { 702 // If the script reaches this point, 703 // the token may span beyond the end 704 // of the current buffer. Therefore, 705 // we increase the size of the buffer 706 // and try again--just to be safe. 707 708 $c->increase_length(); 709 } 710 } 711 712 $result = substr($c->buffer, $c->offset - 1, $pos + 1); 713 714 $c->offset += $pos; 715 return $result; 716 } 717 } 283 718 } 284 285 286 /**287 * Reads an Value288 *289 * @param object $c pdf_context290 * @param string $token a Token291 * @return mixed292 */293 function pdf_read_value(&$c, $token = null) {294 if (is_null($token)) {295 $token = $this->pdf_read_token($c);296 }297 298 if ($token === false) {299 return false;300 }301 302 switch ($token) {303 case '<':304 // This is a hex string.305 // Read the value, then the terminator306 307 $pos = $c->offset;308 309 while(1) {310 311 $match = strpos ($c->buffer, '>', $pos);312 313 // If you can't find it, try314 // reading more data from the stream315 316 if ($match === false) {317 if (!$c->increase_length()) {318 return false;319 } else {320 continue;321 }322 }323 324 $result = substr ($c->buffer, $c->offset, $match - $c->offset);325 $c->offset = $match+1;326 327 return array (PDF_TYPE_HEX, $result);328 }329 330 break;331 case '<<':332 // This is a dictionary.333 334 $result = array();335 336 // Recurse into this function until we reach337 // the end of the dictionary.338 while (($key = $this->pdf_read_token($c)) !== '>>') {339 if ($key === false) {340 return false;341 }342 343 if (($value = $this->pdf_read_value($c)) === false) {344 return false;345 }346 $result[$key] = $value;347 }348 349 return array (PDF_TYPE_DICTIONARY, $result);350 351 case '[':352 // This is an array.353 354 $result = array();355 356 // Recurse into this function until we reach357 // the end of the array.358 while (($token = $this->pdf_read_token($c)) !== ']') {359 if ($token === false) {360 return false;361 }362 363 if (($value = $this->pdf_read_value($c, $token)) === false) {364 return false;365 }366 367 $result[] = $value;368 }369 370 return array (PDF_TYPE_ARRAY, $result);371 372 case '(' :373 // This is a string374 375 $pos = $c->offset;376 377 while(1) {378 379 // Start by finding the next closed380 // parenthesis381 382 $match = strpos ($c->buffer, ')', $pos);383 384 // If you can't find it, try385 // reading more data from the stream386 387 if ($match === false) {388 if (!$c->increase_length()) {389 return false;390 } else {391 continue;392 }393 }394 395 // Make sure that there is no backslash396 // before the parenthesis. If there is,397 // move on. Otherwise, return the string.398 $esc = preg_match('/([\\\\]+)$/', $tmpresult = substr($c->buffer, $c->offset, $match - $c->offset), $m);399 400 if ($esc === 0 || strlen($m[1]) % 2 == 0) {401 $result = $tmpresult;402 $c->offset = $match + 1;403 return array (PDF_TYPE_STRING, $result);404 } else {405 $pos = $match + 1;406 407 if ($pos > $c->offset + $c->length) {408 $c->increase_length();409 }410 }411 }412 413 case "stream":414 $o_pos = ftell($c->file)-strlen($c->buffer);415 $o_offset = $c->offset;416 417 $c->reset($startpos = $o_pos + $o_offset);418 419 $e = 0; // ensure line breaks in front of the stream420 if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))421 $e++;422 if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))423 $e++;424 425 if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {426 $tmp_c =& new pdf_context($this->f);427 $tmp_length = $this->pdf_resolve_object($tmp_c,$this->actual_obj[1][1]['/Length']);428 $length = $tmp_length[1][1];429 } else {430 $length = $this->actual_obj[1][1]['/Length'][1];431 }432 433 if ($length > 0) {434 $c->reset($startpos+$e,$length);435 $v = $c->buffer;436 } else {437 $v = '';438 }439 $c->reset($startpos+$e+$length+9); // 9 = strlen("endstream")440 441 return array(PDF_TYPE_STREAM, $v);442 443 default :444 if (is_numeric ($token)) {445 // A numeric token. Make sure that446 // it is not part of something else.447 if (($tok2 = $this->pdf_read_token ($c)) !== false) {448 if (is_numeric ($tok2)) {449 450 // Two numeric tokens in a row.451 // In this case, we're probably in452 // front of either an object reference453 // or an object specification.454 // Determine the case and return the data455 if (($tok3 = $this->pdf_read_token ($c)) !== false) {456 switch ($tok3) {457 case 'obj' :458 return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);459 case 'R' :460 return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2);461 }462 // If we get to this point, that numeric value up463 // there was just a numeric value. Push the extra464 // tokens back into the stack and return the value.465 array_push ($c->stack, $tok3);466 }467 }468 469 array_push ($c->stack, $tok2);470 }471 472 return array (PDF_TYPE_NUMERIC, $token);473 } else {474 475 // Just a token. Return it.476 return array (PDF_TYPE_TOKEN, $token);477 }478 479 }480 }481 482 /**483 * Resolve an object484 *485 * @param object $c pdf_context486 * @param array $obj_spec The object-data487 * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para488 */489 function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {490 // Exit if we get invalid data491 if (!is_array($obj_spec)) {492 return false;493 }494 495 if ($obj_spec[0] == PDF_TYPE_OBJREF) {496 497 // This is a reference, resolve it498 if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {499 500 // Save current file position501 // This is needed if you want to resolve502 // references while you're reading another object503 // (e.g.: if you need to determine the length504 // of a stream)505 506 $old_pos = ftell($c->file);507 508 // Reposition the file pointer and509 // load the object header.510 511 $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);512 513 $header = $this->pdf_read_value($c,null,true);514 515 if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) {516 $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");517 }518 519 // If we're being asked to store all the information520 // about the object, we add the object ID and generation521 // number for later use522 $this->actual_obj =& $result;523 if ($encapsulate) {524 $result = array (525 PDF_TYPE_OBJECT,526 'obj' => $obj_spec[1],527 'gen' => $obj_spec[2]528 );529 } else {530 $result = array();531 }532 533 // Now simply read the object data until534 // we encounter an end-of-object marker535 while(1) {536 $value = $this->pdf_read_value($c);537 if ($value === false || count($result) > 4) {538 // in this case the parser coudn't find an endobj so we break here539 break;540 }541 542 if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {543 break;544 }545 546 $result[] = $value;547 }548 549 $c->reset($old_pos);550 551 if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {552 $result[0] = PDF_TYPE_STREAM;553 }554 555 return $result;556 }557 } else {558 return $obj_spec;559 }560 }561 562 563 564 /**565 * Reads a token from the file566 *567 * @param object $c pdf_context568 * @return mixed569 */570 function pdf_read_token(&$c)571 {572 // If there is a token available573 // on the stack, pop it out and574 // return it.575 576 if (count($c->stack)) {577 return array_pop($c->stack);578 }579 580 // Strip away any whitespace581 582 do {583 if (!$c->ensure_content()) {584 return false;585 }586 $c->offset += _strspn($c->buffer, " \n\r\t", $c->offset);587 } while ($c->offset >= $c->length - 1);588 589 // Get the first character in the stream590 591 $char = $c->buffer[$c->offset++];592 593 switch ($char) {594 595 case '[' :596 case ']' :597 case '(' :598 case ')' :599 600 // This is either an array or literal string601 // delimiter, Return it602 603 return $char;604 605 case '<' :606 case '>' :607 608 // This could either be a hex string or609 // dictionary delimiter. Determine the610 // appropriate case and return the token611 612 if ($c->buffer[$c->offset] == $char) {613 if (!$c->ensure_content()) {614 return false;615 }616 $c->offset++;617 return $char . $char;618 } else {619 return $char;620 }621 622 default :623 624 // This is "another" type of token (probably625 // a dictionary entry or a numeric value)626 // Find the end and return it.627 628 if (!$c->ensure_content()) {629 return false;630 }631 632 while(1) {633 634 // Determine the length of the token635 636 $pos = _strcspn($c->buffer, " []<>()\r\n\t/", $c->offset);637 if ($c->offset + $pos <= $c->length - 1) {638 break;639 } else {640 // If the script reaches this point,641 // the token may span beyond the end642 // of the current buffer. Therefore,643 // we increase the size of the buffer644 // and try again--just to be safe.645 646 $c->increase_length();647 }648 }649 650 $result = substr($c->buffer, $c->offset - 1, $pos + 1);651 652 $c->offset += $pos;653 return $result;654 }655 }656 657 658 719 } 659 660 ?>
Note: See TracChangeset
for help on using the changeset viewer.