1 | <?php
|
---|
2 | //
|
---|
3 | // FPDI - Version 1.4
|
---|
4 | //
|
---|
5 | // Copyright 2004-2010 Setasign - Jan Slabon
|
---|
6 | //
|
---|
7 | // Licensed under the Apache License, Version 2.0 (the "License");
|
---|
8 | // you may not use this file except in compliance with the License.
|
---|
9 | // You may obtain a copy of the License at
|
---|
10 | //
|
---|
11 | // http://www.apache.org/licenses/LICENSE-2.0
|
---|
12 | //
|
---|
13 | // Unless required by applicable law or agreed to in writing, software
|
---|
14 | // distributed under the License is distributed on an "AS IS" BASIS,
|
---|
15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
16 | // See the License for the specific language governing permissions and
|
---|
17 | // limitations under the License.
|
---|
18 | //
|
---|
19 |
|
---|
// Identifiers for the value types produced by the parser. Every returned
// value is an array whose first element is one of these constants.
// Each constant is only defined when it does not exist yet, so another file
// may already have declared the same set.
defined('PDF_TYPE_NULL') || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC') || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN') || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX') || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING') || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY') || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC') || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF') || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT') || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM') || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN') || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL') || define('PDF_TYPE_REAL', 12);
46 |
|
---|
47 | require_once('pdf_context.php');
|
---|
48 |
|
---|
49 | if (!class_exists('pdf_parser', false)) {
|
---|
50 |
|
---|
51 | class pdf_parser {
|
---|
52 |
|
---|
/**
 * Name of the source file, as passed to the constructor
 * @var string
 */
var $filename;

/**
 * File resource returned by fopen() for $filename
 * @var resource
 */
var $f;

/**
 * PDF Context wrapping the file resource
 * @var object pdf_context-Instance
 */
var $c;

/**
 * xref-Data collected by pdf_read_xref()
 * (keys: 'xref_location', 'max_object', 'xref', 'trailer')
 * @var array
 */
var $xref;

/**
 * root-Object as returned by pdf_resolve_object()
 * @var array
 */
var $root;

/**
 * PDF version of the loaded document (e.g. "1.4")
 * @var string
 */
var $pdfVersion;

/**
 * For reading encrypted documents and xref/objectstreams are in use
 * (not read anywhere in this class)
 *
 * @var boolean
 */
var $readPlain = true;

/**
 * The object that pdf_resolve_object() is currently building.
 * pdf_read_value() looks up the /Length entry of a stream through it.
 * Declared explicitly because it used to be created on the fly, which
 * is deprecated for dynamic properties in recent PHP versions.
 *
 * @var array
 */
var $actual_obj;
95 |
|
---|
/**
 * Constructor
 *
 * Opens the source file, detects the PDF version, reads the
 * cross-reference data, rejects encrypted documents and finally loads
 * the document's /Root object. Every failure is reported through
 * error().
 *
 * @param string $filename Source-Filename
 */
function __construct($filename) {
    $this->filename = $filename;

    // the "@" hides fopen()'s own warning; the failure is reported below
    $this->f = @fopen($this->filename, 'rb');

    if (!$this->f)
        $this->error(sprintf('Cannot open %s !', $filename));

    $this->getPDFVersion();

    $this->c = new pdf_context($this->f);

    // Read xref-Data
    $this->xref = array();
    $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

    // Check for Encryption
    $this->getEncryption();

    // Read root
    $this->pdf_read_root();
}

/**
 * Old-style constructor name, kept for backward compatibility
 *
 * A method named like its class is no longer used as the constructor by
 * current PHP versions, so the real work lives in __construct(). This
 * method stays available for code that calls it explicitly (for example
 * parent::pdf_parser($filename) from a subclass).
 *
 * @param string $filename Source-Filename
 */
function pdf_parser($filename) {
    $this->__construct($filename);
}
123 |
|
---|
/**
 * Close the opened file
 *
 * Does nothing when the file property is not set or does not hold a
 * resource. After closing, the property is removed from the object.
 */
function closeFile() {
    if (!isset($this->f) || !is_resource($this->f)) {
        return;
    }

    fclose($this->f);
    unset($this->f);
}
133 |
|
---|
/**
 * Print Error and die
 *
 * The message is written to the output, prefixed with a bold
 * "PDF-Parser Error:" label, and the script is terminated.
 *
 * @param string $msg Error-Message
 */
function error($msg) {
    $output = '<b>PDF-Parser Error:</b> ' . $msg;

    die($output);
}
142 |
|
---|
/**
 * Check Trailer for Encryption
 *
 * Reports an error when the trailer dictionary contains an /Encrypt entry.
 */
function getEncryption() {
    $hasEncryptEntry = isset($this->xref['trailer'][1]['/Encrypt']);

    if ($hasEncryptEntry) {
        $this->error('File is encrypted!');
    }
}
151 |
|
---|
/**
 * Find/Return /Root
 *
 * Takes the /Root entry from the trailer dictionary and reports an error
 * when it is not an indirect object reference.
 *
 * @return array The /Root entry of the trailer
 */
function pdf_find_root() {
    $rootEntry = $this->xref['trailer'][1]['/Root'];

    if ($rootEntry[0] != PDF_TYPE_OBJREF) {
        $this->error('Wrong Type of Root-Element! Must be an indirect reference');
    }

    return $rootEntry;
}
164 |
|
---|
/**
 * Read the /Root
 *
 * Resolves the reference returned by pdf_find_root() with the parser's
 * own context and stores the result in $this->root.
 */
function pdf_read_root() {
    $rootReference = $this->pdf_find_root();

    $this->root = $this->pdf_resolve_object($this->c, $rootReference);
}
172 |
|
---|
/**
 * Get PDF-Version
 *
 * Reads the first 16 bytes of the file and stores the first
 * "digit.digit" sequence found there in $this->pdfVersion. The property
 * is left untouched when nothing matches.
 *
 * @return string The value of $this->pdfVersion
 */
function getPDFVersion() {
    fseek($this->f, 0);
    $fileStart = fread($this->f, 16);

    $found = preg_match('/\d\.\d/', $fileStart, $m);
    if ($found) {
        $this->pdfVersion = $m[0];
    }

    return $this->pdfVersion;
}
185 |
|
---|
/**
 * Find the xref-Table
 *
 * Looks at the last 1500 bytes of the file (or the whole file when the
 * backwards seek fails), skips everything up to and including the last
 * "startxref" keyword and returns the first number that follows.
 *
 * @return integer Byte offset read after "startxref"
 */
function pdf_find_xref() {
    $toRead = 1500;

    if (fseek($this->f, -$toRead, SEEK_END) === -1) {
        // seeking back that far failed: start at the beginning instead
        fseek($this->f, 0);
    }
    $tail = fread($this->f, $toRead);

    // Searching the reversed data for the reversed keyword locates the
    // LAST "startxref"; the subtraction yields the position right behind it.
    $reversedHit = strpos(strrev($tail), strrev('startxref'));
    $afterKeyword = strlen($tail) - $reversedHit;
    $tail = substr($tail, $afterKeyword);

    $found = preg_match('/\s*(\d+).*$/s', $tail, $matches);
    if (!$found) {
        $this->error('Unable to find pointer to xref table');
    }

    return (int) $matches[1];
}
207 |
|
---|
/**
 * Read xref-table
 *
 * Parses the cross-reference table at $offset, reads the trailer
 * dictionary that follows it and, when the trailer has a /Prev entry,
 * continues recursively with the table it points to. Entries that are
 * already present in $result are never overwritten, so the table read
 * first wins.
 *
 * $result receives the keys 'xref_location', 'max_object', 'xref'
 * (object number => generation => offset, null for non-"n" entries)
 * and 'trailer'.
 *
 * @param array $result Array of xref-table
 * @param integer $offset of xref-table
 * @return boolean Always true (failures are reported through error())
 */
function pdf_read_xref(&$result, $offset) {
    $o_pos = $offset-min(20, $offset);
    fseek($this->f, $o_pos); // set some bytes backwards to fetch erroneous docs

    $data = fread($this->f, 100);

    $xrefPos = strrpos($data, 'xref');

    if ($xrefPos === false) {
        // No "xref" keyword near the offset. If an object declaration
        // starts there instead, report the unsupported-compression
        // message; otherwise report a missing table.
        fseek($this->f, $offset);
        $c = new pdf_context($this->f);
        $xrefStreamObjDec = $this->pdf_read_value($c);

        if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
            $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
        } else {
            $this->error('Unable to find xref table.');
        }
    }

    if (!isset($result['xref_location'])) {
        $result['xref_location'] = $o_pos+$xrefPos;
        $result['max_object'] = 0;
    }

    $cycles = -1;
    $bytesPerCycle = 100;

    fseek($this->f, $o_pos = $o_pos+$xrefPos+4); // set the handle directly after the "xref"-keyword
    $data = fread($this->f, $bytesPerCycle);

    // Read chunk by chunk until "trailer" shows up. The search start lags
    // one chunk behind so a keyword split across two chunks is still found.
    while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle*$cycles++, 0))) === false && !feof($this->f)) {
        $data .= fread($this->f, $bytesPerCycle);
    }

    if ($trailerPos === false) {
        $this->error('Trailer keyword not found after xref table');
    }

    $data = substr($data, 0, $trailerPos);

    // get Line-Ending
    preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks

    $differentLineEndings = count(array_unique($m[0]));
    if ($differentLineEndings == 1) {
        // Exactly one kind of line break: split on it directly. The FIRST
        // match is used because a second one does not have to exist.
        $lines = explode($m[0][0], $data);
    } else {
        // Mixed line breaks, or none at all within the first 100 bytes
        $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
    }

    $data = $differentLineEndings = $m = null;
    unset($data, $differentLineEndings, $m);

    $linesCount = count($lines);

    $start = 1;

    for ($i = 0; $i < $linesCount; $i++) {
        $line = trim($lines[$i]);
        if ($line) {
            $pieces = explode(' ', $line);
            $pieceCount = count($pieces);
            switch($pieceCount) {
                case 2:
                    // subsection header: "<first object number> <entry count>"
                    $start = (int)$pieces[0];
                    $end = $start+(int)$pieces[1];
                    if ($end > $result['max_object'])
                        $result['max_object'] = $end;
                    break;
                case 3:
                    // entry: "<offset> <generation> <n|f>"
                    if (!isset($result['xref'][$start]))
                        $result['xref'][$start] = array();

                    if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
                        $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                    }
                    $start++;
                    break;
                default:
                    $this->error('Unexpected data in xref table');
            }
        }
    }

    $lines = $pieces = $line = $start = $end = $gen = null;
    unset($lines, $pieces, $line, $start, $end, $gen);

    // continue right behind the "trailer" keyword (7 = strlen("trailer"))
    fseek($this->f, $o_pos+$trailerPos+7);

    $c = new pdf_context($this->f);
    $trailer = $this->pdf_read_value($c);

    $c = null;
    unset($c);

    if (!isset($result['trailer'])) {
        $result['trailer'] = $trailer;
    }

    if (isset($trailer[1]['/Prev'])) {
        $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
    }

    $trailer = null;
    unset($trailer);

    return true;
}
323 |
|
---|
/**
 * Reads a value
 *
 * Turns the next token (or the one handed in) into a typed value. The
 * result is an array whose first element is one of the PDF_TYPE_*
 * constants, followed by the payload for that type. Dictionaries and
 * arrays are read recursively.
 *
 * @param object $c pdf_context
 * @param string $token a Token; when null the next token is read from $c
 * @return mixed The value array, or false when no (complete) value could be read
 */
function pdf_read_value(&$c, $token = null) {
    if (is_null($token)) {
        $token = $this->pdf_read_token($c);
    }

    if ($token === false) {
        return false;
    }

    switch ($token) {
        case '<':
            // Hex string: everything up to the closing ">"
            $searchFrom = $c->offset;

            for (;;) {
                $closing = strpos($c->buffer, '>', $searchFrom);

                if ($closing !== false) {
                    $hex = substr($c->buffer, $c->offset, $closing - $c->offset);
                    $c->offset = $closing + 1;

                    return array(PDF_TYPE_HEX, $hex);
                }

                // Terminator not in the buffer yet: load more data or give up
                if (!$c->increase_length()) {
                    return false;
                }
            }

        case '<<':
            // Dictionary: alternating keys and values until ">>"
            $entries = array();

            for (;;) {
                $key = $this->pdf_read_token($c);
                if ($key === '>>') {
                    break;
                }
                if ($key === false) {
                    return false;
                }

                $value = $this->pdf_read_value($c);
                if ($value === false) {
                    return false;
                }

                // A key directly followed by ">>" has no value: store a
                // null value for it and end the dictionary.
                if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
                    $entries[$key] = array(PDF_TYPE_NULL);
                    break;
                }

                $entries[$key] = $value;
            }

            return array(PDF_TYPE_DICTIONARY, $entries);

        case '[':
            // Array: values until "]"
            $items = array();

            for (;;) {
                $itemToken = $this->pdf_read_token($c);
                if ($itemToken === ']') {
                    break;
                }
                if ($itemToken === false) {
                    return false;
                }

                $item = $this->pdf_read_value($c, $itemToken);
                if ($item === false) {
                    return false;
                }

                $items[] = $item;
            }

            return array(PDF_TYPE_ARRAY, $items);

        case '(':
            // Literal string: scan for the matching ")" while counting
            // nested brackets and skipping the character after a backslash
            $cursor = $c->offset;
            $depth = 1;

            do {
                while ($depth != 0 && $cursor < $c->length) {
                    $byte = ord($c->buffer[$cursor]);

                    if ($byte == 0x28) {            // '('
                        $depth++;
                    } elseif ($byte == 0x29) {      // ')'
                        $depth--;
                    } elseif ($byte == 0x5C) {      // backslash
                        $cursor++;
                    }

                    $cursor++;
                }
            } while ($depth != 0 && $c->increase_length());

            $text = substr($c->buffer, $c->offset, $cursor - $c->offset - 1);
            $c->offset = $cursor;

            return array(PDF_TYPE_STRING, $text);

        case 'stream':
            // Absolute file position right behind the "stream" keyword
            $bufferStart = ftell($c->file) - strlen($c->buffer);
            $startpos = $bufferStart + $c->offset;

            $c->reset($startpos);

            // Count the line break characters in front of the stream data
            $firstByte = $c->buffer[0];
            $secondByte = $c->buffer[1];

            $e = 0;
            if ($firstByte == chr(10) || $firstByte == chr(13)) {
                $e++;
            }
            if ($secondByte == chr(10) && $firstByte != chr(10)) {
                $e++;
            }

            // The stream length comes from the /Length entry of the object
            // currently being resolved; it may itself be a reference.
            $lengthEntry = $this->actual_obj[1][1]['/Length'];
            if ($lengthEntry[0] == PDF_TYPE_OBJREF) {
                $tmp_c = new pdf_context($this->f);
                $resolvedLength = $this->pdf_resolve_object($tmp_c, $lengthEntry);
                $length = $resolvedLength[1][1];
            } else {
                $length = $lengthEntry[1];
            }

            $v = '';
            if ($length > 0) {
                $c->reset($startpos + $e, $length);
                $v = $c->buffer;
            }

            $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

            return array(PDF_TYPE_STREAM, $v);

        default:
            if (!is_numeric($token)) {
                if ($token == 'true' || $token == 'false') {
                    return array(PDF_TYPE_BOOLEAN, $token == 'true');
                }

                if ($token == 'null') {
                    return array(PDF_TYPE_NULL);
                }

                // Just a token. Return it.
                return array(PDF_TYPE_TOKEN, $token);
            }

            // A numeric token. Look ahead to find out whether it starts
            // an object declaration ("n g obj") or a reference ("n g R").
            $second = $this->pdf_read_token($c);
            if ($second !== false) {
                if (is_numeric($second)) {
                    $third = $this->pdf_read_token($c);
                    if ($third !== false) {
                        if ($third == 'obj') {
                            return array(PDF_TYPE_OBJDEC, (int) $token, (int) $second);
                        }
                        if ($third == 'R') {
                            return array(PDF_TYPE_OBJREF, (int) $token, (int) $second);
                        }

                        // Neither: it was a plain number after all, so the
                        // look-ahead tokens go back onto the stack.
                        array_push($c->stack, $third);
                    }
                }

                array_push($c->stack, $second);
            }

            // Tokens that survive an int round trip are integers,
            // every other numeric token is a real number
            $isInteger = ($token === (string)((int)$token));

            return $isInteger
                ? array(PDF_TYPE_NUMERIC, (int)$token)
                : array(PDF_TYPE_REAL, (float)$token);
    }
}
515 |
|
---|
/**
 * Resolve an object
 *
 * A value that is not an object reference is returned unchanged. A
 * reference is looked up in the xref data, the object is read from its
 * offset and the context is put back to the file position it had
 * before.
 *
 * @param object $c pdf_context
 * @param array $obj_spec The object-data
 * @param boolean $encapsulate When true the result starts with PDF_TYPE_OBJECT
 *                             and carries the 'obj' and 'gen' numbers
 * @return mixed false for non-array input, the resolved object (or $obj_spec
 *               itself when it is no reference), null when the reference has
 *               no xref entry
 */
function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {
    // Exit if we get invalid data
    if (!is_array($obj_spec)) {
        return false;
    }

    // Anything but a reference is already resolved
    if ($obj_spec[0] != PDF_TYPE_OBJREF) {
        return $obj_spec;
    }

    // Unknown object/generation pair: nothing to return
    if (!isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {
        return null;
    }

    // Remember the current file position. This matters when a reference
    // is resolved while another object is being read (e.g. to determine
    // the length of a stream).
    $old_pos = ftell($c->file);

    // Jump to the object and read its header ("n g obj")
    $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);

    $header = $this->pdf_read_value($c);

    $headerMatches = $header[0] == PDF_TYPE_OBJDEC
        && $header[1] == $obj_spec[1]
        && $header[2] == $obj_spec[2];

    if (!$headerMatches) {
        // The header is not exactly where the xref entry says; look for
        // it inside the data that is already buffered.
        $toSearchFor = $obj_spec[1].' '.$obj_spec[2].' obj';
        if (!preg_match('/'.$toSearchFor.'/', $c->buffer)) {
            $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");
        } else {
            $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor);
            // reset stack
            $c->stack = array();
        }
    }

    // The result is exposed through $this->actual_obj while it is being
    // built (pdf_read_value() reads it for streams). When asked to, the
    // object ID and generation number are stored for later use.
    $result = array();
    $this->actual_obj =& $result;
    if ($encapsulate) {
        $result = array (
            PDF_TYPE_OBJECT,
            'obj' => $obj_spec[1],
            'gen' => $obj_spec[2]
        );
    }

    // Collect values until the end-of-object marker
    for (;;) {
        $value = $this->pdf_read_value($c);

        // No further value, or more entries than an object should have:
        // the parser couldn't find an endobj, so stop here
        if ($value === false || count($result) > 4) {
            break;
        }

        $isEndMarker = $value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj';
        if ($isEndMarker) {
            break;
        }

        $result[] = $value;
    }

    $c->reset($old_pos);

    // An object whose third entry is a stream is flagged as a stream itself
    if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
        $result[0] = PDF_TYPE_STREAM;
    }

    return $result;
}
602 |
|
---|
603 |
|
---|
604 |
|
---|
/**
 * Reads a token from the file
 *
 * Tokens that were pushed back onto the context's stack are returned
 * first. Otherwise leading whitespace is skipped and the next token is
 * cut out of the buffer: a single delimiter ("[", "]", "(", ")", "<",
 * ">"), a double delimiter ("<<", ">>") or a run of characters up to the
 * next whitespace or delimiter. Comments ("%" up to the end of the line)
 * are skipped.
 *
 * @param object $c pdf_context
 * @return mixed The token as a string, or false when no more content is available
 */
function pdf_read_token(&$c)
{
    // If there is a token available
    // on the stack, pop it out and
    // return it.

    if (count($c->stack)) {
        return array_pop($c->stack);
    }

    // Strip away any whitespace

    do {
        if (!$c->ensure_content()) {
            return false;
        }
        $c->offset += strspn($c->buffer, " \n\r\t", $c->offset);
    } while ($c->offset >= $c->length - 1);

    // Get the first character in the stream

    $char = $c->buffer[$c->offset++];

    switch ($char) {

        case '[':
        case ']':
        case '(':
        case ')':

            // This is either an array or literal string
            // delimiter, Return it

            return $char;

        case '<':
        case '>':

            // This could either be a hex string or
            // dictionary delimiter. Determine the
            // appropriate case and return the token

            if ($c->buffer[$c->offset] == $char) {
                if (!$c->ensure_content()) {
                    return false;
                }
                $c->offset++;
                return $char . $char;
            } else {
                return $char;
            }

        case '%':

            // This is a comment - jump over it!

            $pos = $c->offset;
            while(1) {
                $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                if ($match === 0) {
                    // no line break buffered yet: load more or give up
                    if (!$c->increase_length()) {
                        return false;
                    } else {
                        continue;
                    }
                }

                // continue right behind the line break that ends the comment
                $c->offset = $m[0][1]+strlen($m[0][0]);

                return $this->pdf_read_token($c);
            }

        default:

            // This is "another" type of token (probably
            // a dictionary entry or a numeric value)
            // Find the end and return it.

            if (!$c->ensure_content()) {
                return false;
            }

            while(1) {

                // Determine the length of the token

                $pos = strcspn($c->buffer, " %[]<>()\r\n\t/", $c->offset);

                if ($c->offset + $pos <= $c->length - 1) {
                    break;
                }

                // If the script reaches this point,
                // the token may span beyond the end
                // of the current buffer. Therefore,
                // we increase the size of the buffer
                // and try again--just to be safe.
                //
                // When the buffer cannot be increased (the other callers
                // in this class treat a false return as "no more data")
                // another round would compute the very same $pos and the
                // loop would never end, so the token is taken as it is.

                if (!$c->increase_length()) {
                    break;
                }
            }

            // $c->offset already points behind the first character,
            // hence the "- 1" / "+ 1"
            $result = substr($c->buffer, $c->offset - 1, $pos + 1);

            $c->offset += $pos;
            return $result;
    }
}
718 | }
|
---|
719 | }
|
---|