1 | <?php |
---|
//
//  FPDI - Version 1.4
//
//    Copyright 2004-2010 Setasign - Jan Slabon
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//
---|
19 | |
---|
// Type tags stored at index 0 of every value array produced by the parser.
// Each constant is only defined when no other file has defined it already.
defined('PDF_TYPE_NULL')       || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC')    || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN')      || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX')        || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING')     || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY')      || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC')     || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF')     || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT')     || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM')     || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN')    || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL')       || define('PDF_TYPE_REAL', 12);
---|
46 | |
---|
47 | require_once('pdf_context.php'); |
---|
48 | |
---|
if (!class_exists('pdf_parser', false)) {

    /**
     * Low-level reader for the object structure of a PDF file.
     *
     * Constructing an instance opens the file, reads the PDF version, walks
     * the cross-reference table(s) and trailer(s), aborts on encrypted
     * documents and resolves the /Root object.
     *
     * Parsed values are plain arrays whose element 0 is one of the
     * PDF_TYPE_* constants; the remaining elements depend on the type
     * (see pdf_read_value()).
     *
     * All failures are reported through error(), which terminates the script.
     */
    class pdf_parser
    {
        /**
         * Filename
         * @var string
         */
        public $filename;

        /**
         * File resource
         * @var resource
         */
        public $f;

        /**
         * PDF Context
         * @var object pdf_context-Instance
         */
        public $c;

        /**
         * xref-Data. Keys written by pdf_read_xref(): 'xref_location',
         * 'max_object', 'xref' (object number => generation => byte offset
         * or null for free entries) and 'trailer'.
         * @var array
         */
        public $xref;

        /**
         * root-Object
         * @var array
         */
        public $root;

        /**
         * PDF version of the loaded document
         * @var string
         */
        public $pdfVersion;

        /**
         * For reading encrypted documents and xref/objectstreams are in use
         *
         * @var boolean
         */
        public $readPlain = true;

        /**
         * Reference to the result array of the object currently being read
         * by pdf_resolve_object(). The 'stream' branch of pdf_read_value()
         * looks up the stream's /Length through it.
         *
         * @var array
         */
        public $actual_obj;

        /**
         * Byte offsets of the xref sections already read for the current
         * chain (offset => true). Used to ignore /Prev pointers that would
         * lead back to a section that was read before.
         *
         * @var array
         */
        protected $_xrefOffsetsRead = array();

        /**
         * Constructor
         *
         * @param string $filename Source-Filename
         */
        public function __construct($filename)
        {
            $this->filename = $filename;

            $this->f = @fopen($this->filename, 'rb');

            if (!$this->f) {
                $this->error(sprintf('Cannot open %s !', $filename));
            }

            $this->getPDFVersion();

            $this->c = new pdf_context($this->f);

            // Read xref-Data
            $this->xref = array();
            $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

            // Check for Encryption
            $this->getEncryption();

            // Read root
            $this->pdf_read_root();
        }

        /**
         * PHP 4 style constructor.
         *
         * Kept so that code calling parent::pdf_parser($filename) keeps
         * working; PHP 8 no longer treats this method as a constructor.
         * self:: is used on purpose so that a subclass that defines its own
         * __construct() is not re-entered.
         *
         * @deprecated use __construct()
         * @param string $filename Source-Filename
         */
        public function pdf_parser($filename)
        {
            self::__construct($filename);
        }

        /**
         * Close the opened file
         */
        public function closeFile()
        {
            if (isset($this->f) && is_resource($this->f)) {
                fclose($this->f);
                unset($this->f);
            }
        }

        /**
         * Print Error and die
         *
         * @param string $msg Error-Message
         */
        public function error($msg)
        {
            die('<b>PDF-Parser Error:</b> '.$msg);
        }

        /**
         * Check Trailer for Encryption
         *
         * Reports an error when the first trailer contains an /Encrypt entry.
         */
        public function getEncryption()
        {
            if (isset($this->xref['trailer'][1]['/Encrypt'])) {
                $this->error('File is encrypted!');
            }
        }

        /**
         * Find/Return /Root
         *
         * @return array the /Root entry of the trailer (an indirect reference)
         */
        public function pdf_find_root()
        {
            // A missing /Root is reported the same way as a /Root of the wrong type.
            if (!isset($this->xref['trailer'][1]['/Root'][0])
                || $this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF
            ) {
                $this->error('Wrong Type of Root-Element! Must be an indirect reference');
            }

            return $this->xref['trailer'][1]['/Root'];
        }

        /**
         * Read the /Root
         */
        public function pdf_read_root()
        {
            $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root());
        }

        /**
         * Get PDF-Version
         *
         * Looks for a "digit.digit" pattern in the first 16 bytes of the
         * file and stores it in $pdfVersion when found.
         *
         * @return string|null the stored version (unchanged when nothing matched)
         */
        public function getPDFVersion()
        {
            fseek($this->f, 0);
            preg_match('/\d\.\d/', fread($this->f, 16), $m);
            if (isset($m[0])) {
                $this->pdfVersion = $m[0];
            }
            return $this->pdfVersion;
        }

        /**
         * Find the xref-Table
         *
         * Reads the last 1500 bytes of the file (or the whole file if it is
         * shorter) and returns the number following the last "startxref".
         *
         * @return integer byte offset announced after "startxref"
         */
        public function pdf_find_xref()
        {
            $toRead = 1500;

            if (fseek($this->f, -$toRead, SEEK_END) === -1) {
                // File is shorter than $toRead bytes: start at the beginning.
                fseek($this->f, 0);
            }
            $data = fread($this->f, $toRead);

            $matches = array();
            $keywordPos = strrpos($data, 'startxref');

            if ($keywordPos === false
                || !preg_match('/\s*(\d+).*$/s', substr($data, $keywordPos + 9), $matches) // 9 = strlen("startxref")
            ) {
                $this->error('Unable to find pointer to xref table');
            }

            return (int) $matches[1];
        }

        /**
         * Read xref-table
         *
         * Reads the xref section at $offset together with its trailer and
         * merges it into $result. Entries and the trailer that are already
         * present in $result are kept, so the section read first wins. A
         * /Prev entry in the trailer is followed recursively; a /Prev that
         * points to an offset already read in this chain is ignored.
         *
         * @param array $result Array of xref-table
         * @param integer $offset of xref-table
         * @return boolean always true (errors terminate via error())
         */
        public function pdf_read_xref(&$result, $offset)
        {
            // A result without 'xref_location' marks the start of a new chain.
            if (!isset($result['xref_location'])) {
                $this->_xrefOffsetsRead = array();
            }
            $this->_xrefOffsetsRead[(int) $offset] = true;

            $o_pos = $offset - min(20, $offset);
            fseek($this->f, $o_pos); // start some bytes earlier to tolerate slightly wrong offsets

            $data = fread($this->f, 100);

            $xrefPos = strrpos($data, 'xref');

            if ($xrefPos === false) {
                // No "xref" keyword: check whether an object declaration starts here instead.
                fseek($this->f, $offset);
                $probe = new pdf_context($this->f);
                $xrefStreamObjDec = $this->pdf_read_value($probe);

                if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
                    $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
                } else {
                    $this->error('Unable to find xref table.');
                }
            }

            if (!isset($result['xref_location'])) {
                $result['xref_location'] = $o_pos + $xrefPos;
                $result['max_object'] = 0;
            }

            $cycles = -1;
            $bytesPerCycle = 100;

            // set the handle directly after the "xref"-keyword
            $o_pos = $o_pos + $xrefPos + 4;
            fseek($this->f, $o_pos);
            $data = fread($this->f, $bytesPerCycle);

            // Grow $data chunk by chunk until "trailer" shows up. The search
            // restarts one chunk before the newest one so that a keyword
            // split across two reads is still found.
            while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->f)) {
                $data .= fread($this->f, $bytesPerCycle);
            }

            if ($trailerPos === false) {
                $this->error('Trailer keyword not found after xref table');
            }

            $data = substr($data, 0, $trailerPos);

            // get Line-Ending: check the first 100 bytes for linebreaks
            preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m);

            $differentLineEndings = count(array_unique($m[0]));
            if ($differentLineEndings === 1) {
                // One consistent line ending: a plain explode is enough.
                $lines = explode($m[0][0], $data);
            } else {
                // Mixed line endings, or none within the first 100 bytes.
                $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
            }

            unset($data, $differentLineEndings, $m);

            $linesCount = count($lines);

            $start = 1;

            for ($i = 0; $i < $linesCount; $i++) {
                $line = trim($lines[$i]);
                if ($line) {
                    $pieces = explode(' ', $line);
                    switch (count($pieces)) {
                        case 2:
                            // subsection header: "<first object number> <entry count>"
                            $start = (int) $pieces[0];
                            $end = $start + (int) $pieces[1];
                            if ($end > $result['max_object']) {
                                $result['max_object'] = $end;
                            }
                            break;
                        case 3:
                            // entry: "<offset> <generation> <n|f>"
                            if (!isset($result['xref'][$start])) {
                                $result['xref'][$start] = array();
                            }

                            $gen = (int) $pieces[1];
                            if (!array_key_exists($gen, $result['xref'][$start])) {
                                $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                            }
                            $start++;
                            break;
                        default:
                            $this->error('Unexpected data in xref table');
                    }
                }
            }

            unset($lines, $pieces, $line, $start, $end, $gen);

            fseek($this->f, $o_pos + $trailerPos + 7); // 7 = strlen("trailer")

            $trailerContext = new pdf_context($this->f);
            $trailer = $this->pdf_read_value($trailerContext);

            unset($trailerContext);

            if (!isset($result['trailer'])) {
                $result['trailer'] = $trailer;
            }

            if (isset($trailer[1]['/Prev'][1])) {
                $prevOffset = $trailer[1]['/Prev'][1];
                // Never re-read a section: a /Prev loop would recurse forever.
                if (!isset($this->_xrefOffsetsRead[(int) $prevOffset])) {
                    $this->pdf_read_xref($result, $prevOffset);
                }
            }

            unset($trailer);

            return true;
        }

        /**
         * Reads an Value
         *
         * Result shapes by type:
         *  - array(PDF_TYPE_HEX, string)         content between < and >
         *  - array(PDF_TYPE_DICTIONARY, array)   key token => value
         *  - array(PDF_TYPE_ARRAY, array)        list of values
         *  - array(PDF_TYPE_STRING, string)      raw content between ( and )
         *  - array(PDF_TYPE_STREAM, string)      raw stream bytes
         *  - array(PDF_TYPE_OBJDEC, int, int)    "n g obj"
         *  - array(PDF_TYPE_OBJREF, int, int)    "n g R"
         *  - array(PDF_TYPE_NUMERIC, int) / array(PDF_TYPE_REAL, float)
         *  - array(PDF_TYPE_BOOLEAN, bool), array(PDF_TYPE_NULL)
         *  - array(PDF_TYPE_TOKEN, string)       anything else
         *
         * @param object $c pdf_context
         * @param string $token a Token (read from $c when null)
         * @return mixed value array, or false when the data ends prematurely
         */
        public function pdf_read_value(&$c, $token = null)
        {
            if (is_null($token)) {
                $token = $this->pdf_read_token($c);
            }

            if ($token === false) {
                return false;
            }

            switch ($token) {
                case '<':
                    // This is a hex string.
                    // Read the value, then the terminator

                    $pos = $c->offset;

                    while (1) {
                        $match = strpos($c->buffer, '>', $pos);

                        // If you can't find it, try
                        // reading more data from the stream
                        if ($match === false) {
                            if (!$c->increase_length()) {
                                return false;
                            }
                            continue;
                        }

                        $result = substr($c->buffer, $c->offset, $match - $c->offset);
                        $c->offset = $match + 1;

                        return array(PDF_TYPE_HEX, $result);
                    }
                    // not reached: the loop above only leaves via return

                case '<<':
                    // This is a dictionary.

                    $result = array();

                    // Recurse into this function until we reach
                    // the end of the dictionary.
                    while (($key = $this->pdf_read_token($c)) !== '>>') {
                        if ($key === false) {
                            return false;
                        }

                        if (($value = $this->pdf_read_value($c)) === false) {
                            return false;
                        }

                        // Catch missing value
                        if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
                            $result[$key] = array(PDF_TYPE_NULL);
                            break;
                        }

                        $result[$key] = $value;
                    }

                    return array(PDF_TYPE_DICTIONARY, $result);

                case '[':
                    // This is an array.

                    $result = array();

                    // Recurse into this function until we reach
                    // the end of the array.
                    while (($token = $this->pdf_read_token($c)) !== ']') {
                        if ($token === false) {
                            return false;
                        }

                        if (($value = $this->pdf_read_value($c, $token)) === false) {
                            return false;
                        }

                        $result[] = $value;
                    }

                    return array(PDF_TYPE_ARRAY, $result);

                case '(':
                    // This is a string
                    $pos = $c->offset;

                    // Scan for the matching ')' while honouring nested
                    // brackets; a backslash skips the character after it.
                    $openBrackets = 1;
                    do {
                        for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
                            switch (ord($c->buffer[$pos])) {
                                case 0x28: // '('
                                    $openBrackets++;
                                    break;
                                case 0x29: // ')'
                                    $openBrackets--;
                                    break;
                                case 0x5C: // backslash
                                    $pos++;
                            }
                        }
                    } while ($openBrackets != 0 && $c->increase_length());

                    $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
                    $c->offset = $pos;

                    return array(PDF_TYPE_STRING, $result);

                case 'stream':
                    // Absolute file position of the byte following the "stream" keyword.
                    $o_pos = ftell($c->file) - strlen($c->buffer);
                    $o_offset = $c->offset;

                    $c->reset($startpos = $o_pos + $o_offset);

                    $e = 0; // ensure line breaks in front of the stream
                    if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) {
                        $e++;
                    }
                    if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) {
                        $e++;
                    }

                    // /Length comes from the dictionary of the object that is
                    // currently being resolved; it may itself be a reference.
                    if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {
                        $tmp_c = new pdf_context($this->f);
                        $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']);
                        $length = $tmp_length[1][1];
                    } else {
                        $length = $this->actual_obj[1][1]['/Length'][1];
                    }

                    if ($length > 0) {
                        $c->reset($startpos + $e, $length);
                        $v = $c->buffer;
                    } else {
                        $v = '';
                    }
                    $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

                    return array(PDF_TYPE_STREAM, $v);

                default:
                    if (is_numeric($token)) {
                        // A numeric token. Make sure that
                        // it is not part of something else.
                        if (($tok2 = $this->pdf_read_token($c)) !== false) {
                            if (is_numeric($tok2)) {
                                // Two numeric tokens in a row.
                                // In this case, we're probably in
                                // front of either an object reference
                                // or an object specification.
                                // Determine the case and return the data
                                if (($tok3 = $this->pdf_read_token($c)) !== false) {
                                    switch ($tok3) {
                                        case 'obj':
                                            return array(PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);
                                        case 'R':
                                            return array(PDF_TYPE_OBJREF, (int) $token, (int) $tok2);
                                    }
                                    // If we get to this point, that numeric value up
                                    // there was just a numeric value. Push the extra
                                    // tokens back into the stack and return the value.
                                    array_push($c->stack, $tok3);
                                }
                            }

                            array_push($c->stack, $tok2);
                        }

                        if ($token === (string) ((int) $token)) {
                            return array(PDF_TYPE_NUMERIC, (int) $token);
                        } else {
                            return array(PDF_TYPE_REAL, (float) $token);
                        }
                    } else if ($token == 'true' || $token == 'false') {
                        return array(PDF_TYPE_BOOLEAN, $token == 'true');
                    } else if ($token == 'null') {
                        return array(PDF_TYPE_NULL);
                    } else {
                        // Just a token. Return it.
                        return array(PDF_TYPE_TOKEN, $token);
                    }
            }
        }

        /**
         * Resolve an object
         *
         * @param object $c pdf_context
         * @param array $obj_spec The object-data
         * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para
         * @return mixed false when $obj_spec is not an array; $obj_spec itself
         *               when it is not an indirect reference; null when the
         *               reference is not listed in the xref table; otherwise
         *               the array of values read for the object
         */
        public function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true)
        {
            // Exit if we get invalid data
            if (!is_array($obj_spec)) {
                return false;
            }

            // Direct values need no resolving.
            if ($obj_spec[0] != PDF_TYPE_OBJREF) {
                return $obj_spec;
            }

            // Unknown object/generation pair.
            if (!isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {
                return null;
            }

            // Save current file position
            // This is needed if you want to resolve
            // references while you're reading another object
            // (e.g.: if you need to determine the length
            // of a stream)
            $old_pos = ftell($c->file);

            // Reposition the file pointer and
            // load the object header.
            $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);

            $header = $this->pdf_read_value($c);

            if (!is_array($header)
                || $header[0] != PDF_TYPE_OBJDEC
                || $header[1] != $obj_spec[1]
                || $header[2] != $obj_spec[2]
            ) {
                // The xref offset is not exact: look for the declaration
                // inside the data that is already buffered.
                $toSearchFor = $obj_spec[1].' '.$obj_spec[2].' obj';
                $found = strpos($c->buffer, $toSearchFor);
                if ($found !== false) {
                    $c->offset = $found + strlen($toSearchFor);
                    // reset stack
                    $c->stack = array();
                } else {
                    $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");
                }
            }

            // If we're being asked to store all the information
            // about the object, we add the object ID and generation
            // number for later use
            $result = array();
            // Bound by reference so the 'stream' branch of pdf_read_value()
            // sees the dictionary appended below.
            $this->actual_obj =& $result;
            if ($encapsulate) {
                $result = array(
                    PDF_TYPE_OBJECT,
                    'obj' => $obj_spec[1],
                    'gen' => $obj_spec[2]
                );
            }

            // Now simply read the object data until
            // we encounter an end-of-object marker
            while (1) {
                $value = $this->pdf_read_value($c);
                if ($value === false || count($result) > 4) {
                    // in this case the parser couldn't find an endobj so we break here
                    break;
                }

                if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {
                    break;
                }

                $result[] = $value;
            }

            $c->reset($old_pos);

            if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
                $result[0] = PDF_TYPE_STREAM;
            }

            return $result;
        }

        /**
         * Reads a token from the file
         *
         * Tokens pushed back onto $c->stack are returned first. Otherwise
         * white-space is skipped and the next delimiter ('[', ']', '(', ')',
         * '<', '<<', '>', '>>') or run of regular characters is returned;
         * comments introduced by '%' are skipped.
         *
         * @param object $c pdf_context
         * @return mixed the token string, or false when no more data is available
         */
        public function pdf_read_token(&$c)
        {
            // If there is a token available
            // on the stack, pop it out and
            // return it.
            if (count($c->stack)) {
                return array_pop($c->stack);
            }

            // Strip away any whitespace
            // (PDF white-space: NUL, HT, LF, FF, CR, SP)
            do {
                if (!$c->ensure_content()) {
                    return false;
                }
                $c->offset += strspn($c->buffer, "\x00\x09\x0A\x0C\x0D\x20", $c->offset);
            } while ($c->offset >= $c->length - 1);

            // Get the first character in the stream
            $char = $c->buffer[$c->offset++];

            switch ($char) {
                case '[':
                case ']':
                case '(':
                case ')':
                    // This is either an array or literal string
                    // delimiter, Return it
                    return $char;

                case '<':
                case '>':
                    // This could either be a hex string or
                    // dictionary delimiter. Determine the
                    // appropriate case and return the token
                    if ($c->buffer[$c->offset] == $char) {
                        if (!$c->ensure_content()) {
                            return false;
                        }
                        $c->offset++;
                        return $char . $char;
                    } else {
                        return $char;
                    }

                case '%':
                    // This is a comment - jump over it!
                    $pos = $c->offset;
                    while (1) {
                        $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                        if ($match === 0) {
                            if (!$c->increase_length()) {
                                return false;
                            }
                            continue;
                        }

                        $c->offset = $m[0][1] + strlen($m[0][0]);

                        return $this->pdf_read_token($c);
                    }
                    // not reached: the loop above only leaves via return

                default:
                    // This is "another" type of token (probably
                    // a dictionary entry or a numeric value)
                    // Find the end and return it.
                    if (!$c->ensure_content()) {
                        return false;
                    }

                    while (1) {
                        // Determine the length of the token
                        $pos = strcspn($c->buffer, "\x00\x09\x0A\x0C\x0D\x20%[]<>()/", $c->offset);

                        if ($c->offset + $pos <= $c->length - 1) {
                            break;
                        }

                        // If the script reaches this point,
                        // the token may span beyond the end
                        // of the current buffer. Therefore,
                        // we increase the size of the buffer
                        // and try again--just to be safe.
                        // When no more data can be read, the token simply
                        // ends with the buffer; without this exit the loop
                        // would never terminate.
                        if (!$c->increase_length()) {
                            break;
                        }
                    }

                    $result = substr($c->buffer, $c->offset - 1, $pos + 1);

                    $c->offset += $pos;
                    return $result;
            }
        }
    }
}
---|