pdf_parser.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. <?php
  2. //
  3. // FPDI - Version 1.5.2
  4. //
  5. // Copyright 2004-2014 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. /**
  20. * Class pdf_parser
  21. */
  22. class pdf_parser
  23. {
  24. /**
  25. * Type constant: the PDF null object (also stored for a dictionary key that has no value)
  26. *
  27. * @var integer
  28. */
  29. const TYPE_NULL = 0;
  30. /**
  31. * Type constant: a numeric token written in plain integer notation
  32. *
  33. * @var integer
  34. */
  35. const TYPE_NUMERIC = 1;
  36. /**
  37. * Type constant: any other bare token (e.g. "/FlateDecode" or "endobj")
  38. *
  39. * @var integer
  40. */
  41. const TYPE_TOKEN = 2;
  42. /**
  43. * Type constant: a hex string, i.e. the content between "<" and ">"
  44. *
  45. * @var integer
  46. */
  47. const TYPE_HEX = 3;
  48. /**
  49. * Type constant: a literal string, i.e. the content between "(" and the matching ")"
  50. *
  51. * @var integer
  52. */
  53. const TYPE_STRING = 4;
  54. /**
  55. * Type constant: a dictionary ("<<" ... ">>"), stored as key => value array
  56. *
  57. * @var integer
  58. */
  59. const TYPE_DICTIONARY = 5;
  60. /**
  61. * Type constant: an array ("[" ... "]"), stored as list of values
  62. *
  63. * @var integer
  64. */
  65. const TYPE_ARRAY = 6;
  66. /**
  67. * Type constant: an object declaration ("<number> <generation> obj")
  68. *
  69. * @var integer
  70. */
  71. const TYPE_OBJDEC = 7;
  72. /**
  73. * Type constant: an indirect object reference ("<number> <generation> R")
  74. *
  75. * @var integer
  76. */
  77. const TYPE_OBJREF = 8;
  78. /**
  79. * Type constant: a resolved indirect object as built by resolveObject()
  80. *
  81. * @var integer
  82. */
  83. const TYPE_OBJECT = 9;
  84. /**
  85. * Type constant: stream data; also set as type of a resolved object that carries a stream
  86. *
  87. * @var integer
  88. */
  89. const TYPE_STREAM = 10;
  90. /**
  91. * Type constant: a boolean ("true" or "false")
  92. *
  93. * @var integer
  94. */
  95. const TYPE_BOOLEAN = 11;
  96. /**
  97. * Type constant: a numeric token that is not written in plain integer notation (stored as float)
  98. *
  99. * @var integer
  100. */
  101. const TYPE_REAL = 12;
  102. /**
  103. * Number of bytes, counted from the end of the file, that are scanned for the "startxref" keyword.
  104. *
  105. * @var int
  106. */
  107. static public $searchForStartxrefLength = 5500;
  108. /**
  109. * Filename of the source document as passed to the constructor
  110. *
  111. * @var string
  112. */
  113. public $filename;
  114. /**
  115. * File handle of the source document, opened in binary read mode by the constructor
  116. *
  117. * @var resource
  118. */
  119. protected $_f;
  120. /**
  121. * PDF context created on top of the file handle; all tokens and values are read through it
  122. *
  123. * @var pdf_context
  124. */
  125. protected $_c;
  126. /**
  127. * Cross-reference data filled by _readXref(): keys 'xref', 'trailer', 'xrefLocation' and 'maxObject'
  128. *
  129. * @var array
  130. */
  131. protected $_xref;
  132. /**
  133. * Resolved object behind the /Root reference of the trailer
  134. *
  135. * @var array
  136. */
  137. protected $_root;
  138. /**
  139. * PDF version of the loaded document, read lazily from the first 16 bytes of the file
  140. *
  141. * @var string
  142. */
  143. protected $_pdfVersion;
  144. /**
  145. * Flag for reading encrypted documents or documents using xref/object streams (not referenced by this class itself)
  146. *
  147. * @var boolean
  148. */
  149. protected $_readPlain = true;
  150. /**
  151. * The object currently being read; bound by reference to the result built in resolveObject()
  152. *
  153. * @var array
  154. */
  155. protected $_currentObj;
  /**
   * Opens the document and loads everything needed to resolve objects.
   *
   * In order: open the file, detect the PDF version, create the parsing
   * context, read the cross-reference data, reject encrypted documents
   * and resolve the /Root object.
   *
   * @param string $filename Source filename
   * @throws InvalidArgumentException If the file cannot be opened
   * @throws Exception If the xref data cannot be read, the file is encrypted or /Root is invalid
   */
  public function __construct($filename)
  {
      $this->filename = $filename;

      $handle = @fopen($filename, 'rb');
      $this->_f = $handle;
      if (!$handle) {
          throw new InvalidArgumentException(sprintf('Cannot open %s !', $filename));
      }

      $this->getPdfVersion();

      require_once('pdf_context.php');
      $this->_c = new pdf_context($this->_f);

      // Cross-reference data, starting at the offset announced by "startxref"
      $this->_xref = array();
      $xrefOffset = $this->_findXref();
      $this->_readXref($this->_xref, $xrefOffset);

      // Encrypted documents are rejected
      $this->getEncryption();

      // Resolve the document catalog
      $this->_readRoot();
  }
  /**
   * Releases the file handle when the parser instance is destroyed.
   */
  public function __destruct()
  {
      $this->closeFile();
  }
  /**
   * Closes the file handle of the source document, if it is still open.
   *
   * Safe to call repeatedly: after the first call the handle property is
   * unset and further calls do nothing.
   */
  public function closeFile()
  {
      if (!isset($this->_f) || !is_resource($this->_f)) {
          return;
      }

      fclose($this->_f);
      unset($this->_f);
  }
  /**
   * Rejects encrypted documents.
   *
   * Despite its name this method returns nothing: it only checks whether
   * the trailer dictionary carries an /Encrypt entry.
   *
   * @throws Exception If the trailer contains an /Encrypt entry
   */
  public function getEncryption()
  {
      $isEncrypted = isset($this->_xref['trailer'][1]['/Encrypt']);
      if ($isEncrypted) {
          throw new Exception('File is encrypted!');
      }
  }
  /**
   * Returns the PDF version of the document.
   *
   * The version is searched once in the first 16 bytes of the file
   * (first occurrence of "<digit>.<digit>") and then cached. While no
   * version has been found the lookup is repeated on every call.
   *
   * @return string|null The version, or null if none was found
   */
  public function getPdfVersion()
  {
      if ($this->_pdfVersion !== null) {
          return $this->_pdfVersion;
      }

      fseek($this->_f, 0);
      $header = fread($this->_f, 16);
      preg_match('/\d\.\d/', $header, $found);
      if (isset($found[0])) {
          $this->_pdfVersion = $found[0];
      }

      return $this->_pdfVersion;
  }
  /**
   * Resolves the /Root entry of the trailer and stores it in $this->_root.
   *
   * The entry has to be an indirect reference. A trailer that could not be
   * read, or that has no /Root entry at all, is rejected with the same
   * exception instead of triggering undefined index/offset warnings first.
   *
   * @throws Exception If /Root is missing or is not an indirect reference
   */
  protected function _readRoot()
  {
      $root = isset($this->_xref['trailer'][1]['/Root'])
          ? $this->_xref['trailer'][1]['/Root']
          : null;

      if (!is_array($root) || !isset($root[0]) || $root[0] != self::TYPE_OBJREF) {
          throw new Exception('Wrong Type of Root-Element! Must be an indirect reference');
      }

      $this->_root = $this->resolveObject($root);
  }
  /**
   * Locates the byte offset of the cross-reference data.
   *
   * Reads the last self::$searchForStartxrefLength bytes of the file (or
   * the whole file if it is shorter), looks for the last "startxref"
   * keyword (or the misspelled "startref") and returns the first number
   * that follows it.
   *
   * @return integer Offset announced after the keyword
   * @throws Exception If the keyword or the number after it is missing
   */
  protected function _findXref()
  {
      $length = self::$searchForStartxrefLength;
      if (fseek($this->_f, -$length, SEEK_END) === -1) {
          // File is shorter than the search window: read it from the start
          fseek($this->_f, 0);
      }
      $tail = fread($this->_f, $length);

      // Last occurrence of the keyword; fall back to the misspelled variant
      $keyword = 'startxref';
      $keywordStart = strrpos($tail, $keyword);
      if ($keywordStart === false) {
          $keyword = 'startref';
          $keywordStart = strrpos($tail, $keyword);
      }
      if ($keywordStart === false) {
          throw new Exception('Unable to find "startxref" keyword.');
      }

      $afterKeyword = substr($tail, $keywordStart + strlen($keyword));
      if (!preg_match('/\s*(\d+).*$/s', $afterKeyword, $matches)) {
          throw new Exception('Unable to find pointer to xref table.');
      }

      return (int) $matches[1];
  }
  /**
   * Reads a cross-reference table and its trailer into $result.
   *
   * $result receives the keys 'xrefLocation' and 'maxObject' (set on the
   * first call only), 'xref' (object number => generation => byte offset,
   * or null for entries not marked "n") and 'trailer' (the first trailer
   * read). If the trailer has a /Prev entry the method calls itself for the
   * previous table; entries and the trailer that are already present are
   * not overwritten, so the data read first wins.
   *
   * @param array $result Array of xref table entries
   * @param integer $offset of xref table
   * @return boolean Always true
   * @throws Exception If no xref table or trailer is found, or the table contains unexpected data
   */
  protected function _readXref(&$result, $offset)
  {
      // Start up to 20 bytes in front of the offset so that slightly wrong
      // offsets of corrupted documents still hit the keyword.
      $tempPos = $offset - min(20, $offset);
      fseek($this->_f, $tempPos);
      $data = fread($this->_f, 100);
      $xrefPos = strrpos($data, 'xref');

      if ($xrefPos === false) {
          // No keyword found: check what is stored at the offset to choose the error message
          $this->_c->reset($offset);
          $xrefStreamObjDec = $this->_readValue($this->_c);
          if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == self::TYPE_OBJDEC) {
              throw new Exception(
                  sprintf(
                      'This document (%s) probably uses a compression technique which is not supported by the ' .
                      'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                      $this->filename
                  )
              );
          }

          throw new Exception('Unable to find xref table.');
      }

      if (!isset($result['xrefLocation'])) {
          $result['xrefLocation'] = $tempPos + $xrefPos;
          $result['maxObject'] = 0;
      }

      $cycles = -1;
      $bytesPerCycle = 100;

      // Set the handle directly after the "xref" keyword and read until "trailer" shows up
      $tempPos = $tempPos + $xrefPos + 4;
      fseek($this->_f, $tempPos);
      $data = fread($this->_f, $bytesPerCycle);
      while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->_f)) {
          $data .= fread($this->_f, $bytesPerCycle);
      }

      if ($trailerPos === false) {
          throw new Exception('Trailer keyword not found after xref table');
      }

      $data = ltrim(substr($data, 0, $trailerPos));

      // Detect the line ending by looking at the first 100 bytes
      preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m);
      $differentLineEndings = count(array_unique($m[0]));
      if ($differentLineEndings === 1) {
          $lines = explode($m[0][0], $data);
      } else {
          // Mixed line endings, or none at all. The latter used to end up in
          // explode() with an empty separator, which fails.
          $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
      }
      unset($data, $differentLineEndings, $m);

      $start = 1;
      foreach ($lines as $rawLine) {
          $line = trim($rawLine);
          if (!$line) {
              continue;
          }

          $pieces = explode(' ', $line);
          switch (count($pieces)) {
              case 2:
                  // Subsection header: "<first object number> <number of entries>"
                  $start = (int) $pieces[0];
                  $end = $start + (int) $pieces[1];
                  if ($end > $result['maxObject']) {
                      $result['maxObject'] = $end;
                  }
                  break;
              case 3:
                  // Entry: "<offset> <generation> <n|f>"
                  if (!isset($result['xref'][$start])) {
                      $result['xref'][$start] = array();
                  }
                  $gen = (int) $pieces[1];
                  if (!array_key_exists($gen, $result['xref'][$start])) {
                      $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                  }
                  $start++;
                  break;
              default:
                  throw new Exception('Unexpected data in xref table');
          }
      }
      // Free the table data before a possible recursion
      unset($lines, $rawLine, $pieces, $line, $start, $end, $gen);

      // The trailer dictionary follows the "trailer" keyword (7 bytes)
      $this->_c->reset($tempPos + $trailerPos + 7);
      $trailer = $this->_readValue($this->_c);

      if (!isset($result['trailer'])) {
          $result['trailer'] = $trailer;
      }

      if (isset($trailer[1]['/Prev'])) {
          $this->_readXref($result, $trailer[1]['/Prev'][1]);
      }
      unset($trailer);

      return true;
  }
  356. /**
  357. * Reads one PDF value and returns it as array(TYPE_* constant, data...); returns false when a token or the end of a value cannot be read
  358. * Dictionaries and arrays are read recursively; the length of a stream is taken from the /Length entry of $this->_currentObj
  359. * @param pdf_context $c Context to read from
  360. * @param string $token A token that was already read by the caller; if null the next token is read from $c
  361. * @return mixed array describing the value, or false on failure
  362. */
  363. protected function _readValue(&$c, $token = null)
  364. {
  365. if (is_null($token)) {
  366. $token = $this->_readToken($c);
  367. }
  368. if ($token === false) {
  369. return false;
  370. }
  371. switch ($token) {
  372. case '<':
  373. // This is a hex string.
  374. // Read the value, then the terminator
  375. $pos = $c->offset;
  376. while(1) {
  377. $match = strpos ($c->buffer, '>', $pos);
  378. // If you can't find it, try
  379. // reading more data from the stream
  380. if ($match === false) {
  381. if (!$c->increaseLength()) {
  382. return false;
  383. } else {
  384. continue;
  385. }
  386. }
  387. $result = substr ($c->buffer, $c->offset, $match - $c->offset);
  388. $c->offset = $match + 1;
  389. return array (self::TYPE_HEX, $result);
  390. }
  391. break;
  392. case '<<':
  393. // This is a dictionary.
  394. $result = array();
  395. // Recurse into this function until we reach
  396. // the end of the dictionary.
  397. while (($key = $this->_readToken($c)) !== '>>') {
  398. if ($key === false) {
  399. return false;
  400. }
  401. if (($value = $this->_readValue($c)) === false) {
  402. return false;
  403. }
  404. // Catch missing value: the key was directly followed by ">>", so store a null value and end the dictionary
  405. if ($value[0] == self::TYPE_TOKEN && $value[1] == '>>') {
  406. $result[$key] = array(self::TYPE_NULL);
  407. break;
  408. }
  409. $result[$key] = $value;
  410. }
  411. return array (self::TYPE_DICTIONARY, $result);
  412. case '[':
  413. // This is an array.
  414. $result = array();
  415. // Recurse into this function until we reach
  416. // the end of the array.
  417. while (($token = $this->_readToken($c)) !== ']') {
  418. if ($token === false) {
  419. return false;
  420. }
  421. if (($value = $this->_readValue($c, $token)) === false) {
  422. return false;
  423. }
  424. $result[] = $value;
  425. }
  426. return array (self::TYPE_ARRAY, $result);
  427. case '(':
  428. // This is a literal string: count nested parentheses and skip the byte that follows a backslash
  429. $pos = $c->offset;
  430. $openBrackets = 1;
  431. do {
  432. for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
  433. switch (ord($c->buffer[$pos])) {
  434. case 0x28: // '('
  435. $openBrackets++;
  436. break;
  437. case 0x29: // ')'
  438. $openBrackets--;
  439. break;
  440. case 0x5C: // backslash
  441. $pos++;
  442. }
  443. }
  444. } while($openBrackets != 0 && $c->increaseLength());
  445. $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
  446. $c->offset = $pos;
  447. return array (self::TYPE_STRING, $result);
  448. case 'stream': // stream data of the object that is currently being resolved
  449. $tempPos = $c->getPos() - strlen($c->buffer);
  450. $tempOffset = $c->offset;
  451. $c->reset($startPos = $tempPos + $tempOffset);
  452. $e = 0; // number of bytes (0-2) skipped as line break between the "stream" keyword and the data
  453. if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))
  454. $e++;
  455. if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))
  456. $e++;
  457. if ($this->_currentObj[1][1]['/Length'][0] == self::TYPE_OBJREF) {
  458. $tmpLength = $this->resolveObject($this->_currentObj[1][1]['/Length']);
  459. $length = $tmpLength[1][1];
  460. } else {
  461. $length = $this->_currentObj[1][1]['/Length'][1];
  462. }
  463. if ($length > 0) {
  464. $c->reset($startPos + $e, $length);
  465. $v = $c->buffer;
  466. } else {
  467. $v = '';
  468. }
  469. $c->reset($startPos + $e + $length);
  470. $endstream = $this->_readToken($c);
  471. if ($endstream != 'endstream') {
  472. $c->reset($startPos + $e + $length + 9); // 9 = strlen("endstream")
  473. // We don't throw an error here because the next
  474. // round trip will start at a new offset
  475. }
  476. return array(self::TYPE_STREAM, $v);
  477. default :
  478. if (is_numeric($token)) {
  479. // A numeric token. Make sure that
  480. // it is not part of something else.
  481. if (($tok2 = $this->_readToken($c)) !== false) {
  482. if (is_numeric($tok2)) {
  483. // Two numeric tokens in a row.
  484. // In this case, we're probably in
  485. // front of either an object reference
  486. // or an object specification.
  487. // Determine the case and return the data
  488. if (($tok3 = $this->_readToken($c)) !== false) {
  489. switch ($tok3) {
  490. case 'obj':
  491. return array(self::TYPE_OBJDEC, (int)$token, (int)$tok2);
  492. case 'R':
  493. return array(self::TYPE_OBJREF, (int)$token, (int)$tok2);
  494. }
  495. // If we get to this point, that numeric value up
  496. // there was just a numeric value. Push the extra
  497. // tokens back into the stack and return the value.
  498. array_push($c->stack, $tok3);
  499. }
  500. }
  501. array_push($c->stack, $tok2);
  502. }
  503. if ($token === (string)((int)$token)) // plain integer notation => integer, any other numeric notation => real
  504. return array(self::TYPE_NUMERIC, (int)$token);
  505. else
  506. return array(self::TYPE_REAL, (float)$token);
  507. } else if ($token == 'true' || $token == 'false') {
  508. return array(self::TYPE_BOOLEAN, $token == 'true');
  509. } else if ($token == 'null') {
  510. return array(self::TYPE_NULL);
  511. } else {
  512. // Just a token. Return it.
  513. return array(self::TYPE_TOKEN, $token);
  514. }
  515. }
  516. }
  /**
   * Resolves an indirect reference to the object it points to.
   *
   * Anything that is not an array yields false; an array that is not an
   * object reference is returned unchanged. For a reference the object is
   * read at the offset found in the xref data and returned as
   * array(TYPE_OBJECT or TYPE_STREAM, 'obj' => number, 'gen' => generation,
   * followed by the values read up to "endobj"). The position of the
   * context is restored afterwards.
   *
   * @param array $objSpec The object-data
   * @return array|boolean
   * @throws Exception If the object is not listed in the xref data or not found at its offset
   */
  public function resolveObject($objSpec)
  {
      $context = $this->_c;

      // Exit if we get invalid data
      if (!is_array($objSpec)) {
          return false;
      }

      // Only references need resolving
      if ($objSpec[0] != self::TYPE_OBJREF) {
          return $objSpec;
      }

      $objectNumber = $objSpec[1];
      $generation = $objSpec[2];

      if (!isset($this->_xref['xref'][$objectNumber][$generation])) {
          throw new Exception(
              sprintf("Unable to find object (%s, %s) at expected location.", $objectNumber, $generation)
          );
      }

      // Remember the current position: references may be resolved while
      // another object is being read (e.g. the /Length of a stream).
      $previousPos = $context->getPos();

      // Jump to the object and read its header
      $context->reset($this->_xref['xref'][$objectNumber][$generation]);
      $header = $this->_readValue($context);

      $headerMatches = $header[0] == self::TYPE_OBJDEC
          && $header[1] == $objectNumber
          && $header[2] == $generation;

      if (!$headerMatches) {
          // The offset is slightly off: look for the header in the buffered data
          $toSearchFor = $objectNumber . ' ' . $generation . ' obj';
          if (!preg_match('/' . $toSearchFor . '/', $context->buffer)) {
              throw new Exception(
                  sprintf("Unable to find object (%s, %s) at expected location.", $objectNumber, $generation)
              );
          }

          $context->offset = strpos($context->buffer, $toSearchFor) + strlen($toSearchFor);
          // reset stack
          $context->stack = array();
      }

      // The result carries the object number and generation for later use
      $result = array (
          self::TYPE_OBJECT,
          'obj' => $objectNumber,
          'gen' => $generation
      );

      $this->_currentObj =& $result;

      // Read values until "endobj". Stop as well when nothing more can be
      // read or the result grew beyond 4 elements (no "endobj" found).
      while (true) {
          $value = $this->_readValue($context);
          if ($value === false || count($result) > 4) {
              break;
          }
          if ($value[0] == self::TYPE_TOKEN && $value[1] === 'endobj') {
              break;
          }
          $result[] = $value;
      }

      $context->reset($previousPos);

      if (isset($result[2][0]) && $result[2][0] == self::TYPE_STREAM) {
          $result[0] = self::TYPE_STREAM;
      }

      return $result;
  }
  /**
   * Reads the next token from the context.
   *
   * Tokens pushed back onto $c->stack are returned first. Otherwise leading
   * whitespace and comments are skipped and one of the following is
   * returned: a delimiter ("[", "]", "(", ")", "<", ">", "<<", ">>") or
   * the run of bytes up to the next whitespace or delimiter character.
   *
   * @param pdf_context $c
   * @return mixed The token as string, or false if no more content is available
   */
  protected function _readToken($c)
  {
      // If there is a token available on the stack, pop it out and return it.
      if (count($c->stack)) {
          return array_pop($c->stack);
      }

      // Strip away any whitespace
      do {
          if (!$c->ensureContent()) {
              return false;
          }
          $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
      } while ($c->offset >= $c->length - 1);

      // Get the first character in the stream
      $char = $c->buffer[$c->offset++];

      switch ($char) {
          case '[':
          case ']':
          case '(':
          case ')':
              // This is either an array or literal string delimiter, return it
              return $char;

          case '<':
          case '>':
              // This could either be a hex string or dictionary delimiter.
              // Determine the appropriate case and return the token
              if ($c->buffer[$c->offset] == $char) {
                  if (!$c->ensureContent()) {
                      return false;
                  }
                  $c->offset++;
                  return $char . $char;
              }
              return $char;

          case '%':
              // This is a comment - jump over it!
              $pos = $c->offset;
              while (1) {
                  $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                  if ($match === 0) {
                      // No line break buffered yet: read more or give up
                      if (!$c->increaseLength()) {
                          return false;
                      }
                      continue;
                  }

                  $c->offset = $m[0][1] + strlen($m[0][0]);
                  return $this->_readToken($c);
              }

          default:
              // This is "another" type of token (probably a dictionary
              // entry or a numeric value). Find the end and return it.
              if (!$c->ensureContent()) {
                  return false;
              }

              while (1) {
                  // Determine the length of the token
                  $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);

                  if ($c->offset + $pos <= $c->length - 1) {
                      break;
                  }

                  // The token may span beyond the end of the current buffer,
                  // so the buffer is enlarged and the scan repeated. When the
                  // buffer cannot grow (increaseLength() reports false, as
                  // checked at the other call sites of this class) the token
                  // ends with the data; looping on would never terminate.
                  if (!$c->increaseLength()) {
                      break;
                  }
              }

              $result = substr($c->buffer, $c->offset - 1, $pos + 1);
              $c->offset += $pos;

              return $result;
      }
  }
  /**
   * Decodes the data of a stream object by applying its filters in order.
   *
   * The /Filter entry of the stream dictionary may be a single name, an
   * array of names or a reference to either of them. Handled filters:
   * /FlateDecode (and /Fl), /LZWDecode, /ASCII85Decode and /ASCIIHexDecode.
   * If gzuncompress() fails for a Flate stream, up to 8 leading bytes are
   * dropped one by one and gzinflate() is tried on the remainder.
   *
   * @param array $obj Stream object: dictionary in $obj[1], stream value in $obj[2]
   * @return string The decoded stream data
   * @throws Exception If zlib is missing, the data cannot be inflated or a filter is not supported
   */
  protected function _unFilterStream($obj)
  {
      // Collect the filters to apply
      $filterChain = array();
      if (isset($obj[1][1]['/Filter'])) {
          $entry = $obj[1][1]['/Filter'];

          if ($entry[0] == self::TYPE_OBJREF) {
              $resolved = $this->resolveObject($entry);
              $entry = $resolved[1];
          }

          if ($entry[0] == self::TYPE_TOKEN) {
              $filterChain[] = $entry;
          } else if ($entry[0] == self::TYPE_ARRAY) {
              $filterChain = $entry[1];
          }
      }

      $data = $obj[2][1];

      foreach ($filterChain as $filter) {
          $name = $filter[1];
          switch ($name) {
              case '/FlateDecode':
              case '/Fl':
                  if (!function_exists('gzuncompress')) {
                      throw new Exception(
                          sprintf('To handle %s filter, please compile php with zlib support.', $name)
                      );
                  }

                  $raw = $data;
                  $data = (strlen($data) > 0) ? @gzuncompress($data) : '';

                  if ($data === false) {
                      // Retry as raw deflate data, dropping one leading byte per attempt
                      for ($tries = 0; $tries < 8 && ($data === false || strlen($data) < strlen($raw)); $tries++) {
                          $raw = substr($raw, 1);
                          $data = @gzinflate($raw);
                      }

                      if ($data === false) {
                          throw new Exception('Error while decompressing stream.');
                      }
                  }
                  break;

              case '/LZWDecode':
                  require_once('filters/FilterLZW.php');
                  $decoder = new FilterLZW();
                  $data = $decoder->decode($data);
                  break;

              case '/ASCII85Decode':
                  require_once('filters/FilterASCII85.php');
                  $decoder = new FilterASCII85();
                  $data = $decoder->decode($data);
                  break;

              case '/ASCIIHexDecode':
                  require_once('filters/FilterASCIIHexDecode.php');
                  $decoder = new FilterASCIIHexDecode();
                  $data = $decoder->decode($data);
                  break;

              case null:
                  // No filter name: nothing to decode
                  break;

              default:
                  throw new Exception(sprintf('Unsupported Filter: %s', $name));
          }
      }

      return $data;
  }
  748. }