fpdi_pdf_parser.php 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. <?php
  2. //
  3. // FPDI - Version 1.5.2
  4. //
  5. // Copyright 2004-2014 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. require_once('pdf_parser.php');
  20. /**
  21. * Class fpdi_pdf_parser
  22. */
  23. class fpdi_pdf_parser extends pdf_parser
  24. {
  25. /**
  26. * Pages
  27. *
  28. * Index begins at 0
  29. *
  30. * @var array
  31. */
  32. protected $_pages;
  33. /**
  34. * Page count
  35. *
  36. * @var integer
  37. */
  38. protected $_pageCount;
  39. /**
  40. * Current page number
  41. *
  42. * @var integer
  43. */
  44. public $pageNo;
  45. /**
  46. * PDF version of imported document
  47. *
  48. * @var string
  49. */
  50. public $_pdfVersion;
  51. /**
  52. * Available BoxTypes
  53. *
  54. * @var array
  55. */
  56. public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
  57. /**
  58. * The constructor.
  59. *
  60. * @param string $filename The source filename
  61. */
  62. public function __construct($filename)
  63. {
  64. parent::__construct($filename);
  65. // resolve Pages-Dictonary
  66. $pages = $this->resolveObject($this->_root[1][1]['/Pages']);
  67. // Read pages
  68. $this->_readPages($pages, $this->_pages);
  69. // count pages;
  70. $this->_pageCount = count($this->_pages);
  71. }
  72. /**
  73. * Get page count from source file.
  74. *
  75. * @return int
  76. */
  77. public function getPageCount()
  78. {
  79. return $this->_pageCount;
  80. }
  81. /**
  82. * Set the page number.
  83. *
  84. * @param int $pageNo Page number to use
  85. * @throws InvalidArgumentException
  86. */
  87. public function setPageNo($pageNo)
  88. {
  89. $pageNo = ((int) $pageNo) - 1;
  90. if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
  91. throw new InvalidArgumentException('Invalid page number!');
  92. }
  93. $this->pageNo = $pageNo;
  94. }
  95. /**
  96. * Get page-resources from current page
  97. *
  98. * @return array|boolean
  99. */
  100. public function getPageResources()
  101. {
  102. return $this->_getPageResources($this->_pages[$this->pageNo]);
  103. }
  104. /**
  105. * Get page-resources from a /Page dictionary.
  106. *
  107. * @param array $obj Array of pdf-data
  108. * @return array|boolean
  109. */
  110. protected function _getPageResources($obj)
  111. {
  112. $obj = $this->resolveObject($obj);
  113. // If the current object has a resources
  114. // dictionary associated with it, we use
  115. // it. Otherwise, we move back to its
  116. // parent object.
  117. if (isset($obj[1][1]['/Resources'])) {
  118. $res = $this->resolveObject($obj[1][1]['/Resources']);
  119. if ($res[0] == pdf_parser::TYPE_OBJECT)
  120. return $res[1];
  121. return $res;
  122. }
  123. if (!isset($obj[1][1]['/Parent'])) {
  124. return false;
  125. }
  126. $res = $this->_getPageResources($obj[1][1]['/Parent']);
  127. if ($res[0] == pdf_parser::TYPE_OBJECT)
  128. return $res[1];
  129. return $res;
  130. }
  131. /**
  132. * Get content of current page.
  133. *
  134. * If /Contents is an array, the streams are concatenated
  135. *
  136. * @return string
  137. */
  138. public function getContent()
  139. {
  140. $buffer = '';
  141. if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
  142. $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
  143. foreach ($contents AS $tmpContent) {
  144. $buffer .= $this->_unFilterStream($tmpContent) . ' ';
  145. }
  146. }
  147. return $buffer;
  148. }
  149. /**
  150. * Resolve all content objects.
  151. *
  152. * @param array $contentRef
  153. * @return array
  154. */
  155. protected function _getPageContent($contentRef)
  156. {
  157. $contents = array();
  158. if ($contentRef[0] == pdf_parser::TYPE_OBJREF) {
  159. $content = $this->resolveObject($contentRef);
  160. if ($content[1][0] == pdf_parser::TYPE_ARRAY) {
  161. $contents = $this->_getPageContent($content[1]);
  162. } else {
  163. $contents[] = $content;
  164. }
  165. } else if ($contentRef[0] == pdf_parser::TYPE_ARRAY) {
  166. foreach ($contentRef[1] AS $tmp_content_ref) {
  167. $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
  168. }
  169. }
  170. return $contents;
  171. }
  172. /**
  173. * Get a boundary box from a page
  174. *
  175. * Array format is same as used by FPDF_TPL.
  176. *
  177. * @param array $page a /Page dictionary
  178. * @param string $boxIndex Type of box {see {@link $availableBoxes})
  179. * @param float Scale factor from user space units to points
  180. *
  181. * @return array|boolean
  182. */
  183. protected function _getPageBox($page, $boxIndex, $k)
  184. {
  185. $page = $this->resolveObject($page);
  186. $box = null;
  187. if (isset($page[1][1][$boxIndex])) {
  188. $box = $page[1][1][$boxIndex];
  189. }
  190. if (!is_null($box) && $box[0] == pdf_parser::TYPE_OBJREF) {
  191. $tmp_box = $this->resolveObject($box);
  192. $box = $tmp_box[1];
  193. }
  194. if (!is_null($box) && $box[0] == pdf_parser::TYPE_ARRAY) {
  195. $b = $box[1];
  196. return array(
  197. 'x' => $b[0][1] / $k,
  198. 'y' => $b[1][1] / $k,
  199. 'w' => abs($b[0][1] - $b[2][1]) / $k,
  200. 'h' => abs($b[1][1] - $b[3][1]) / $k,
  201. 'llx' => min($b[0][1], $b[2][1]) / $k,
  202. 'lly' => min($b[1][1], $b[3][1]) / $k,
  203. 'urx' => max($b[0][1], $b[2][1]) / $k,
  204. 'ury' => max($b[1][1], $b[3][1]) / $k,
  205. );
  206. } else if (!isset($page[1][1]['/Parent'])) {
  207. return false;
  208. } else {
  209. return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
  210. }
  211. }
  212. /**
  213. * Get all page boundary boxes by page number
  214. *
  215. * @param int $pageNo The page number
  216. * @param float $k Scale factor from user space units to points
  217. * @return array
  218. * @throws InvalidArgumentException
  219. */
  220. public function getPageBoxes($pageNo, $k)
  221. {
  222. if (!isset($this->_pages[$pageNo - 1])) {
  223. throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
  224. }
  225. return $this->_getPageBoxes($this->_pages[$pageNo - 1], $k);
  226. }
  227. /**
  228. * Get all boxes from /Page dictionary
  229. *
  230. * @param array $page A /Page dictionary
  231. * @param float $k Scale factor from user space units to points
  232. * @return array
  233. */
  234. protected function _getPageBoxes($page, $k)
  235. {
  236. $boxes = array();
  237. foreach($this->availableBoxes AS $box) {
  238. if ($_box = $this->_getPageBox($page, $box, $k)) {
  239. $boxes[$box] = $_box;
  240. }
  241. }
  242. return $boxes;
  243. }
  244. /**
  245. * Get the page rotation by page number
  246. *
  247. * @param integer $pageNo
  248. * @throws InvalidArgumentException
  249. * @return array
  250. */
  251. public function getPageRotation($pageNo)
  252. {
  253. if (!isset($this->_pages[$pageNo - 1])) {
  254. throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
  255. }
  256. return $this->_getPageRotation($this->_pages[$pageNo - 1]);
  257. }
  258. /**
  259. * Get the rotation value of a page
  260. *
  261. * @param array $obj A /Page dictionary
  262. * @return array|bool
  263. */
  264. protected function _getPageRotation($obj)
  265. {
  266. $obj = $this->resolveObject($obj);
  267. if (isset($obj[1][1]['/Rotate'])) {
  268. $res = $this->resolveObject($obj[1][1]['/Rotate']);
  269. if ($res[0] == pdf_parser::TYPE_OBJECT)
  270. return $res[1];
  271. return $res;
  272. }
  273. if (!isset($obj[1][1]['/Parent'])) {
  274. return false;
  275. }
  276. $res = $this->_getPageRotation($obj[1][1]['/Parent']);
  277. if ($res[0] == pdf_parser::TYPE_OBJECT)
  278. return $res[1];
  279. return $res;
  280. }
  281. /**
  282. * Read all pages
  283. *
  284. * @param array $pages /Pages dictionary
  285. * @param array $result The result array
  286. * @throws Exception
  287. */
  288. protected function _readPages(&$pages, &$result)
  289. {
  290. // Get the kids dictionary
  291. $_kids = $this->resolveObject($pages[1][1]['/Kids']);
  292. if (!is_array($_kids)) {
  293. throw new Exception('Cannot find /Kids in current /Page-Dictionary');
  294. }
  295. if ($_kids[0] === self::TYPE_OBJECT) {
  296. $_kids = $_kids[1];
  297. }
  298. $kids = $_kids[1];
  299. foreach ($kids as $v) {
  300. $pg = $this->resolveObject($v);
  301. if ($pg[1][1]['/Type'][1] === '/Pages') {
  302. // If one of the kids is an embedded
  303. // /Pages array, resolve it as well.
  304. $this->_readPages($pg, $result);
  305. } else {
  306. $result[] = $pg;
  307. }
  308. }
  309. }
  310. }