tcpdf_filters.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. <?php
  2. //============================================================+
  3. // File name : tcpdf_filters.php
  4. // Version : 1.0.001
  5. // Begin : 2011-05-23
  6. // Last Update : 2014-04-25
  7. // Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
  8. // License : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
  9. // -------------------------------------------------------------------
  10. // Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
  11. //
  12. // This file is part of TCPDF software library.
  13. //
  14. // TCPDF is free software: you can redistribute it and/or modify it
  15. // under the terms of the GNU Lesser General Public License as
  16. // published by the Free Software Foundation, either version 3 of the
  17. // License, or (at your option) any later version.
  18. //
  19. // TCPDF is distributed in the hope that it will be useful, but
  20. // WITHOUT ANY WARRANTY; without even the implied warranty of
  21. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22. // See the GNU Lesser General Public License for more details.
  23. //
  24. // You should have received a copy of the License
  25. // along with TCPDF. If not, see
  26. // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  27. //
  28. // See LICENSE.TXT file for more information.
  29. // -------------------------------------------------------------------
  30. //
  31. // Description : This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).
  32. //
  33. //============================================================+
  34. /**
  35. * @file
  36. * This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).<br>
  37. * @package com.tecnick.tcpdf
  38. * @author Nicola Asuni
  39. * @version 1.0.001
  40. */
  41. /**
  42. * @class TCPDF_FILTERS
  43. * This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).<br>
  44. * @package com.tecnick.tcpdf
  45. * @brief This is a PHP class for decoding common PDF filters.
  46. * @version 1.0.001
  47. * @author Nicola Asuni - info@tecnick.com
  48. */
  class TCPDF_FILTERS {

      /**
       * Names of the filters for which this class provides a working decoder.
       * Filters dispatched by decodeFilter() but absent from this list only throw.
       * @private static
       */
      private static $available_filters = array('ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode');

      // -----------------------------------------------------------------------------

      /**
       * Get a list of available decoding filters.
       * @return array Array of available filter decoders.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function getAvailableFilters() {
          return self::$available_filters;
      }

      /**
       * Decode data using the specified filter type.
       * Unknown filter names fall back to decodeFilterStandard(), which returns the data unchanged.
       * @param string $filter Filter name.
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @throws Exception if the data is invalid for the filter or the filter is not implemented.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilter($filter, $data) {
          switch ($filter) {
              case 'ASCIIHexDecode': {
                  return self::decodeFilterASCIIHexDecode($data);
              }
              case 'ASCII85Decode': {
                  return self::decodeFilterASCII85Decode($data);
              }
              case 'LZWDecode': {
                  return self::decodeFilterLZWDecode($data);
              }
              case 'FlateDecode': {
                  return self::decodeFilterFlateDecode($data);
              }
              case 'RunLengthDecode': {
                  return self::decodeFilterRunLengthDecode($data);
              }
              case 'CCITTFaxDecode': {
                  return self::decodeFilterCCITTFaxDecode($data);
              }
              case 'JBIG2Decode': {
                  return self::decodeFilterJBIG2Decode($data);
              }
              case 'DCTDecode': {
                  return self::decodeFilterDCTDecode($data);
              }
              case 'JPXDecode': {
                  return self::decodeFilterJPXDecode($data);
              }
              case 'Crypt': {
                  return self::decodeFilterCrypt($data);
              }
              default: {
                  return self::decodeFilterStandard($data);
              }
          }
      }

      // --- FILTERS (PDF 32000-2008 - 7.4 Filters) ------------------------------

      /**
       * Standard
       * Default decoding filter (leaves data unchanged).
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterStandard($data) {
          return $data;
      }

      /**
       * ASCIIHexDecode
       * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
       * White-space is ignored and everything from the EOD marker '>' onwards is discarded.
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @throws Exception on a non-hexadecimal character, or on an odd number of digits without EOD marker.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterASCIIHexDecode($data) {
          // all white-space characters shall be ignored
          $data = preg_replace('/[\s]/', '', $data);
          // check for EOD character: GREATER-THAN SIGN (3Eh)
          $eod_pos = strpos($data, '>');
          $has_eod = ($eod_pos !== false);
          if ($has_eod) {
              // remove EOD and extra data (if any)
              $data = (string) substr($data, 0, $eod_pos);
          }
          if ((strlen($data) % 2) != 0) {
              // odd number of hexadecimal digits
              if ($has_eod) {
                  // EOD shall behave as if a 0 (zero) followed the last digit:
                  // the missing low nibble is appended, not inserted before the last digit
                  $data .= '0';
              } else {
                  self::Error('decodeFilterASCIIHexDecode: invalid code');
              }
          }
          // check for invalid characters
          if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
              self::Error('decodeFilterASCIIHexDecode: invalid code');
          }
          // get one byte of binary data for each pair of ASCII hexadecimal digits
          return pack('H*', $data);
      }

      /**
       * ASCII85Decode
       * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
       * White-space is ignored, an optional leading '<~' is stripped and everything from the
       * EOD marker '~>' onwards is discarded.
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @throws Exception on a character outside '!'..'u' and 'z', a 'z' inside a group,
       *                   a group value above 2^32 - 1, or a final group of a single character.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterASCII85Decode($data) {
          // initialize string to return
          $decoded = '';
          // all white-space characters shall be ignored
          $data = preg_replace('/[\s]/', '', $data);
          // remove the optional start sequence <~ (3Ch)(7Eh), only when it really is a prefix
          if (strncmp($data, '<~', 2) === 0) {
              $data = (string) substr($data, 2);
          }
          // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
          $eod_pos = strpos($data, '~>');
          if ($eod_pos !== false) {
              // remove EOD and extra data (if any)
              $data = (string) substr($data, 0, $eod_pos);
          }
          // data length
          $data_length = strlen($data);
          // check for invalid characters: only '!' (21h) to 'u' (75h) and 'z' (7Ah) are allowed
          if (preg_match('/[^\x21-\x75\x7A]/', $data) > 0) {
              self::Error('decodeFilterASCII85Decode: invalid code');
          }
          // z sequence: a lone 'z' stands for four zero bytes
          $zseq = chr(0).chr(0).chr(0).chr(0);
          // position inside a group of 5 characters (0-4)
          $group_pos = 0;
          $tuple = 0;
          $pow85 = array((85*85*85*85), (85*85*85), (85*85), 85, 1);
          // for each byte
          for ($i = 0; $i < $data_length; ++$i) {
              // get char value
              $char = ord($data[$i]);
              if ($char == 122) { // 'z'
                  if ($group_pos == 0) {
                      $decoded .= $zseq;
                  } else {
                      // 'z' is only legal between groups
                      self::Error('decodeFilterASCII85Decode: invalid code');
                  }
              } else {
                  $tuple += (($char - 33) * $pow85[$group_pos]);
                  if ($group_pos == 4) {
                      // the value represented by a group of 5 characters shall not be greater than 2^32 - 1
                      if ($tuple > 0xFFFFFFFF) {
                          self::Error('decodeFilterASCII85Decode: invalid code');
                      }
                      $decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF).chr(($tuple >> 8) & 0xFF).chr($tuple & 0xFF);
                      $tuple = 0;
                      $group_pos = 0;
                  } else {
                      ++$group_pos;
                  }
              }
          }
          if ($group_pos > 1) {
              // round the partial group up so that truncating to the kept bytes
              // restores the values the encoder started from
              $tuple += $pow85[($group_pos - 1)];
          }
          // last tuple (if any): n characters carry n - 1 bytes
          switch ($group_pos) {
              case 4: {
                  $decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF).chr(($tuple >> 8) & 0xFF);
                  break;
              }
              case 3: {
                  $decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF);
                  break;
              }
              case 2: {
                  $decoded .= chr(($tuple >> 24) & 0xFF);
                  break;
              }
              case 1: {
                  // a single trailing character cannot encode any byte
                  self::Error('decodeFilterASCII85Decode: invalid code');
                  break;
              }
          }
          return $decoded;
      }

      /**
       * LZWDecode
       * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
       * Codes start at 9 bits and grow to 10, 11 and 12 bits one entry early
       * (when the next free dictionary index reaches 511, 1023 and 2047).
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterLZWDecode($data) {
          // initialize string to return
          $decoded = '';
          // convert string to binary string
          $data_length = strlen($data);
          $bitstring = '';
          for ($i = 0; $i < $data_length; ++$i) {
              $bitstring .= sprintf('%08b', ord($data[$i]));
          }
          // total number of bits and current read position
          $total_bits = strlen($bitstring);
          $offset = 0;
          // initialize code length in bits
          $bitlen = 9;
          // initialize dictionary index (256 = clear-table, 257 = EOD)
          $dix = 258;
          // initialize the dictionary (with the first 256 entries).
          $dictionary = array_map('chr', range(0, 255));
          // previous code; 256 means "table just cleared", so that a stream that omits
          // the leading clear-table marker is decoded like one that has it
          $prev_index = 256;
          // read whole codes only: trailing padding bits shorter than a code are ignored
          while (($offset + $bitlen) <= $total_bits) {
              $index = bindec(substr($bitstring, $offset, $bitlen));
              if ($index == 257) {
                  // EOD marker
                  break;
              }
              $offset += $bitlen;
              if ($index == 256) { // clear-table marker
                  // reset code length in bits
                  $bitlen = 9;
                  // reset dictionary index
                  $dix = 258;
                  $prev_index = 256;
                  // reset the dictionary (with the first 256 entries).
                  $dictionary = array_map('chr', range(0, 255));
              } elseif ($prev_index == 256) {
                  // first code after a reset: emitted as is, nothing to add to the dictionary yet
                  $decoded .= $dictionary[$index];
                  $prev_index = $index;
              } else {
                  // check if index exist in the dictionary
                  if ($index < $dix) {
                      // index exist on dictionary
                      $decoded .= $dictionary[$index];
                      $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                      // store current index
                      $prev_index = $index;
                  } else {
                      // index does not exist yet: it can only be the entry being built right now,
                      // i.e. the previous string followed by its own first character
                      $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                      $decoded .= $dic_val;
                      // the entry stored below becomes the previous string for the next code
                      $prev_index = $dix;
                  }
                  // update dictionary
                  $dictionary[$dix] = $dic_val;
                  ++$dix;
                  // change bit length by case
                  if ($dix == 2047) {
                      $bitlen = 12;
                  } elseif ($dix == 1023) {
                      $bitlen = 11;
                  } elseif ($dix == 511) {
                      $bitlen = 10;
                  }
              }
          }
          return $decoded;
      }

      /**
       * FlateDecode
       * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @throws Exception if gzuncompress() cannot decompress the data.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterFlateDecode($data) {
          // the warning is silenced because the failure is reported through the exception below
          $decoded = @gzuncompress($data);
          if ($decoded === false) {
              self::Error('decodeFilterFlateDecode: invalid code');
          }
          return $decoded;
      }

      /**
       * RunLengthDecode
       * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
       * Decoding stops at the EOD length byte (128), at the end of the data, or at a
       * repeat run whose data byte is missing.
       * @param string $data Data to decode.
       * @return string Decoded data string.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterRunLengthDecode($data) {
          // initialize string to return
          $decoded = '';
          // data length
          $data_length = strlen($data);
          $i = 0;
          while ($i < $data_length) {
              // get current length byte value
              $byte = ord($data[$i]);
              if ($byte == 128) {
                  // a length value of 128 denote EOD
                  break;
              }
              if ($byte < 128) {
                  // if the length byte is in the range 0 to 127
                  // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
                  $decoded .= (string) substr($data, ($i + 1), ($byte + 1));
                  // move to next block
                  $i += ($byte + 2);
              } else {
                  if (($i + 1) >= $data_length) {
                      // truncated stream: the byte to repeat is missing
                      break;
                  }
                  // if length is in the range 129 to 255,
                  // the following single byte shall be copied 257 - length (2 to 128) times during decompression
                  $decoded .= str_repeat($data[($i + 1)], (257 - $byte));
                  // move to next block
                  $i += 2;
              }
          }
          return $decoded;
      }

      /**
       * CCITTFaxDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
       * Decompresses data encoded using the CCITT facsimile standard, reproducing the original data (typically monochrome image data at 1 bit per pixel).
       * @param string $data Data to decode.
       * @return string Decoded data string (never returned: the method always throws).
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterCCITTFaxDecode($data) {
          self::Error('~decodeFilterCCITTFaxDecode: this method has not been yet implemented');
      }

      /**
       * JBIG2Decode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
       * Decompresses data encoded using the JBIG2 standard, reproducing the original monochrome (1 bit per pixel) image data (or an approximation of that data).
       * @param string $data Data to decode.
       * @return string Decoded data string (never returned: the method always throws).
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterJBIG2Decode($data) {
          self::Error('~decodeFilterJBIG2Decode: this method has not been yet implemented');
      }

      /**
       * DCTDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
       * Decompresses data encoded using a DCT (discrete cosine transform) technique based on the JPEG standard, reproducing image sample data that approximates the original data.
       * @param string $data Data to decode.
       * @return string Decoded data string (never returned: the method always throws).
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterDCTDecode($data) {
          self::Error('~decodeFilterDCTDecode: this method has not been yet implemented');
      }

      /**
       * JPXDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
       * Decompresses data encoded using the wavelet-based JPEG2000 standard, reproducing the original image data.
       * @param string $data Data to decode.
       * @return string Decoded data string (never returned: the method always throws).
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterJPXDecode($data) {
          self::Error('~decodeFilterJPXDecode: this method has not been yet implemented');
      }

      /**
       * Crypt (NOT IMPLEMENTED - THROWS AN EXCEPTION)
       * Decrypts data encrypted by a security handler, reproducing the data as it was before encryption.
       * @param string $data Data to decode.
       * @return string Decoded data string (never returned: the method always throws).
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function decodeFilterCrypt($data) {
          self::Error('~decodeFilterCrypt: this method has not been yet implemented');
      }

      // --- END FILTERS SECTION -------------------------------------------------

      /**
       * Throw an exception.
       * The message is prefixed with 'TCPDF_PARSER ERROR: '.
       * @param string $msg The error message
       * @throws Exception always.
       * @since 1.0.000 (2011-05-23)
       * @public static
       */
      public static function Error($msg) {
          throw new Exception('TCPDF_PARSER ERROR: '.$msg);
      }

  } // END OF TCPDF_FILTERS CLASS
  457. //============================================================+
  458. // END OF FILE
  459. //============================================================+