lizard_decompress.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. /*
  2. Lizard - Fast LZ compression algorithm
  3. Copyright (C) 2011-2016, Yann Collet
  4. Copyright (C) 2016-2017, Przemyslaw Skibinski <[email protected]>
  5. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6. Redistribution and use in source and binary forms, with or without
  7. modification, are permitted provided that the following conditions are
  8. met:
  9. * Redistributions of source code must retain the above copyright
  10. notice, this list of conditions and the following disclaimer.
  11. * Redistributions in binary form must reproduce the above
  12. copyright notice, this list of conditions and the following disclaimer
  13. in the documentation and/or other materials provided with the
  14. distribution.
  15. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. You can contact the author at :
  27. - Lizard source repository : https://github.com/inikep/lizard
  28. */
  29. /**************************************
  30. * Includes
  31. **************************************/
  32. //#define LIZARD_STATS 1 // 0=simple stats, 1=more, 2=full
  33. #ifdef LIZARD_STATS
  34. #include "test/lizard_stats.h"
  35. #endif
  36. #include "lizard_compress.h"
  37. #include "lizard_decompress.h"
  38. #include "lizard_common.h"
  39. #include <stdio.h> // printf
  40. #include <stdint.h> // intptr_t
  41. /*-************************************
  42. * Local Structures and types
  43. **************************************/
  44. typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
  45. typedef enum { full = 0, partial = 1 } earlyEnd_directive;
  46. #include "lizard_decompress_lz4.h"
  47. #ifndef USE_LZ4_ONLY
  48. #ifdef LIZARD_USE_TEST
  49. #include "test/lizard_common_test.h"
  50. #include "test/lizard_decompress_test.h"
  51. #else
  52. #include "lizard_decompress_liz.h"
  53. #endif
  54. #endif
  55. #include "entropy/huf.h"
  56. /*-*****************************
  57. * Decompression functions
  58. *******************************/
  59. FORCE_INLINE size_t Lizard_readStream(int flag, const BYTE** ip, const BYTE* const iend, BYTE* op, BYTE* const oend, const BYTE** streamPtr, const BYTE** streamEnd, int streamFlag)
  60. {
  61. if (!flag) {
  62. if (*ip > iend - 3) return 0;
  63. *streamPtr = *ip + 3;
  64. *streamEnd = *streamPtr + MEM_readLE24(*ip);
  65. if (*streamEnd < *streamPtr) return 0;
  66. *ip = *streamEnd;
  67. #ifdef LIZARD_STATS
  68. uncompr_stream[streamFlag] += *streamEnd-*streamPtr;
  69. #else
  70. (void)streamFlag;
  71. #endif
  72. return 1;
  73. } else {
  74. #ifndef LIZARD_NO_HUFFMAN
  75. size_t res, streamLen, comprStreamLen;
  76. if (*ip > iend - 6) return 0;
  77. streamLen = MEM_readLE24(*ip);
  78. comprStreamLen = MEM_readLE24(*ip + 3);
  79. if ((op > oend - streamLen) || (*ip + comprStreamLen > iend - 6)) return 0;
  80. res = HUF_decompress(op, streamLen, *ip + 6, comprStreamLen);
  81. if (HUF_isError(res) || (res != streamLen)) return 0;
  82. *ip += comprStreamLen + 6;
  83. *streamPtr = op;
  84. *streamEnd = *streamPtr + streamLen;
  85. #ifdef LIZARD_STATS
  86. compr_stream[streamFlag] += comprStreamLen + 6;
  87. decompr_stream[streamFlag] += *streamEnd-*streamPtr;
  88. #endif
  89. return 1;
  90. #else
  91. fprintf(stderr, "compiled with LIZARD_NO_HUFFMAN\n");
  92. (void)op; (void)oend;
  93. return 0;
  94. #endif
  95. }
  96. }
  97. FORCE_INLINE int Lizard_decompress_generic(
  98. const char* source,
  99. char* const dest,
  100. int inputSize,
  101. int outputSize, /* this value is the max size of Output Buffer. */
  102. int partialDecoding, /* full, partial */
  103. int targetOutputSize, /* only used if partialDecoding==partial */
  104. int dict, /* noDict, withPrefix64k, usingExtDict */
  105. const BYTE* const lowPrefix, /* == dest if dict == noDict */
  106. const BYTE* const dictStart, /* only if dict==usingExtDict */
  107. const size_t dictSize /* note : = 0 if noDict */
  108. )
  109. {
  110. /* Local Variables */
  111. const BYTE* ip = (const BYTE*) source, *istart = (const BYTE*) source;
  112. const BYTE* const iend = ip + inputSize;
  113. BYTE* op = (BYTE*) dest;
  114. BYTE* const oend = op + outputSize;
  115. BYTE* oexit = op + targetOutputSize;
  116. Lizard_parameters params;
  117. Lizard_dstream_t ctx;
  118. BYTE* decompFlagsBase, *decompOff24Base, *decompOff16Base, *decompLiteralsBase = NULL;
  119. int res, compressionLevel;
  120. if (inputSize < 1) { LIZARD_LOG_DECOMPRESS("inputSize=%d outputSize=%d targetOutputSize=%d partialDecoding=%d\n", inputSize, outputSize, targetOutputSize, partialDecoding); return 0; }
  121. compressionLevel = *ip++;
  122. if (compressionLevel < LIZARD_MIN_CLEVEL || compressionLevel > LIZARD_MAX_CLEVEL) {
  123. LIZARD_LOG_DECOMPRESS("ERROR Lizard_decompress_generic inputSize=%d compressionLevel=%d\n", inputSize, compressionLevel);
  124. return -1;
  125. }
  126. LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic ip=%p inputSize=%d targetOutputSize=%d dest=%p outputSize=%d cLevel=%d dict=%d dictSize=%d dictStart=%p partialDecoding=%d\n", ip, inputSize, targetOutputSize, dest, outputSize, compressionLevel, dict, (int)dictSize, dictStart, partialDecoding);
  127. decompLiteralsBase = (BYTE*)malloc(4*LIZARD_HUF_BLOCK_SIZE);
  128. if (!decompLiteralsBase) return -1;
  129. decompFlagsBase = decompLiteralsBase + LIZARD_HUF_BLOCK_SIZE;
  130. decompOff24Base = decompFlagsBase + LIZARD_HUF_BLOCK_SIZE;
  131. decompOff16Base = decompOff24Base + LIZARD_HUF_BLOCK_SIZE;
  132. #ifdef LIZARD_STATS
  133. init_stats();
  134. #endif
  135. (void)istart;
  136. while (ip < iend)
  137. {
  138. res = *ip++;
  139. if (res == LIZARD_FLAG_UNCOMPRESSED) /* uncompressed */
  140. {
  141. uint32_t length;
  142. if (ip > iend - 3) { LIZARD_LOG_DECOMPRESS("UNCOMPRESSED ip[%p] > iend[%p] - 3\n", ip, iend); goto _output_error; }
  143. length = MEM_readLE24(ip);
  144. ip += 3;
  145. // printf("%d: total=%d block=%d UNCOMPRESSED op=%p oexit=%p oend=%p\n", (int)(op-(BYTE*)dest) ,(int)(ip-istart), length, op, oexit, oend);
  146. if (ip + length > iend || op + length > oend) { LIZARD_LOG_DECOMPRESS("UNCOMPRESSED ip[%p]+length[%d] > iend[%p]\n", ip, length, iend); goto _output_error; }
  147. memcpy(op, ip, length);
  148. op += length;
  149. ip += length;
  150. if ((partialDecoding) && (op >= oexit)) break;
  151. #ifdef LIZARD_STATS
  152. uncompr_stream[LIZARD_STREAM_UNCOMPRESSED] += length;
  153. #endif
  154. continue;
  155. }
  156. if (res&LIZARD_FLAG_LEN) {
  157. LIZARD_LOG_DECOMPRESS("res=%d\n", res); goto _output_error;
  158. }
  159. if (ip > iend - 5*3) goto _output_error;
  160. ctx.lenPtr = (const BYTE*)ip + 3;
  161. ctx.lenEnd = ctx.lenPtr + MEM_readLE24(ip);
  162. if (ctx.lenEnd < ctx.lenPtr || (ctx.lenEnd > iend - 3)) goto _output_error;
  163. #ifdef LIZARD_STATS
  164. uncompr_stream[LIZARD_STREAM_LEN] += ctx.lenEnd-ctx.lenPtr + 3;
  165. #endif
  166. ip = ctx.lenEnd;
  167. { size_t streamLen;
  168. #ifdef LIZARD_USE_LOGS
  169. const BYTE* ipos;
  170. size_t comprFlagsLen, comprLiteralsLen, total;
  171. #endif
  172. streamLen = Lizard_readStream(res&LIZARD_FLAG_OFFSET16, &ip, iend, decompOff16Base, decompOff16Base + LIZARD_HUF_BLOCK_SIZE, &ctx.offset16Ptr, &ctx.offset16End, LIZARD_STREAM_OFFSET16);
  173. if (streamLen == 0) goto _output_error;
  174. streamLen = Lizard_readStream(res&LIZARD_FLAG_OFFSET24, &ip, iend, decompOff24Base, decompOff24Base + LIZARD_HUF_BLOCK_SIZE, &ctx.offset24Ptr, &ctx.offset24End, LIZARD_STREAM_OFFSET24);
  175. if (streamLen == 0) goto _output_error;
  176. #ifdef LIZARD_USE_LOGS
  177. ipos = ip;
  178. streamLen = Lizard_readStream(res&LIZARD_FLAG_FLAGS, &ip, iend, decompFlagsBase, decompFlagsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.flagsPtr, &ctx.flagsEnd, LIZARD_STREAM_FLAGS);
  179. if (streamLen == 0) goto _output_error;
  180. streamLen = (size_t)(ctx.flagsEnd-ctx.flagsPtr);
  181. comprFlagsLen = ((size_t)(ip - ipos) + 3 >= streamLen) ? 0 : (size_t)(ip - ipos);
  182. ipos = ip;
  183. #else
  184. streamLen = Lizard_readStream(res&LIZARD_FLAG_FLAGS, &ip, iend, decompFlagsBase, decompFlagsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.flagsPtr, &ctx.flagsEnd, LIZARD_STREAM_FLAGS);
  185. if (streamLen == 0) goto _output_error;
  186. #endif
  187. streamLen = Lizard_readStream(res&LIZARD_FLAG_LITERALS, &ip, iend, decompLiteralsBase, decompLiteralsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.literalsPtr, &ctx.literalsEnd, LIZARD_STREAM_LITERALS);
  188. if (streamLen == 0) goto _output_error;
  189. #ifdef LIZARD_USE_LOGS
  190. streamLen = (size_t)(ctx.literalsEnd-ctx.literalsPtr);
  191. comprLiteralsLen = ((size_t)(ip - ipos) + 3 >= streamLen) ? 0 : (size_t)(ip - ipos);
  192. total = (size_t)(ip-(ctx.lenEnd-1));
  193. #endif
  194. if (ip > iend) goto _output_error;
  195. LIZARD_LOG_DECOMPRESS("%d: total=%d block=%d flagsLen=%d(HUF=%d) literalsLen=%d(HUF=%d) offset16Len=%d offset24Len=%d lengthsLen=%d \n", (int)(op-(BYTE*)dest) ,(int)(ip-istart), (int)total,
  196. (int)(ctx.flagsEnd-ctx.flagsPtr), (int)comprFlagsLen, (int)(ctx.literalsEnd-ctx.literalsPtr), (int)comprLiteralsLen,
  197. (int)(ctx.offset16End-ctx.offset16Ptr), (int)(ctx.offset24End-ctx.offset24Ptr), (int)(ctx.lenEnd-ctx.lenPtr));
  198. }
  199. ctx.last_off = -LIZARD_INIT_LAST_OFFSET;
  200. params = Lizard_defaultParameters[compressionLevel - LIZARD_MIN_CLEVEL];
  201. if (params.decompressType == Lizard_coderwords_LZ4)
  202. res = Lizard_decompress_LZ4(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
  203. else
  204. #ifdef USE_LZ4_ONLY
  205. res = Lizard_decompress_LZ4(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
  206. #else
  207. res = Lizard_decompress_LIZv1(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
  208. #endif
  209. LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic res=%d inputSize=%d\n", res, (int)(ctx.literalsEnd-ctx.lenEnd));
  210. if (res <= 0) { free(decompLiteralsBase); return res; }
  211. op += res;
  212. outputSize -= res;
  213. if ((partialDecoding) && (op >= oexit)) break;
  214. }
  215. #ifdef LIZARD_STATS
  216. print_stats();
  217. #endif
  218. LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic total=%d\n", (int)(op-(BYTE*)dest));
  219. free(decompLiteralsBase);
  220. return (int)(op-(BYTE*)dest);
  221. _output_error:
  222. LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic ERROR ip=%p iend=%p\n", ip, iend);
  223. free(decompLiteralsBase);
  224. return -1;
  225. }
  226. int Lizard_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
  227. {
  228. return Lizard_decompress_generic(source, dest, compressedSize, maxDecompressedSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
  229. }
  230. int Lizard_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
  231. {
  232. return Lizard_decompress_generic(source, dest, compressedSize, maxDecompressedSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
  233. }
  234. /*===== streaming decompression functions =====*/
  235. /*
  236. * If you prefer dynamic allocation methods,
  237. * Lizard_createStreamDecode()
  238. * provides a pointer (void*) towards an initialized Lizard_streamDecode_t structure.
  239. */
  240. Lizard_streamDecode_t* Lizard_createStreamDecode(void)
  241. {
  242. Lizard_streamDecode_t* lizards = (Lizard_streamDecode_t*) ALLOCATOR(1, sizeof(Lizard_streamDecode_t));
  243. return lizards;
  244. }
  245. int Lizard_freeStreamDecode (Lizard_streamDecode_t* Lizard_stream)
  246. {
  247. FREEMEM(Lizard_stream);
  248. return 0;
  249. }
  250. /*!
  251. * Lizard_setStreamDecode() :
  252. * Use this function to instruct where to find the dictionary.
  253. * This function is not necessary if previous data is still available where it was decoded.
  254. * Loading a size of 0 is allowed (same effect as no dictionary).
  255. * Return : 1 if OK, 0 if error
  256. */
  257. int Lizard_setStreamDecode (Lizard_streamDecode_t* Lizard_streamDecode, const char* dictionary, int dictSize)
  258. {
  259. Lizard_streamDecode_t* lizardsd = (Lizard_streamDecode_t*) Lizard_streamDecode;
  260. lizardsd->prefixSize = (size_t) dictSize;
  261. lizardsd->prefixEnd = (const BYTE*) dictionary + dictSize;
  262. lizardsd->externalDict = NULL;
  263. lizardsd->extDictSize = 0;
  264. return 1;
  265. }
  266. /*
  267. *_continue() :
  268. These decoding functions allow decompression of multiple blocks in "streaming" mode.
  269. Previously decoded blocks must still be available at the memory position where they were decoded.
  270. If it's not possible, save the relevant part of decoded data into a safe buffer,
  271. and indicate where it stands using Lizard_setStreamDecode()
  272. */
  273. int Lizard_decompress_safe_continue (Lizard_streamDecode_t* Lizard_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
  274. {
  275. Lizard_streamDecode_t* lizardsd = (Lizard_streamDecode_t*) Lizard_streamDecode;
  276. int result;
  277. if (lizardsd->prefixEnd == (BYTE*)dest) {
  278. result = Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize,
  279. full, 0, usingExtDict, lizardsd->prefixEnd - lizardsd->prefixSize, lizardsd->externalDict, lizardsd->extDictSize);
  280. if (result <= 0) return result;
  281. lizardsd->prefixSize += result;
  282. lizardsd->prefixEnd += result;
  283. } else {
  284. lizardsd->extDictSize = lizardsd->prefixSize;
  285. lizardsd->externalDict = lizardsd->prefixEnd - lizardsd->extDictSize;
  286. result = Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize,
  287. full, 0, usingExtDict, (BYTE*)dest, lizardsd->externalDict, lizardsd->extDictSize);
  288. if (result <= 0) return result;
  289. lizardsd->prefixSize = result;
  290. lizardsd->prefixEnd = (BYTE*)dest + result;
  291. }
  292. return result;
  293. }
  294. /*
  295. Advanced decoding functions :
  296. *_usingDict() :
  297. These decoding functions work the same as "_continue" ones,
  298. the dictionary must be explicitly provided within parameters
  299. */
  300. int Lizard_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
  301. {
  302. if (dictSize==0)
  303. return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
  304. if (dictStart+dictSize == dest)
  305. {
  306. if (dictSize >= (int)(LIZARD_DICT_SIZE - 1))
  307. return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, withPrefix64k, (BYTE*)dest-LIZARD_DICT_SIZE, NULL, 0);
  308. return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
  309. }
  310. return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
  311. }
  312. /* debug function */
  313. int Lizard_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
  314. {
  315. return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
  316. }