/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-2013, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 source repository : http://code.google.com/p/lz4/
   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
//**************************************
// Tuning parameters
//**************************************
// MEMORY_USAGE :
// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.)
// Increasing memory usage improves compression ratio.
// Reducing memory usage can improve speed, thanks to cache effects.
// Default value is 14, for 16KB, which nicely fits into the Intel x86 L1 cache.
#define MEMORY_USAGE 14

// HEAPMODE :
// Selects how the default compression functions allocate memory for their hash table :
// on the stack (0 : default, fastest), or on the heap (1 : requires a memory allocation via malloc()).
#define HEAPMODE 0
//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
  || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
  || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   // Detects 64-bit mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// Little Endian or Big Endian ?
// Overwrite the #define below if you know your architecture endianness
#if defined (__GLIBC__)
#  include <endian.h>
#  if (__BYTE_ORDER == __BIG_ENDIAN)
#    define LZ4_BIG_ENDIAN 1
#  endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
#  define LZ4_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
   || defined(__hpux) || defined(__hppa) \
   || defined(_MIPSEB) || defined(__s390__)
#  define LZ4_BIG_ENDIAN 1
#else
// Little Endian assumed. PDP Endian and other very rare endian formats are unsupported.
#endif

// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
// For other CPUs, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to guarantee aligned accesses.
// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
#if defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

// Define this parameter if your target system or compiler does not support hardware bit count
#if defined(_MSC_VER) && defined(_WIN32_WCE)   // Visual Studio for Windows CE does not support hardware bit count
#  define LZ4_FORCE_SW_BITCOUNT
#endif

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// This option may provide a small boost to performance on some big endian CPUs, although probably modest.
// You may set this option to 1 if data will remain within a closed environment.
// This option is useless on little endian CPUs (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
//**************************************
// Compiler Options
//**************************************
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   // C99
/* "restrict" is a known keyword */
#else
#  define restrict   // Disable restrict
#endif

#ifdef _MSC_VER   // Visual Studio
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                        // For Visual 2005
#  if LZ4_ARCH64                             // 64-bits
#    pragma intrinsic(_BitScanForward64)     // For Visual 2005
#    pragma intrinsic(_BitScanReverse64)     // For Visual 2005
#  else                                      // 32-bits
#    pragma intrinsic(_BitScanForward)       // For Visual 2005
#    pragma intrinsic(_BitScanReverse)       // For Visual 2005
#  endif
#  pragma warning(disable : 4127)            // disable: C4127: conditional expression is constant
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif

#ifdef _MSC_VER
#  define lz4_bswap16(x) _byteswap_ushort(x)
#else
#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
//**************************************
// Memory routines
//**************************************
#include <stdlib.h>   // malloc, calloc, free
#define ALLOCATOR(n,s) calloc(n,s)
#define FREEMEM        free
#include <string.h>   // memset, memcpy
#define MEM_INIT       memset

//**************************************
// Includes
//**************************************
#include "lz4.h"
//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
#  include <stdint.h>
   typedef uint8_t  BYTE;
   typedef uint16_t U16;
   typedef uint32_t U32;
   typedef int32_t  S32;
   typedef uint64_t U64;
#else
   typedef unsigned char       BYTE;
   typedef unsigned short      U16;
   typedef unsigned int        U32;
   typedef signed int          S32;
   typedef unsigned long long  U64;
#endif

#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct { U16 v; }    _PACKED U16_S;
typedef struct { U32 v; }    _PACKED U32_S;
typedef struct { U64 v; }    _PACKED U64_S;
typedef struct { size_t v; } _PACKED size_t_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#    pragma pack(0)
#  else
#    pragma pack(pop)
#  endif
#endif

// Unaligned-tolerant accesses of 16, 32, 64 bits and native word size
#define A16(x)   (((U16_S *)(x))->v)
#define A32(x)   (((U32_S *)(x))->v)
#define A64(x)   (((U64_S *)(x))->v)
#define AARCH(x) (((size_t_S *)(x))->v)
//**************************************
// Constants
//**************************************
#define LZ4_HASHLOG   (MEMORY_USAGE-2)
#define HASHTABLESIZE (1 << MEMORY_USAGE)
#define HASHNBCELLS4  (1 << LZ4_HASHLOG)

#define MINMATCH 4

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
const int LZ4_minLength = (MFLIMIT+1);

#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
#define SKIPSTRENGTH 6   // Increasing this value will make the compression run slower on incompressible data

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

// Size multipliers : e.g. "64 KB" expands to "64 *(1U<<10)", i.e. 65536
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
//**************************************
// Structures and local types
//**************************************
typedef struct {
    U32 hashTable[HASHNBCELLS4];
    const BYTE* bufferStart;   // start of the user-provided input buffer
    const BYTE* base;          // virtual base : hash table positions are stored relative to it
    const BYTE* nextBlock;     // expected position of the next block to compress
} LZ4_Data_Structure;

typedef enum { notLimited = 0, limited = 1 } limitedOutput_directive;
typedef enum { byPtr, byU32, byU16 } tableType_t;
typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
//**************************************
// Architecture-specific macros
//**************************************
#define STEPSIZE sizeof(size_t)
#define LZ4_COPYSTEP(d,s)     { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; }
#define LZ4_COPY8(d,s)        { LZ4_COPYSTEP(d,s); if (STEPSIZE<8) LZ4_COPYSTEP(d,s); }
#define LZ4_SECURECOPY(d,s,e) { if ((STEPSIZE==4)||(d<e)) LZ4_WILDCOPY(d,s,e); }

#if LZ4_ARCH64   // 64-bit
#  define HTYPE U32
#  define INITBASE(base) const BYTE* const base = ip
#else            // 32-bit
#  define HTYPE const BYTE*
#  define INITBASE(base) const int base = 0
#endif

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)   { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else   // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)   { A16(p) = v; p+=2; }
#endif

//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(d,s,e) { do { LZ4_COPY8(d,s) } while (d<e); }   // copies 8 bytes per step and may overrun e; at the end, d>=e
//****************************
// Private functions
//****************************
#if LZ4_ARCH64

FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
{
# if defined(LZ4_BIG_ENDIAN)
#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#   else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
#   endif
# else
#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#   else
    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#   endif
# endif
}

#else

FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
{
# if defined(LZ4_BIG_ENDIAN)
#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#   else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#   endif
# else
#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#   else
    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#   endif
# endif
}

#endif
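
/*
   Illustrative note (not part of the original source) : LZ4_NbCommonBytes() converts the
   XOR of two machine words into the number of bytes those words have in common, starting
   from the first byte in memory. On a little endian CPU the first differing byte is the
   lowest non-zero byte, hence the count-trailing-zeros primitive; big endian CPUs use
   count-leading-zeros instead. The match counter below uses it like this :

       size_t diff = AARCH(ref) ^ AARCH(ip);      // e.g. 0x0000000000FF0000 on a 64-bit LE target
       if (diff) ip += LZ4_NbCommonBytes(diff);   // 16 trailing zero bits => 2 common bytes

   The software fallback isolates the lowest set bit (val & -val), multiplies by a
   De Bruijn constant, and uses the top bits of the product to index a small lookup table.
*/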
//****************************
// Compression functions
//****************************
FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType)
{
    if (tableType == byU16)
        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    else
        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
}
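
/*
   Illustrative note (not part of the original source) : this is a Knuth-style
   multiplicative hash. 2654435761 is a 32-bit prime close to 2^32 divided by the golden
   ratio, which spreads 4-byte sequences fairly uniformly; keeping only the top
   LZ4_HASHLOG bits of the product yields the table index. byU16 tables get one extra
   index bit because each cell is half as wide, so twice as many fit in the same memory.

       U32 seq = A32(p);                                      // 4 bytes read at position p
       U32 h   = (seq * 2654435761U) >> (32 - LZ4_HASHLOG);   // same computation as above for byPtr/byU32
*/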
FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }

FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    switch (tableType)
    {
    case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; }
    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
    }
}

FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    U32 h = LZ4_hashPosition(p, tableType);
    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
}

FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   // default, to ensure a return
}

FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    U32 h = LZ4_hashPosition(p, tableType);
    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
}
FORCE_INLINE int LZ4_compress_generic(
                 void* ctx,
                 const char* source,
                 char* dest,
                 int inputSize,
                 int maxOutputSize,

                 limitedOutput_directive limitedOutput,
                 tableType_t tableType,
                 prefix64k_directive prefix)
{
    const BYTE* ip = (const BYTE*) source;
    const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source;
    const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source);
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    BYTE* op = (BYTE*) dest;
    BYTE* const oend = op + maxOutputSize;

    int length;
    const int skipStrength = SKIPSTRENGTH;
    U32 forwardH;

    // Init conditions
    if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0;   // must continue from end of previous block
    if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend;                    // do it now, due to potential early exit
    if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0;                       // Size too large (not within 64K limit)
    if (inputSize<LZ4_minLength) goto _last_literals;                                      // Input too small, no compression (all literals)

    // First Byte
    LZ4_putPosition(ip, ctx, tableType, base);
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    // Main Loop
    for ( ; ; )
    {
        int findMatchAttempts = (1U << skipStrength) + 3;
        const BYTE* forwardIp = ip;
        const BYTE* ref;
        BYTE* token;

        // Find a match
        do {
            U32 h = forwardH;
            int step = findMatchAttempts++ >> skipStrength;
            ip = forwardIp;
            forwardIp = ip + step;

            if unlikely(forwardIp > mflimit) { goto _last_literals; }

            forwardH = LZ4_hashPosition(forwardIp, tableType);
            ref = LZ4_getPositionOnHash(h, ctx, tableType, base);
            LZ4_putPositionOnHash(ip, h, ctx, tableType, base);

        } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip)));

        // Catch up
        while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }

        // Encode Literal length
        length = (int)(ip - anchor);
        token = op++;
        if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)) return 0;   // Check output limit

        if (length>=(int)RUN_MASK)
        {
            int len = length-RUN_MASK;
            *token=(RUN_MASK<<ML_BITS);
            for(; len >= 255 ; len-=255) *op++ = 255;
            *op++ = (BYTE)len;
        }
        else *token = (BYTE)(length<<ML_BITS);

        // Copy Literals
        { BYTE* end=(op)+(length); LZ4_WILDCOPY(op,anchor,end); op=end; }

_next_match:
        // Encode Offset
        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));

        // Start Counting
        ip+=MINMATCH; ref+=MINMATCH;   // MinMatch already verified
        anchor = ip;
        while likely(ip<matchlimit-(STEPSIZE-1))
        {
            size_t diff = AARCH(ref) ^ AARCH(ip);
            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
            ip += LZ4_NbCommonBytes(diff);
            goto _endCount;
        }
        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
        if ((ip<matchlimit) && (*ref == *ip)) ip++;
_endCount:

        // Encode MatchLength
        length = (int)(ip - anchor);
        if ((limitedOutput) && unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)) return 0;   // Check output limit

        if (length>=(int)ML_MASK)
        {
            *token += ML_MASK;
            length -= ML_MASK;
            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; }
            if (length >= 255) { length-=255; *op++ = 255; }
            *op++ = (BYTE)length;
        }
        else *token += (BYTE)(length);

        // Test end of chunk
        if (ip > mflimit) { anchor = ip; break; }

        // Fill table
        LZ4_putPosition(ip-2, ctx, tableType, base);

        // Test next position
        ref = LZ4_getPosition(ip, ctx, tableType, base);
        LZ4_putPosition(ip, ctx, tableType, base);
        if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }

        // Prepare next loop
        anchor = ip++;
        forwardH = LZ4_hashPosition(ip, tableType);
    }

_last_literals:
    // Encode Last Literals
    {
        int lastRun = (int)(iend - anchor);
        if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;   // Check output limit
        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
        else *op++ = (BYTE)(lastRun<<ML_BITS);
        memcpy(op, anchor, iend - anchor);
        op += iend-anchor;
    }

    // End
    return (int) (((char*)op)-dest);
}
int LZ4_compress(const char* source, char* dest, int inputSize)
{
#if (HEAPMODE)
    void* ctx = ALLOCATOR(HASHNBCELLS4, 4);   // Aligned on 4-bytes boundaries
#else
    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};      // Ensure data is aligned on 4-bytes boundaries
#endif
    int result;

    if (inputSize < (int)LZ4_64KLIMIT)
        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noPrefix);
    else
        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);

#if (HEAPMODE)
    FREEMEM(ctx);
#endif
    return result;
}
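
/*
   Usage sketch (illustrative, not part of the original source) : compressing one memory
   block with LZ4_compress(). The destination buffer is sized with LZ4_compressBound()
   (declared in lz4.h), so the call cannot overflow it even on incompressible input.

       #include <stdio.h>
       #include <stdlib.h>
       #include "lz4.h"

       int compress_example(const char* data, int dataSize)
       {
           char* compressed = (char*)malloc(LZ4_compressBound(dataSize));
           if (compressed == NULL) return -1;
           int csize = LZ4_compress(data, compressed, dataSize);   // returns compressed size, or 0 on failure
           if (csize > 0) printf("%d -> %d bytes\n", dataSize, csize);
           free(compressed);
           return csize;
       }
*/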
int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize)
{
    return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, 0, notLimited, byU32, withPrefix);
}
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
{
#if (HEAPMODE)
    void* ctx = ALLOCATOR(HASHNBCELLS4, 4);   // Aligned on 4-bytes boundaries
#else
    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};      // Ensure data is aligned on 4-bytes boundaries
#endif
    int result;

    if (inputSize < (int)LZ4_64KLIMIT)
        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, byU16, noPrefix);
    else
        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);

#if (HEAPMODE)
    FREEMEM(ctx);
#endif
    return result;
}
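
/*
   Usage sketch (illustrative, not part of the original source) : LZ4_compress_limitedOutput()
   is for destinations with a hard size budget. It returns 0, instead of writing past the
   buffer, whenever the compressed output would not fit.

       int compress_into(const char* src, int srcSize)
       {
           char dst[4096];
           int csize = LZ4_compress_limitedOutput(src, dst, srcSize, (int)sizeof(dst));
           if (csize == 0)
           {
               // Output would exceed 4096 bytes : store or transmit the input uncompressed instead.
           }
           return csize;
       }
*/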
int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, maxOutputSize, limited, byU32, withPrefix);
}
//****************************
// Stream functions
//****************************

FORCE_INLINE void LZ4_init(LZ4_Data_Structure* lz4ds, const BYTE* base)
{
    MEM_INIT(lz4ds->hashTable, 0, sizeof(lz4ds->hashTable));
    lz4ds->bufferStart = base;
    lz4ds->base = base;
    lz4ds->nextBlock = base;
}

void* LZ4_create (const char* inputBuffer)
{
    void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure));
    LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer);
    return lz4ds;
}

int LZ4_free (void* LZ4_Data)
{
    FREEMEM(LZ4_Data);
    return (0);
}

char* LZ4_slideInputBuffer (void* LZ4_Data)
{
    LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data;
    size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB);

    if ( (lz4ds->base - delta) > lz4ds->base )   // underflow control
    {
        size_t newBaseDelta = (lz4ds->nextBlock - 64 KB) - lz4ds->base;
        int nH;

        for (nH=0; nH < HASHNBCELLS4; nH++)
        {
            if (lz4ds->hashTable[nH] < (U32)newBaseDelta) lz4ds->hashTable[nH] = 0;
            else lz4ds->hashTable[nH] -= (U32)newBaseDelta;
        }
        lz4ds->base += newBaseDelta;
    }

    memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB);
    lz4ds->nextBlock -= delta;
    lz4ds->base -= delta;

    return (char*)(lz4ds->nextBlock);
}
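
/*
   Usage sketch (illustrative, not part of the original source) : chained-block compression.
   Blocks must be written contiguously into the buffer handed to LZ4_create(); once the
   buffer is nearly full, LZ4_slideInputBuffer() moves the last 64 KB dictionary back to
   the start and returns the position where the next block must be placed. BUFFER_SIZE,
   BLOCK_SIZE, read_block(), write_block() and outBuffer are hypothetical application code.

       char inputBuffer[BUFFER_SIZE];
       void* lz4s = LZ4_create(inputBuffer);
       char* inPos = inputBuffer;

       for (;;)
       {
           if ((size_t)(inputBuffer + BUFFER_SIZE - inPos) < BLOCK_SIZE)
               inPos = LZ4_slideInputBuffer(lz4s);            // recycle buffer, keep 64 KB dictionary
           int inSize = read_block(inPos, BLOCK_SIZE);        // hypothetical input routine
           if (inSize <= 0) break;
           int cSize = LZ4_compress_continue(lz4s, inPos, outBuffer, inSize);
           write_block(outBuffer, cSize);                     // hypothetical output routine
           inPos += inSize;                                   // next block starts right after this one
       }
       LZ4_free(lz4s);
*/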
//****************************
// Decompression functions
//****************************

// This generic decompression function covers all use cases.
// It shall be instantiated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
FORCE_INLINE int LZ4_decompress_generic(
                 const char* source,
                 char* dest,
                 int inputSize,          // If endOnInput==endOnInputSize, this value is the exact size of the compressed input.
                 int outputSize,         // If endOnInput==endOnInputSize, this value is the max size of the output buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    const BYTE* restrict ip = (const BYTE*) source;
    const BYTE* ref;
    const BYTE* const iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;
    BYTE* const oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};   // static reduces speed for LZ4_decompress_safe() on GCC64
    static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};

    // Special cases
    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                        // targetOutputSize too high => decode everything
    if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 0 : -1;   // Empty output buffer
    if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1);

    // Main Loop
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }

        // copy literals
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            if (partialDecoding)
            {
                if (cpy > oend) goto _output_error;                           // Error : write attempt beyond end of output buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;   // Error : read attempt beyond end of input buffer
            }
            else
            {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;       // Error : block decoding must stop exactly there
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   // Error : input must be consumed
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;   // Necessarily EOF, due to parsing restrictions
        }
        LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;

        // get offset
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error;   // Error : offset outside destination buffer

        // get matchlength
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            while ((!endOnInput) || (ip<iend-(LASTLITERALS+1)))   // Ensure enough bytes remain for LASTLITERALS + token
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence
        if unlikely((op-ref)<(int)STEPSIZE)
        {
            const size_t dec64 = dec64table[(sizeof(void*)==4) ? 0 : op-ref];
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(op,ref); }
        cpy = op + length - (STEPSIZE-4);

        if unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4))
        {
            if (cpy > oend-LASTLITERALS) goto _output_error;   // Error : last 5 bytes must be literals
            LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }
        LZ4_WILDCOPY(op, ref, cpy);
        op=cpy;   // correction
    }

    // end of decoding
    if (endOnInput)
        return (int) (((char*)op)-dest);     // Nb of output bytes decoded
    else
        return (int) (((char*)ip)-source);   // Nb of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}
int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, full, 0);
}
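
/*
   Usage sketch (illustrative, not part of the original source) : decompressing one block.
   LZ4_decompress_safe() validates the stream as it goes and never writes beyond the given
   output buffer; a negative return value signals malformed or truncated input. The original
   size must be known by the caller, since LZ4 blocks do not store it.

       int decompress_block(const char* compressed, int csize, char* regen, int origSize)
       {
           int dsize = LZ4_decompress_safe(compressed, regen, csize, origSize);
           if (dsize < 0) return -1;   // malformed or truncated input
           return dsize;               // number of bytes written into regen
       }
*/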
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, withPrefix, full, 0);
}

int LZ4_decompress_safe_partial(const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, partial, targetOutputSize);
}

int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize)
{
    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
}

int LZ4_decompress_fast(const char* source, char* dest, int outputSize)
{
#ifdef _MSC_VER   // This version is faster with Visual
    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0);
#else
    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
#endif
}
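
/*
   Illustrative note (not part of the original source) : the _fast variants take the
   *decompressed* size as their only length argument (inputSize is passed as 0, with
   endOnOutputSize), so reads are not bound-checked against the end of the compressed
   input. They should therefore only be used on trusted data.

       // Safe for untrusted input : both sizes are checked.
       LZ4_decompress_safe(src, dst, compressedSize, maxDecompressedSize);
       // Faster, but only for trusted input whose decompressed size is known exactly.
       LZ4_decompress_fast(src, dst, decompressedSize);
*/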