lz4hc.c

/*
   LZ4 HC - High Compression Mode of LZ4
   Copyright (C) 2011-2013, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
   - LZ4 source repository : http://code.google.com/p/lz4/
*/

//**************************************
// Memory routines
//**************************************
#include <stdlib.h>   // calloc, free
#define ALLOCATOR(s)  calloc(1,s)
#define FREEMEM       free
#include <string.h>   // memset, memcpy
#define MEM_INIT      memset

//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
  || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
  || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   // Detects 64 bits mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// Little Endian or Big Endian ?
// Override the #define below if you know your architecture's endianness
#if defined (__GLIBC__)
#  include <endian.h>
#  if (__BYTE_ORDER == __BIG_ENDIAN)
#     define LZ4_BIG_ENDIAN 1
#  endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
#  define LZ4_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
   || defined(__hpux)  || defined(__hppa) \
   || defined(_MIPSEB) || defined(__s390__)
#  define LZ4_BIG_ENDIAN 1
#else
// Little Endian assumed. PDP endian and other very rare endian formats are unsupported.
#endif

// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
// For other CPUs, the compiler will be more cautious and insert extra code to ensure that aligned access is respected.
// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
#if defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

// Define this parameter if your target system or compiler does not support hardware bit count
#if defined(_MSC_VER) && defined(_WIN32_WCE)   // Visual Studio for Windows CE does not support hardware bit count
#  define LZ4_FORCE_SW_BITCOUNT
#endif

//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
#  define restrict   // Disable restrict
#endif

#ifdef _MSC_VER    // Visual Studio
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                      // For Visual 2005
#  if LZ4_ARCH64   // 64-bits
#    pragma intrinsic(_BitScanForward64)   // For Visual 2005
#    pragma intrinsic(_BitScanReverse64)   // For Visual 2005
#  else            // 32-bits
#    pragma intrinsic(_BitScanForward)     // For Visual 2005
#    pragma intrinsic(_BitScanReverse)     // For Visual 2005
#  endif
#  pragma warning(disable : 4127)          // disable: C4127: conditional expression is constant
#  pragma warning(disable : 4701)          // disable: C4701: potentially uninitialized local variable used
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif

#ifdef _MSC_VER    // Visual Studio
#  define lz4_bswap16(x) _byteswap_ushort(x)
#else
#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif

//**************************************
// Includes
//**************************************
#include "lz4hc.h"
#include "lz4.h"

//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
#  include <stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif

#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  ifdef __IBMC__
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

//**************************************
// Constants
//**************************************
#define MINMATCH 4

#define DICTIONARY_LOGSIZE 16
#define MAXD (1<<DICTIONARY_LOGSIZE)
#define MAXD_MASK ((U32)(MAXD - 1))
#define MAX_DISTANCE (MAXD - 1)

#define HASH_LOG (DICTIONARY_LOGSIZE-1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define MAX_NB_ATTEMPTS 256

#define ML_BITS  4
#define ML_MASK  (size_t)((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)

#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
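
// Note (descriptive comment added for clarity): the KB/MB/GB macros above are defined
// so that an expression such as "64 KB" expands to "64 *(1U<<10)". They are used below
// for the streaming window size and the hash-table rebase distances.
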
//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64   // 64-bit
#  define STEPSIZE 8
#  define LZ4_COPYSTEP(s,d)     A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d)
#  define UARCH U64
#  define AARCH A64
#  define HTYPE                 U32
#  define INITBASE(b,s)         const BYTE* const b = s
#else            // 32-bit
#  define STEPSIZE 4
#  define LZ4_COPYSTEP(s,d)     A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define UARCH U32
#  define AARCH A32
//#  define HTYPE                 const BYTE*
//#  define INITBASE(b,s)         const int b = 0
#  define HTYPE                 U32
#  define INITBASE(b,s)         const BYTE* const b = s
#endif

#if defined(LZ4_BIG_ENDIAN)
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else            // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
#endif

//************************************************************
// Local Types
//************************************************************
typedef struct
{
    const BYTE* inputBuffer;
    const BYTE* base;
    const BYTE* end;
    HTYPE hashTable[HASHTABLESIZE];
    U16 chainTable[MAXD];
    const BYTE* nextToUpdate;
} LZ4HC_Data_Structure;

//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e)    do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
#define HASH_VALUE(p)          HASH_FUNCTION(A32(p))
#define HASH_POINTER(p)        (HashTable[HASH_VALUE(p)] + base)
#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]
#define GETNEXT(p)             ((p) - (size_t)DELTANEXT(p))
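
// How the match finder fits together (descriptive comment added for clarity):
// HASH_FUNCTION multiplies the first MINMATCH (4) bytes at a position by a
// golden-ratio-style constant and keeps the top HASH_LOG bits.
// hashTable[h] stores, as an offset from 'base', the most recent position whose
// first 4 bytes hashed to h (the head of that hash chain).
// chainTable[p & MAXD_MASK] stores the backward distance from position p to the
// previous position with the same hash (capped at MAX_DISTANCE by LZ4HC_Insert),
// so GETNEXT(p) walks one step back along the chain.
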
//**************************************
// Private functions
//**************************************
#if LZ4_ARCH64

FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#  else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#  else
    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#  endif
#endif
}

#else

FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif

FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
{
    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
    hc4->nextToUpdate = base + 1;
    hc4->base = base;
    hc4->inputBuffer = base;
    hc4->end = base;
}

void* LZ4_createHC (const char* inputBuffer)
{
    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
    if (hc4 == NULL) return NULL;   // allocation failure; callers already test for NULL
    LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
    return hc4;
}

int LZ4_freeHC (void* LZ4HC_Data)
{
    FREEMEM(LZ4HC_Data);
    return (0);
}

// Update chains up to ip (excluded)
FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
{
    U16*   chainTable = hc4->chainTable;
    HTYPE* HashTable  = hc4->hashTable;
    INITBASE(base,hc4->base);

    while(hc4->nextToUpdate < ip)
    {
        const BYTE* const p = hc4->nextToUpdate;
        size_t delta = (p) - HASH_POINTER(p);
        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
        DELTANEXT(p) = (U16)delta;
        HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
        hc4->nextToUpdate++;
    }
}
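
// LZ4_slideInputBufferHC (below), descriptive comment added for clarity:
// the streaming ("continue") API keeps the last 64 KB of already-compressed data as
// its dictionary. When the caller's input buffer is full, this function indexes the
// remaining tail, copies that 64 KB window back towards the start of the buffer,
// shifts base/end/nextToUpdate by the same 64 KB-aligned distance, and occasionally
// rebases the hashTable entries so the offsets stored there cannot overflow. It
// returns the address at which the next block of input should be written.
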
char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
    U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
    distance = (distance >> 16) << 16;   // Must be a multiple of 64 KB
    LZ4HC_Insert(hc4, hc4->end - MINMATCH);
    memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
    hc4->nextToUpdate -= distance;
    hc4->base -= distance;
    if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB)   // Avoid overflow
    {
        int i;
        hc4->base += 1 GB;
        for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
    }
    hc4->end -= distance;
    return (char*)(hc4->end);
}

FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
{
    const BYTE* p1t = p1;

    while (p1t<matchlimit-(STEPSIZE-1))
    {
        UARCH diff = AARCH(p2) ^ AARCH(p1t);
        if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
        p1t += LZ4_NbCommonBytes(diff);
        return (p1t - p1);
    }
    if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
    if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
    if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
    return (p1t - p1);
}

FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
{
    U16* const chainTable = hc4->chainTable;
    HTYPE* const HashTable = hc4->hashTable;
    const BYTE* ref;
    INITBASE(base,hc4->base);
    int nbAttempts=MAX_NB_ATTEMPTS;
    size_t repl=0, ml=0;
    U16 delta=0;   // dummy initialization, to silence an uninitialized-variable warning

    // HC4 match finder
    LZ4HC_Insert(hc4, ip);
    ref = HASH_POINTER(ip);

#define REPEAT_OPTIMIZATION
#ifdef REPEAT_OPTIMIZATION
    // Detect repetitive sequences of length <= 4
    if ((U32)(ip-ref) <= 4)   // potential repetition
    {
        if (A32(ref) == A32(ip))   // confirmed
        {
            delta = (U16)(ip-ref);
            repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
            *matchpos = ref;
        }
        ref = GETNEXT(ref);
    }
#endif

    while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
    {
        nbAttempts--;
        if (*(ref+ml) == *(ip+ml))
        if (A32(ref) == A32(ip))
        {
            size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
            if (mlt > ml) { ml = mlt; *matchpos = ref; }
        }
        ref = GETNEXT(ref);
    }

#ifdef REPEAT_OPTIMIZATION
    // Complete table
    if (repl)
    {
        const BYTE* ptr = ip;
        const BYTE* end;

        end = ip + repl - (MINMATCH-1);
        while(ptr < end-delta)
        {
            DELTANEXT(ptr) = delta;   // Pre-Load
            ptr++;
        }
        do
        {
            DELTANEXT(ptr) = delta;
            HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base);   // Head of chain
            ptr++;
        } while(ptr < end);
        hc4->nextToUpdate = end;
    }
#endif

    return (int)ml;
}

FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
{
    U16* const chainTable = hc4->chainTable;
    HTYPE* const HashTable = hc4->hashTable;
    INITBASE(base,hc4->base);
    const BYTE* ref;
    int nbAttempts = MAX_NB_ATTEMPTS;
    int delta = (int)(ip-startLimit);

    // First Match
    LZ4HC_Insert(hc4, ip);
    ref = HASH_POINTER(ip);

    while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
    {
        nbAttempts--;
        if (*(startLimit + longest) == *(ref - delta + longest))
        if (A32(ref) == A32(ip))
        {
#if 1
            const BYTE* reft = ref+MINMATCH;
            const BYTE* ipt = ip+MINMATCH;
            const BYTE* startt = ip;

            while (ipt<matchlimit-(STEPSIZE-1))
            {
                UARCH diff = AARCH(reft) ^ AARCH(ipt);
                if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
                ipt += LZ4_NbCommonBytes(diff);
                goto _endCount;
            }
            if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
            if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
            if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
_endCount:
            reft = ref;
#else
            // Easier for code maintenance, but unfortunately slower too
            const BYTE* startt = ip;
            const BYTE* reft = ref;
            const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
#endif

            while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}

            if ((ipt-startt) > longest)
            {
                longest = (int)(ipt-startt);
                *matchpos = reft;
                *startpos = startt;
            }
        }
        ref = GETNEXT(ref);
    }

    return longest;
}
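
// LZ4HC_encodeSequence (below) emits one LZ4 sequence (descriptive comment added for clarity):
//   - a token byte holding the literal length in its high 4 bits and the match length
//     minus MINMATCH in its low 4 bits,
//   - optional extra length bytes (runs of 255) when either field saturates,
//   - the literals themselves,
//   - a 2-byte little-endian offset back to the match.
// It returns 1 when the limitedOutput check detects that the output buffer is too small.
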
typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;

FORCE_INLINE int LZ4HC_encodeSequence (
                       const BYTE** ip,
                       BYTE** op,
                       const BYTE** anchor,
                       int matchLength,
                       const BYTE* ref,
                       limitedOutput_directive limitedOutputBuffer,
                       BYTE* oend)
{
    int length;
    BYTE* token;

    // Encode Literal length
    length = (int)(*ip - *anchor);
    token = (*op)++;
    if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1;   // Check output limit
    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
    else *token = (BYTE)(length<<ML_BITS);

    // Copy Literals
    LZ4_BLINDCOPY(*anchor, *op, length);

    // Encode Offset
    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));

    // Encode MatchLength
    length = (int)(matchLength-MINMATCH);
    if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1;   // Check output limit
    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
    else *token += (BYTE)(length);

    // Prepare next loop
    *ip += matchLength;
    *anchor = *ip;

    return 0;
}
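
// LZ4HC_compress_generic (below) is the main parsing loop (descriptive comment added
// for clarity): for each position it first finds a best match (ml/ref), then looks for
// a wider match (ml2/ref2) starting within the first one, and possibly a third
// (ml3/ref3). Before encoding, overlapping candidates are trimmed (the 'correction'
// adjustments) or dropped so that the emitted sequences follow each other without
// overlap; in limitedOutput mode the function returns 0 as soon as the output buffer
// proves too small.
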
static int LZ4HC_compress_generic (
                 void* ctxvoid,
                 const char* source,
                 char* dest,
                 int inputSize,
                 int maxOutputSize,
                 limitedOutput_directive limit
                )
{
    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = (iend - LASTLITERALS);

    BYTE* op = (BYTE*) dest;
    BYTE* const oend = op + maxOutputSize;

    int   ml, ml2, ml3, ml0;
    const BYTE* ref=NULL;
    const BYTE* start2=NULL;
    const BYTE* ref2=NULL;
    const BYTE* start3=NULL;
    const BYTE* ref3=NULL;
    const BYTE* start0;
    const BYTE* ref0;

    // Ensure blocks follow each other
    if (ip != ctx->end) return 0;
    ctx->end += inputSize;

    ip++;

    // Main Loop
    while (ip < mflimit)
    {
        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
        if (!ml) { ip++; continue; }

        // saved, in case we would skip too much
        start0 = ip;
        ref0 = ref;
        ml0 = ml;

_Search2:
        if (ip+ml < mflimit)
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
        else ml2 = ml;

        if (ml2 == ml)   // No better match
        {
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
            continue;
        }

        if (start0 < ip)
        {
            if (start2 < ip + ml0)   // empirical
            {
                ip = start0;
                ref = ref0;
                ml = ml0;
            }
        }

        // Here, start0==ip
        if ((start2 - ip) < 3)   // First Match too small : removed
        {
            ml = ml2;
            ip = start2;
            ref = ref2;
            goto _Search2;
        }

_Search3:
        // Currently we have :
        // ml2 > ml1, and
        // ip1+3 <= ip2 (usually < ip1+ml1)
        if ((start2 - ip) < OPTIMAL_ML)
        {
            int correction;
            int new_ml = ml;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0)
            {
                start2 += correction;
                ref2 += correction;
                ml2 -= correction;
            }
        }
        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)

        if (start2 + ml2 < mflimit)
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
        else ml3 = ml2;

        if (ml3 == ml2)   // No better match : 2 sequences to encode
        {
            // ip & ref are known; Now for ml
            if (start2 < ip+ml) ml = (int)(start2 - ip);

            // Now, encode 2 sequences
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
            ip = start2;
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
            continue;
        }

        if (start3 < ip+ml+3)   // Not enough space for match 2 : remove it
        {
            if (start3 >= (ip+ml))   // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
            {
                if (start2 < ip+ml)
                {
                    int correction = (int)(ip+ml - start2);
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                    if (ml2 < MINMATCH)
                    {
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
                    }
                }

                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
                ip  = start3;
                ref = ref3;
                ml  = ml3;

                start0 = start2;
                ref0 = ref2;
                ml0 = ml2;
                goto _Search2;
            }

            start2 = start3;
            ref2 = ref3;
            ml2 = ml3;
            goto _Search3;
        }

        // OK, now we have 3 ascending matches; let's write at least the first one
        // ip & ref are known; Now for ml
        if (start2 < ip+ml)
        {
            if ((start2 - ip) < (int)ML_MASK)
            {
                int correction;
                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                correction = ml - (int)(start2 - ip);
                if (correction > 0)
                {
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                }
            }
            else
            {
                ml = (int)(start2 - ip);
            }
        }
        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;

        ip = start2;
        ref = ref2;
        ml = ml2;

        start2 = start3;
        ref2 = ref3;
        ml2 = ml3;

        goto _Search3;
    }

    // Encode Last Literals
    {
        int lastRun = (int)(iend - anchor);
        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;   // Check output limit
        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
        else *op++ = (BYTE)(lastRun<<ML_BITS);
        memcpy(op, anchor, iend - anchor);
        op += iend-anchor;
    }

    // End
    return (int) (((char*)op)-dest);
}

int LZ4_compressHC(const char* source, char* dest, int inputSize)
{
    void* ctx = LZ4_createHC(source);
    int result;
    if (ctx==NULL) return 0;

    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, noLimit);

    LZ4_freeHC(ctx);
    return result;
}

int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
{
    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, noLimit);
}

int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    void* ctx = LZ4_createHC(source);
    int result;
    if (ctx==NULL) return 0;

    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, limitedOutput);

    LZ4_freeHC(ctx);
    return result;
}

int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, limitedOutput);
}
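
// Illustrative usage sketch (not part of the original API): single-shot compression of
// an in-memory buffer with the bounded-output entry point. LZ4_compressBound() comes
// from lz4.h; the LZ4HC_USAGE_EXAMPLE guard and the function name below are hypothetical,
// added only so the sketch does not take part in a normal build.
#ifdef LZ4HC_USAGE_EXAMPLE
#include <stdio.h>

int LZ4HC_usageExample(const char* src, int srcSize)
{
    int maxDstSize = LZ4_compressBound(srcSize);   // worst-case compressed size
    char* dst = (char*)ALLOCATOR(maxDstSize);      // reuse this file's allocator macro
    int cSize;
    if (dst == NULL) return -1;
    cSize = LZ4_compressHC_limitedOutput(src, dst, srcSize, maxDstSize);
    if (cSize > 0) printf("compressed %d bytes into %d bytes\n", srcSize, cSize);
    // ... use dst[0..cSize-1] here ...
    FREEMEM(dst);
    return cSize;   // 0 means compression failed or the output did not fit
}
#endif   // LZ4HC_USAGE_EXAMPLE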