lz4hc.c

/*
   LZ4 HC - High Compression Mode of LZ4
   Copyright (C) 2011-2012, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
   - LZ4 source repository : http://code.google.com/p/lz4/
*/

//**************************************
// CPU Feature Detection
//**************************************

// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) )   // Detects 64 bits mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// Little Endian or Big Endian ?
// Overwrite the #define below if you know your architecture endianness
#if defined (__GLIBC__)
#  include <endian.h>
#  if (__BYTE_ORDER == __BIG_ENDIAN)
#    define LZ4_BIG_ENDIAN 1
#  endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
#  define LZ4_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
   || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
   || defined(__hpux)  || defined(__hppa) \
   || defined(_MIPSEB) || defined(__s390__)
#  define LZ4_BIG_ENDIAN 1
#else
// Little Endian assumed. PDP Endian and other very rare endian formats are unsupported.
#endif

// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
// For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
#if defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

// Define this parameter if your target system or compiler does not support hardware bit count
#if defined(_MSC_VER) && defined(_WIN32_WCE)   // Visual Studio for Windows CE does not support Hardware bit count
#  define LZ4_FORCE_SW_BITCOUNT
#endif

//**************************************
// Compiler Options
//**************************************
#if __STDC_VERSION__ >= 199901L    // C99
  /* "restrict" is a known keyword */
#else
#  define restrict  // Disable restrict
#endif

#ifdef _MSC_VER
#  define inline __inline            // Visual is not C99, but supports some kind of inline
#  define forceinline __forceinline
#  include <intrin.h>                // For Visual 2005
#  if LZ4_ARCH64                     // 64-bit
#    pragma intrinsic(_BitScanForward64)   // For Visual 2005
#    pragma intrinsic(_BitScanReverse64)   // For Visual 2005
#  else
#    pragma intrinsic(_BitScanForward)     // For Visual 2005
#    pragma intrinsic(_BitScanReverse)     // For Visual 2005
#  endif
#else
#  ifdef __GNUC__
#    define forceinline inline __attribute__((always_inline))
#  else
#    define forceinline inline
#  endif
#endif

#ifdef _MSC_VER    // Visual Studio
#  define lz4_bswap16(x) _byteswap_ushort(x)
#else
#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif

//**************************************
// Includes
//**************************************
#include <stdlib.h>   // calloc, free
#include <string.h>   // memset, memcpy
#include "lz4hc.h"

#define ALLOCATOR(s)  calloc(1,s)
#define FREEMEM       free
#define MEM_INIT      memset

//**************************************
// Basic Types
//**************************************
#if defined(_MSC_VER)    // Visual Studio does not support 'stdint' natively
#define BYTE unsigned __int8
#define U16  unsigned __int16
#define U32  unsigned __int32
#define S32  __int32
#define U64  unsigned __int64
#else
#include <stdint.h>
#define BYTE uint8_t
#define U16  uint16_t
#define U32  uint32_t
#define S32  int32_t
#define U64  uint64_t
#endif

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(push, 1)
#endif

typedef struct _U16_S { U16 v; } U16_S;
typedef struct _U32_S { U32 v; } U32_S;
typedef struct _U64_S { U64 v; } U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(pop)
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

//**************************************
// Constants
//**************************************
#define MINMATCH 4

#define DICTIONARY_LOGSIZE 16
#define MAXD (1<<DICTIONARY_LOGSIZE)
#define MAXD_MASK ((U32)(MAXD - 1))
#define MAX_DISTANCE (MAXD - 1)

#define HASH_LOG (DICTIONARY_LOGSIZE-1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define MAX_NB_ATTEMPTS 256

#define ML_BITS  4
#define ML_MASK  (size_t)((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
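
// Note on the values above : with DICTIONARY_LOGSIZE == 16, the search window spans 64 KB - 1
// bytes (MAX_DISTANCE), and the hash table holds 32K chain heads (1<<HASH_LOG). Each chain is
// linked backwards through chainTable using 16-bit deltas, which is why match offsets are
// capped at MAX_DISTANCE in the match finders below.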

//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64    // 64-bit
#define STEPSIZE 8
#define LZ4_COPYSTEP(s,d)     A64(d) = A64(s); d+=8; s+=8;
#define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d)
#define UARCH U64
#define AARCH A64
#define HTYPE                 U32
#define INITBASE(b,s)         const BYTE* const b = s
#else             // 32-bit
#define STEPSIZE 4
#define LZ4_COPYSTEP(s,d)     A32(d) = A32(s); d+=4; s+=4;
#define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#define UARCH U32
#define AARCH A32
#define HTYPE                 const BYTE*
#define INITBASE(b,s)         const int b = 0
#endif

#if defined(LZ4_BIG_ENDIAN)
#define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p,i)   { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else             // Little Endian
#define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p,v)   { A16(p) = v; p+=2; }
#endif

//************************************************************
// Local Types
//************************************************************
typedef struct
{
    const BYTE* base;
    HTYPE hashTable[HASHTABLESIZE];
    U16 chainTable[MAXD];
    const BYTE* nextToUpdate;
} LZ4HC_Data_Structure;

//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e)    do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
#define HASH_VALUE(p)          HASH_FUNCTION(A32(p))
#define HASH_POINTER(p)        (HashTable[HASH_VALUE(p)] + base)
#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]
#define GETNEXT(p)             ((p) - (size_t)DELTANEXT(p))
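
// Illustrative sketch (not part of the original source) : this unused helper shows how the macros
// above combine into a chained hash table. HASH_POINTER() returns the most recent position whose
// first 4 bytes hash like p, and GETNEXT() steps to the previous occurrence through the 16-bit
// backward deltas stored in chainTable. It assumes positions up to p have already been indexed
// (see LZ4HC_Insert() below). The match finders walk chains exactly this way, bounded by
// MAX_DISTANCE and MAX_NB_ATTEMPTS.
inline static int LZ4HC_chainLengthExample (LZ4HC_Data_Structure* hc4, const BYTE* p)
{
    U16* chainTable = hc4->chainTable;
    HTYPE* HashTable = hc4->hashTable;
    INITBASE(base,hc4->base);
    const BYTE* ref = HASH_POINTER(p);                      // head of the chain for p
    int count = 0;

    while ((ref >= p-MAX_DISTANCE) && (count < MAX_NB_ATTEMPTS))
    {
        count++;
        ref = GETNEXT(ref);                                 // previous candidate position
    }
    return count;                                           // number of candidates visited
}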

//**************************************
// Private functions
//**************************************
#if LZ4_ARCH64

inline static int LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
    #else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
    #endif
#else
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
    #else
    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
    #endif
#endif
}

#else

inline static int LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
    #else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
    #endif
#else
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
    #else
    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
    #endif
#endif
}

#endif
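
// Note : LZ4_NbCommonBytes() is called on the XOR of two machine words and returns how many of
// their lowest-addressed bytes are identical. On a little-endian 64-bit target, for example,
// diff == 0x0000000000FF0000 gives __builtin_ctzll(diff) == 16, hence 16 >> 3 == 2 common bytes.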

inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base)
{
    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
    hc4->nextToUpdate = base + LZ4_ARCH64;
    hc4->base = base;
    return 1;
}

inline static void* LZ4HC_Create (const BYTE* base)
{
    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));

    LZ4HC_Init ((LZ4HC_Data_Structure*)hc4, base);
    return hc4;
}

inline static int LZ4HC_Free (void** LZ4HC_Data)
{
    FREEMEM(*LZ4HC_Data);
    *LZ4HC_Data = NULL;
    return (1);
}

// Update chains up to ip (excluded)
forceinline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
{
    U16*   chainTable = hc4->chainTable;
    HTYPE* HashTable  = hc4->hashTable;
    INITBASE(base,hc4->base);

    while(hc4->nextToUpdate < ip)
    {
        const BYTE* p = hc4->nextToUpdate;
        size_t delta = (p) - HASH_POINTER(p);
        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
        DELTANEXT(p) = (U16)delta;
        HashTable[HASH_VALUE(p)] = (p) - base;
        hc4->nextToUpdate++;
    }
}

forceinline static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
{
    const BYTE* p1t = p1;

    while (p1t<matchlimit-(STEPSIZE-1))
    {
        UARCH diff = AARCH(p2) ^ AARCH(p1t);
        if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
        p1t += LZ4_NbCommonBytes(diff);
        return (p1t - p1);
    }
    if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
    if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
    if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
    return (p1t - p1);
}

forceinline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
{
    U16* const chainTable = hc4->chainTable;
    HTYPE* const HashTable = hc4->hashTable;
    const BYTE* ref;
    INITBASE(base,hc4->base);
    int nbAttempts=MAX_NB_ATTEMPTS;
    size_t repl=0, ml=0;
    U16 delta;

    // HC4 match finder
    LZ4HC_Insert(hc4, ip);
    ref = HASH_POINTER(ip);

#define REPEAT_OPTIMIZATION
#ifdef REPEAT_OPTIMIZATION
    // Detect repetitive sequences of length <= 4
    if (ref >= ip-4)               // potential repetition
    {
        if (A32(ref) == A32(ip))   // confirmed
        {
            delta = (U16)(ip-ref);
            repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
            *matchpos = ref;
        }
        ref = GETNEXT(ref);
    }
#endif

    while ((ref >= ip-MAX_DISTANCE) && (nbAttempts))
    {
        nbAttempts--;
        if (*(ref+ml) == *(ip+ml))
        if (A32(ref) == A32(ip))
        {
            size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
            if (mlt > ml) { ml = mlt; *matchpos = ref; }
        }
        ref = GETNEXT(ref);
    }

#ifdef REPEAT_OPTIMIZATION
    // Complete table
    if (repl)
    {
        const BYTE* ptr = ip;
        const BYTE* end;

        end = ip + repl - (MINMATCH-1);
        while(ptr < end-delta)
        {
            DELTANEXT(ptr) = delta;    // Pre-Load
            ptr++;
        }
        do
        {
            DELTANEXT(ptr) = delta;
            HashTable[HASH_VALUE(ptr)] = (ptr) - base;     // Head of chain
            ptr++;
        } while(ptr < end);
        hc4->nextToUpdate = end;
    }
#endif

    return (int)ml;
}

forceinline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
{
    U16* const chainTable = hc4->chainTable;
    HTYPE* const HashTable = hc4->hashTable;
    INITBASE(base,hc4->base);
    const BYTE* ref;
    int nbAttempts = MAX_NB_ATTEMPTS;
    int delta = (int)(ip-startLimit);

    // First Match
    LZ4HC_Insert(hc4, ip);
    ref = HASH_POINTER(ip);

    while ((ref >= ip-MAX_DISTANCE) && (nbAttempts))
    {
        nbAttempts--;
        if (*(startLimit + longest) == *(ref - delta + longest))
        if (A32(ref) == A32(ip))
        {
#if 1
            const BYTE* reft = ref+MINMATCH;
            const BYTE* ipt = ip+MINMATCH;
            const BYTE* startt = ip;

            while (ipt<matchlimit-(STEPSIZE-1))
            {
                UARCH diff = AARCH(reft) ^ AARCH(ipt);
                if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
                ipt += LZ4_NbCommonBytes(diff);
                goto _endCount;
            }
            if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
            if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
            if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
_endCount:
            reft = ref;
#else
            // Easier for code maintenance, but unfortunately slower too
            const BYTE* startt = ip;
            const BYTE* reft = ref;
            const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
#endif

            while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;}

            if ((ipt-startt) > longest)
            {
                longest = (int)(ipt-startt);
                *matchpos = reft;
                *startpos = startt;
            }
        }
        ref = GETNEXT(ref);
    }

    return longest;
}
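
// Layout of a single LZ4 sequence, as written by LZ4_encodeSequence() below :
//   token              : 1 byte; high 4 bits = literal length, low 4 bits = match length - MINMATCH
//   [literal extension]: additional length bytes (255 each, then remainder) when literal length >= RUN_MASK
//   literals           : the literal bytes themselves
//   offset             : 2 bytes, little-endian, distance back to the match (ip - ref)
//   [match extension]  : additional length bytes when match length - MINMATCH >= ML_MASK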

forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref)
{
    int length, len;
    BYTE* token;

    // Encode Literal length
    length = (int)(*ip - *anchor);
    token = (*op)++;
    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
    else *token = (length<<ML_BITS);

    // Copy Literals
    LZ4_BLINDCOPY(*anchor, *op, length);

    // Encode Offset
    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));

    // Encode MatchLength
    len = (int)(ml-MINMATCH);
    if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; }
    else *token += len;

    // Prepare next loop
    *ip += ml;
    *anchor = *ip;

    return 0;
}

//****************************
// Compression CODE
//****************************

int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,
                 const char* source,
                 char* dest,
                 int isize)
{
    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;
    const BYTE* const iend = ip + isize;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = (iend - LASTLITERALS);

    BYTE* op = (BYTE*) dest;

    int ml, ml2, ml3, ml0;
    const BYTE* ref=NULL;
    const BYTE* start2=NULL;
    const BYTE* ref2=NULL;
    const BYTE* start3=NULL;
    const BYTE* ref3=NULL;
    const BYTE* start0;
    const BYTE* ref0;

    ip++;

    // Main Loop
    while (ip < mflimit)
    {
        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
        if (!ml) { ip++; continue; }

        // saved, in case we would skip too much
        start0 = ip;
        ref0 = ref;
        ml0 = ml;

_Search2:
        if (ip+ml < mflimit)
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
        else ml2 = ml;

        if (ml2 == ml)   // No better match
        {
            LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
            continue;
        }

        if (start0 < ip)
        {
            if (start2 < ip + ml0)   // empirical
            {
                ip = start0;
                ref = ref0;
                ml = ml0;
            }
        }

        // Here, start0==ip
        if ((start2 - ip) < 3)   // First Match too small : removed
        {
            ml = ml2;
            ip = start2;
            ref = ref2;
            goto _Search2;
        }

_Search3:
        // Currently we have :
        // ml2 > ml1, and
        // ip1+3 <= ip2 (usually < ip1+ml1)
        if ((start2 - ip) < OPTIMAL_ML)
        {
            int correction;
            int new_ml = ml;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0)
            {
                start2 += correction;
                ref2 += correction;
                ml2 -= correction;
            }
        }
        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)

        if (start2 + ml2 < mflimit)
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
        else ml3 = ml2;

        if (ml3 == ml2)   // No better match : 2 sequences to encode
        {
            // ip & ref are known; Now for ml
            if (start2 < ip+ml) ml = (int)(start2 - ip);

            // Now, encode 2 sequences
            LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
            ip = start2;
            LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2);
            continue;
        }

        if (start3 < ip+ml+3)   // Not enough space for match 2 : remove it
        {
            if (start3 >= (ip+ml))   // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
            {
                if (start2 < ip+ml)
                {
                    int correction = (int)(ip+ml - start2);
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                    if (ml2 < MINMATCH)
                    {
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
                    }
                }

                LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
                ip = start3;
                ref = ref3;
                ml = ml3;

                start0 = start2;
                ref0 = ref2;
                ml0 = ml2;
                goto _Search2;
            }

            start2 = start3;
            ref2 = ref3;
            ml2 = ml3;
            goto _Search3;
        }

        // OK, now we have 3 ascending matches; let's write at least the first one
        // ip & ref are known; Now for ml
        if (start2 < ip+ml)
        {
            if ((start2 - ip) < (int)ML_MASK)
            {
                int correction;
                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                correction = ml - (int)(start2 - ip);
                if (correction > 0)
                {
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                }
            }
            else
            {
                ml = (int)(start2 - ip);
            }
        }
        LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);

        ip = start2;
        ref = ref2;
        ml = ml2;

        start2 = start3;
        ref2 = ref3;
        ml2 = ml3;

        goto _Search3;
    }

    // Encode Last Literals
    {
        int lastRun = (int)(iend - anchor);
        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
        else *op++ = (lastRun<<ML_BITS);
        memcpy(op, anchor, iend - anchor);
        op += iend-anchor;
    }

    // End
    return (int) (((char*)op)-dest);
}

int LZ4_compressHC(const char* source,
                 char* dest,
                 int isize)
{
    void* ctx = LZ4HC_Create((const BYTE*)source);
    int result = LZ4_compressHCCtx(ctx, source, dest, isize);
    LZ4HC_Free (&ctx);

    return result;
}
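
// Usage sketch (illustrative only, disabled so it does not affect builds of this file) :
// compressing a buffer with LZ4_compressHC(). The destination buffer is sized with the usual
// LZ4 worst-case bound of isize + isize/255 + 16; if your lz4.h exposes a bound macro or
// function, prefer that instead.
#if 0
#include <stdio.h>

int main(void)
{
    const char* src = "LZ4 HC compresses repeated data well : abcabcabcabcabcabcabcabc";
    int isize = (int)strlen(src);
    int maxOut = isize + isize/255 + 16;            // worst-case compressed size
    char* dst = (char*)malloc(maxOut);
    int csize = LZ4_compressHC(src, dst, isize);    // returns the compressed size in bytes
    printf("%d bytes -> %d bytes\n", isize, csize);
    free(dst);
    return 0;
}
#endif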