huf_compress.c

/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 ****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY    /* FSE_optimalTableLog_internal */
#include "../common/fse.h"         /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0);   /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}
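
/* Illustrative note (not part of the original source): a quick worked example
 * of the align-up arithmetic above, assuming align == 8.
 * If workspace == 0x1003 :
 *   rem = 0x1003 & 7 = 3
 *   add = (8 - 3) & 7 = 5
 *   aligned = 0x1003 + 5 = 0x1008, which is 8-byte aligned.
 * If workspace is already aligned (rem == 0), then add == 0 and the pointer is
 * returned unchanged; the final `& mask` is what collapses (align - 0) to 0.
 */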

/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;   /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~(size_t)0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}
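
/* Illustrative note (not part of the original source): the HUF_CElt packing
 * used by the getters/setters above, on a 64-bit size_t.
 * For a symbol coded in 5 bits with value 0b10110 (0x16) :
 *   HUF_setNbBits(&elt, 5);   -> elt  = 0x0000000000000005
 *   HUF_setValue(&elt, 0x16); -> elt |= 0x16 << (64 - 5)
 *                             -> elt  = 0xB000000000000005
 * HUF_getNbBits() masks the low byte back out, and HUF_getValue() keeps the
 * value already left-aligned, ready to be OR'd into the bit container.
 */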

typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}
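
/* Illustrative note (not part of the original source): the raw 4-bit fallback
 * above, for a hypothetical table with maxSymbolValue == 5 and weights
 * {3, 1, 2, 1, 2} for symbols 0..4 :
 *   op[0] = 128 + (5-1) = 132       (header : raw mode + weight count)
 *   op[1] = (3 << 4) + 1 = 0x31
 *   op[2] = (2 << 4) + 1 = 0x21
 *   op[3] = (2 << 4) + 0 = 0x20     (pad nibble from the zeroed huffWeight[5])
 *   return ((5+1)/2) + 1 = 4 bytes.
 * The weight of the largest symbol itself is never stored; the decoder
 * reconstructs it from the Kraft sum of the others.
 */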

/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    HUF_WriteCTableWksp wksp;
    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    CTable[0] = tableLog;

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;   /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return (U32)HUF_getNbBits(ct[symbolValue]);
}

typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/**
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * over 2^largestBits after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbols are left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
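
/* Illustrative note (not part of the original source): the cost accounting
 * above, with hypothetical numbers. Say largestBits == 13 and maxNbBits == 11,
 * so baseCost == 1 << 2 == 4. Truncating a 12-bit symbol to 11 bits adds
 * baseCost - (1 << (13-12)) = 4 - 2 = 2 to totalCost; truncating a 13-bit
 * symbol adds 4 - 1 = 3. After renormalization (>> 2), each unit of totalCost
 * is repaid by demoting one symbol from (maxNbBits - nBitsToDecrease) bits to
 * one more bit, which frees 1 << (nBitsToDecrease-1) units at the
 * 2^maxNbBits scale.
 */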

typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
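
/* Illustrative note (not part of the original source): sample buckets from
 * HUF_getIndex(). Small counts below RANK_POSITION_DISTINCT_COUNT_CUTOFF each
 * get their own bucket; larger counts share one bucket per power of 2 :
 *   count = 3     -> bucket 3                         (distinct range)
 *   count = 100   -> bucket 100                       (distinct range)
 *   count = 500   -> highbit32(500)   = 8  -> bucket 158 + 8  = 166
 *   count = 70000 -> highbit32(70000) = 16 -> bucket 158 + 16 = 174
 * Each shared bucket is sorted individually afterwards in HUF_sort().
 */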

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr - rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}

/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode       The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue The maximum symbol value.
 * @return               The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;

    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;   /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}
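
/* Illustrative note (not part of the original source): the canonical value
 * assignment above, for a hypothetical 4-symbol alphabet with code lengths
 * {A:1, B:2, C:3, D:3}, i.e. nbPerRank = {1:1, 2:1, 3:2} :
 *   rank 3 : valPerRank[3] = 0, then min = (0+2)>>1 = 1
 *   rank 2 : valPerRank[2] = 1, then min = (1+1)>>1 = 1
 *   rank 1 : valPerRank[1] = 1, then min = (1+1)>>1 = 1
 * giving codes A=1, B=01, C=000, D=001 : a prefix-free canonical code where
 * shorter codes occupy the numerically higher values.
 */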

size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables =
        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}
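
/* Illustrative usage sketch (not part of the original source): building and
 * serializing a CTable from a histogram. Assumes the caller provides hist
 * (a histogram over symbols 0..255), plus dst and dstCapacity.
 *
 *     HUF_CElt ctable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
 *     U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *     size_t const maxBits = HUF_buildCTable_wksp(ctable, hist,
 *                                      255, 11,            (maxSymbolValue, maxNbBits)
 *                                      wksp, sizeof(wksp));
 *     if (!HUF_isError(maxBits)) {
 *         size_t const hSize = HUF_writeCTable(dst, dstCapacity,
 *                                              ctable, 255, (unsigned)maxBits);
 *         ...
 *     }
 */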

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];
    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;
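
/* Illustrative note (not part of the original source): the "shift right, OR in
 * from the top" scheme from point 2 above, on a 64-bit container. Starting
 * from an empty container, adding a 3-bit value 0b101 then a 2-bit value 0b01 :
 *   container >>= 3; container |= 0b101 << 61;  -> 101..............
 *   container >>= 2; container |= 0b01  << 62;  -> 01101............
 * The most recent value always sits in the topmost bits, with earlier symbols
 * pushed lower. Earlier symbols therefore land in the low bits of the word
 * extracted by HUF_flushBits(), which writes them out first in little-endian
 * byte order.
 */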

/*! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4 bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0;   /* overflow detected */
        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n -= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge the bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);
}

/**
 * Returns a tight upper bound on the output space needed by Huffman,
 * with 8 bytes of buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
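
/* Illustrative note (not part of the original source): a quick instance of
 * HUF_tightCompressBound(). For srcSize = 10000 bytes and tableLog = 11, every
 * symbol costs at most 11 bits, so the payload is bounded by
 * (10000 * 11) / 8 = 13750 bytes, plus 8 spare bytes for the unconditional
 * 8-byte flushes : 13758. When dstSize is at least this large, the fast
 * (bounds-check-free) loops below are safe to use.
 */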

FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    {   size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
        if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}
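
/* Illustrative note (not part of the original source): the 4-stream layout
 * produced above. The source is split into four segments of
 * segmentSize = (srcSize+3)/4 bytes (the last one possibly shorter), and the
 * output begins with a 6-byte jump table :
 *
 *   | cSize1 (LE16) | cSize2 (LE16) | cSize3 (LE16) | stream1 | stream2 | stream3 | stream4 |
 *
 * The fourth stream needs no size field : the decoder infers it from the
 * total compressed size.
 */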

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* HUF_compress_internal() :
 * `workSpace` must be aligned on 4-byte boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 HUF_nbStreams_e nbStreams,
                 void* workSpace, size_t wkspSize,
                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2, unsigned suspectUncompressible)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;   /* Uncompressed */
    if (!dstSize) return 0;   /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
    }
    /* Zero unused symbols in CTable, so we can check it for validity */
    {
        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}

size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
                      int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2, suspectUncompressible);
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly.
 * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
 * @return : maxNbBits
 * Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
    HUF_buildCTable_wksp_tables workspace;
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}

size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
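
/* Illustrative usage sketch (not part of the original source): the simplest
 * entry point, assuming caller-provided src/srcSize and an output buffer
 * sized with HUF_compressBound().
 *
 *     size_t const dstCapacity = HUF_compressBound(srcSize);
 *     size_t const cSize = HUF_compress(dst, dstCapacity, src, srcSize);
 *     if (HUF_isError(cSize)) { handle the error }
 *     else if (cSize == 0)    { not compressible : store the block raw }
 *     else if (cSize == 1)    { the whole input is one repeated byte }
 *     else                    { cSize bytes of dst hold header + streams }
 */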
#endif