/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#endif

/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY    /* FSE_optimalTableLog_internal */
#include "../common/fse.h"         /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */

/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}

/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];

    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];

    /* init conditions */
    if (wtSize <= 1) return 0;   /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

/*! HUF_writeCTable() :
 *  `CTable` : Huffman tree to save, using huf representation.
 * @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

    /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}

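/* Header layout produced by HUF_writeCTable() (derived from the code above) :
 * - first byte < 128  : the weights are FSE-compressed; the byte holds the size
 *   of the compressed payload that follows (always < maxSymbolValue/2).
 * - first byte >= 128 : raw mode; the byte encodes 128 + (maxSymbolValue-1),
 *   and is followed by (maxSymbolValue+1)/2 bytes, each packing two 4-bit
 *   weights. The last symbol's weight is implied and not transmitted.
 */
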
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            /* branchless : yields 0 when w==0, tableLog+1-w otherwise */
            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;   /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

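#if 0
/* Usage sketch (illustrative only, not part of the library) : serialize a
 * CTable with HUF_writeCTable(), then rebuild an equivalent table from the
 * header with HUF_readCTable(). `example_` is a hypothetical name. */
static size_t example_roundTripCTable(const HUF_CElt* ct, unsigned maxSymbolValue, unsigned huffLog)
{
    BYTE header[HUF_CTABLEBOUND];                 /* worst-case header size */
    HUF_CElt rebuilt[HUF_SYMBOLVALUE_MAX + 1];
    unsigned maxSV = maxSymbolValue;
    unsigned hasZeroWeights;
    size_t const hSize = HUF_writeCTable(header, sizeof(header), ct, maxSymbolValue, huffLog);
    if (HUF_isError(hSize)) return hSize;
    /* on success, rebuilt[] assigns the same (nbBits, val) pairs as ct[] */
    return HUF_readCTable(rebuilt, &maxSV, header, hSize, &hasZeroWeights);
}
#endif
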
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return table[symbolValue].nbBits;
}


typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/**
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * over 2^largestBits after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}

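/* Worked example of the cost accounting above (illustrative numbers) :
 * suppose largestBits == 13 and maxNbBits == 11, so baseCost == 1<<2 == 4.
 * Clamping a 13-bit symbol to 11 bits adds 4 - (1<<0) == 3 to totalCost;
 * clamping a 12-bit symbol adds 4 - (1<<1) == 2. After renormalization
 * (totalCost >>= 2), one unit of totalCost equals one code slot at depth
 * maxNbBits. Lengthening by one bit a symbol currently coded with
 * (maxNbBits - nBitsToDecrease) bits frees half of its 2^nBitsToDecrease
 * slots, hence totalCost -= 1 << (nBitsToDecrease-1).
 */
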
typedef struct {
    U32 base;
    U32 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

#define RANK_POSITION_TABLE_SIZE 32

typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
    int n;
    int const maxSymbolValue1 = (int)maxSymbolValue + 1;

    /* Compute base and set curr to base.
     * For symbol n let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = BIT_highbit32(count[n] + 1);
        rankPosition[lowerRank].base++;
    }
    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }
    /* Sort */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rankPosition[r].curr++;
        /* Insert into the correct position in the rank.
         * We have at most 256 symbols, so this insertion should be fine.
         */
        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
}

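/* Example of the rank bucketing above (illustrative numbers) :
 * count 0 -> BIT_highbit32(1) == 0 ; count 1 -> BIT_highbit32(2) == 1 ;
 * counts 3..6 -> BIT_highbit32(4..7) == 2 ; counts 7..14 -> 3 ; and so on.
 * Symbols inside a bucket differ by less than 2x in count, so the final
 * insertion step within each bucket only moves elements a short distance.
 */
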
/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode       The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue The maximum symbol value.
 * @return               The index of the last (smallest-count) symbol in the Huffman tree.
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}

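/* Illustrative walk-through (hypothetical counts) : after HUF_sort(),
 * huffNode = {45, 13, 12, 5}. The first merge pairs the two smallest leaves
 * (12 and 5) into internal node 256 (count 17, STARTNODE == 256). The loop
 * then merges 13 + 17 -> node 257 (count 30), and 45 + 30 -> node 258, the
 * root. Walking parents back down yields depths {1, 2, 3, 3}, a valid
 * Huffman code : 1/2 + 1/4 + 1/8 + 1/8 == 1.
 */
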
/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;   /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        CTable[n].val = valPerRank[CTable[n].nbBits]++;         /* assign value within rank, symbol order */
}

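/* Continuing the walk-through after HUF_buildTree() (depths {1, 2, 3, 3}) :
 * nbPerRank[1..3] == {1, 1, 2}. Scanning from maxNbBits down, valPerRank[3]
 * = 0, then min = (0+2)>>1 = 1, so valPerRank[2] = 1 and valPerRank[1] = 1.
 * The resulting canonical codes are '1', '01', '000' and '001'.
 */
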
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);   /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += CTable[s].nbBits * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}

#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

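/* Why the conditional flushes are safe (derived from the constants above) :
 * one symbol uses at most HUF_TABLELOG_MAX == 12 bits, and the bitstream
 * keeps up to 7 bits of headroom. With a 64-bit bitContainer, 64 >= 4*12+7,
 * so neither HUF_FLUSHBITS_1 nor HUF_FLUSHBITS_2 fires and flushing happens
 * every 4th symbol via HUF_FLUSHBITS. With a 32-bit container, 32 < 4*12+7,
 * so HUF_FLUSHBITS_2 fires and flushing happens every 2 symbols instead.
 * The conditions are compile-time constants, so the dead branches vanish.
 */
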
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;   /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {   /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize==0) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}

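/* Block layout produced above : a 6-byte jump table -- three little-endian
 * U16 values holding the compressed sizes of streams 1 to 3 -- followed by
 * the 4 compressed streams. The 4th size is implied : it runs to the end of
 * the block. Each of the first 3 streams encodes segmentSize == (srcSize+3)/4
 * input bytes; the last one takes the remainder.
 */
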
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

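#if 0
/* Usage sketch (illustrative only, not part of the library) : how the pieces
 * above compose into a full block encoder. `example_` names are hypothetical;
 * HUF_compress_internal() below is the real, more careful equivalent. */
static size_t example_pipeline(void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt ct[HUF_SYMBOLVALUE_MAX + 1];
    HUF_buildCTable_wksp_tables wksp;
    unsigned maxSV = HUF_SYMBOLVALUE_MAX;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstCapacity;

    /* 1) histogram */
    (void)HIST_count_simple(count, &maxSV, src, srcSize);   /* never fails */

    /* 2) build the depth-limited canonical table */
    {   size_t const maxBits = HUF_buildCTable_wksp(ct, count, maxSV,
                                    HUF_TABLELOG_DEFAULT, &wksp, sizeof(wksp));
        if (HUF_isError(maxBits)) return maxBits;

        /* 3) serialize the table header */
        {   size_t const hSize = HUF_writeCTable(op, dstCapacity, ct, maxSV, (unsigned)maxBits);
            if (HUF_isError(hSize)) return hSize;
            op += hSize;
    }   }

    /* 4) encode the payload as 4 streams */
    {   size_t const cSize = HUF_compress4X_usingCTable(op, (size_t)(oend-op), src, srcSize, ct);
        if (HUF_isError(cSize) || cSize == 0) return cSize;
        op += cSize;
    }
    return (size_t)(op - (BYTE*)dst);
}
#endif
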
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
    HUF_buildCTable_wksp_tables buildCTable_wksp;
} HUF_compress_tables_t;

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 HUF_nbStreams_e nbStreams,
                 void* workSpace_align4, size_t wkspSize,
                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */

    /* checks & inits */
    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;   /* Uncompressed */
    if (!dstSize) return 0;   /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
                    sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}

size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}

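#if 0
/* Usage sketch (illustrative only, not part of the library) : compressing a
 * sequence of blocks while letting HUF decide when to reuse the previous
 * table. `example_` names are hypothetical. */
static size_t example_compressBlockWithRepeat(void* dst, size_t dstCapacity,
                                              const void* src, size_t srcSize,
                                              HUF_CElt* persistentTable,
                                              HUF_repeat* repeatState,   /* init to HUF_repeat_none before 1st block */
                                              void* workSpace, size_t wkspSize)
{
    /* repeatState tells HUF whether persistentTable holds a reusable table
     * from a previous block (HUF_repeat_check requires validation,
     * HUF_repeat_valid skips it); it is reset to HUF_repeat_none whenever a
     * fresh table is emitted, and persistentTable is updated to match. */
    return HUF_compress1X_repeat(dst, dstCapacity, src, srcSize,
                                 255, HUF_TABLELOG_DEFAULT,
                                 workSpace, wkspSize,
                                 persistentTable, repeatState,
                                 /* preferRepeat */ 0, /* bmi2 */ 0);
}
#endif
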
/* HUF_compress4X_wksp() :
 * compress input using 4 streams.
 * provide workspace to generate compression tables. */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

/* HUF_compress4X_repeat() :
 * compress input using 4 streams.
 * re-use an existing huffman compression table. */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
    HUF_buildCTable_wksp_tables workspace;
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}

size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}

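#if 0
/* Usage sketch (illustrative only, not part of the library) : one-shot
 * compression with the simple entry point above. `example_` is a
 * hypothetical name. A destination sized with HUF_compressBound() can never
 * be too small. */
static size_t example_oneShot(void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
{
    size_t cSize;
    assert(dstCapacity >= HUF_compressBound(srcSize));   /* worst-case output */
    cSize = HUF_compress(dst, dstCapacity, src, srcSize);
    if (HUF_isError(cSize)) return cSize;   /* propagate error code */
    if (cSize == 0) return 0;   /* not compressible : caller stores the block raw */
    return cSize;               /* cSize == 1 means single-symbol (RLE) block */
}
#endif
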
#endif