huf_decompress.c 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889
  1. /* ******************************************************************
  2. * huff0 huffman decoder,
  3. * part of Finite State Entropy library
  4. * Copyright (c) Yann Collet, Facebook, Inc.
  5. *
  6. * You can contact the author at :
  7. * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
  8. *
  9. * This source code is licensed under both the BSD-style license (found in the
  10. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  11. * in the COPYING file in the root directory of this source tree).
  12. * You may select, at your option, one of the above-listed licenses.
  13. ****************************************************************** */
  14. /* **************************************************************
  15. * Dependencies
  16. ****************************************************************/
  17. #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
  18. #include "../common/compiler.h"
  19. #include "../common/bitstream.h" /* BIT_* */
  20. #include "../common/fse.h" /* to compress headers */
  21. #define HUF_STATIC_LINKING_ONLY
  22. #include "../common/huf.h"
  23. #include "../common/error_private.h"
  24. #include "../common/zstd_internal.h"
  25. /* **************************************************************
  26. * Constants
  27. ****************************************************************/
  /* Table log that HUF_readDTableX1_wksp_bmi2() rescales X1 tables up to
   * (capped by the capacity recorded in the DTable header). It is also the
   * only table log for which HUF_DecompressAsmArgs_init() does not ask for
   * the fallback decoder.
   */
  #define HUF_DECODER_FAST_TABLELOG 11
  29. /* **************************************************************
  30. * Macros
  31. ****************************************************************/
  /* HUF_FORCE_DECOMPRESS_X1 and HUF_FORCE_DECOMPRESS_X2 are optional build
   * switches. Each one forces the use of one of the two Huffman decompression
   * implementations, so asking for both at the same time is rejected.
   */
  #if defined(HUF_FORCE_DECOMPRESS_X1) && defined(HUF_FORCE_DECOMPRESS_X2)
  #error "Cannot force the use of the X1 and X2 decoders at the same time!"
  #endif

  /* Attribute placed on the C driver that wraps the x86-64 asm loop:
   * the BMI2 target attribute when both the asm and dynamic BMI2 dispatch are
   * enabled, empty otherwise.
   */
  #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2)
  #  define HUF_ASM_X86_64_BMI2_ATTRS
  #else
  #  define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
  #endif

  /* Linkage specifier used when declaring the asm loop. */
  #ifndef __cplusplus
  #  define HUF_EXTERN_C
  #else
  #  define HUF_EXTERN_C extern "C"
  #endif
  #define HUF_ASM_DECL HUF_EXTERN_C

  /* HUF_NEED_BMI2_FUNCTION :
   * 1 when the `_bmi2` flavour of the 4-stream decoder must be compiled. */
  #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
  #  define HUF_NEED_BMI2_FUNCTION 1
  #else
  #  define HUF_NEED_BMI2_FUNCTION 0
  #endif

  /* HUF_NEED_DEFAULT_FUNCTION :
   * 1 when the `_default` flavour of the 4-stream decoder must be compiled,
   * i.e. in every configuration except "asm enabled and BMI2 statically
   * available". */
  #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
  #  define HUF_NEED_DEFAULT_FUNCTION 0
  #else
  #  define HUF_NEED_DEFAULT_FUNCTION 1
  #endif
  61. /* **************************************************************
  62. * Error Management
  63. ****************************************************************/
  /* Error test used throughout this file; alias of the common helper. */
  #define HUF_isError ERR_isError

  /* **************************************************************
  *  Byte alignment for workSpace management
  ****************************************************************/
  /* HUF_ALIGN_MASK(x, mask) : adds `mask` to `x`, then clears the bits set in
   *                           `mask`. Note that `mask` is evaluated twice.
   * HUF_ALIGN(x, a)         : same operation with mask == a - 1, which rounds
   *                           `x` up to a multiple of `a` when `a` is a power
   *                           of two.
   */
  #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
  #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
  70. /* **************************************************************
  71. * BMI2 Variant Wrappers
  72. ****************************************************************/
  /* HUF_DGEN(fn) :
   * Given an existing `fn##_body(dst, dstSize, cSrc, cSrcSize, DTable)`,
   * generates the static entry point
   *     size_t fn(dst, dstSize, cSrc, cSrcSize, DTable, int bmi2)
   *
   * With DYNAMIC_BMI2, two wrappers around the body are emitted, a plain
   * `fn##_default` and a `fn##_bmi2` carrying BMI2_TARGET_ATTRIBUTE, and `fn`
   * selects between them from the run-time `bmi2` flag.
   * Without DYNAMIC_BMI2, `fn` ignores `bmi2` and calls the body directly.
   */
  #if DYNAMIC_BMI2

  #define HUF_DGEN(fn) \
      static size_t fn##_default( \
                    void* dst,  size_t dstSize, \
              const void* cSrc, size_t cSrcSize, \
              const HUF_DTable* DTable) \
      { \
          return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
      } \
      \
      static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
                    void* dst,  size_t dstSize, \
              const void* cSrc, size_t cSrcSize, \
              const HUF_DTable* DTable) \
      { \
          return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
      } \
      \
      static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
                       size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
      { \
          return bmi2 ? fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable) \
                      : fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
      }

  #else

  #define HUF_DGEN(fn) \
      static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
                       size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
      { \
          (void)bmi2; \
          return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
      }

  #endif
  109. /*-***************************/
  110. /* generic DTableDesc */
  111. /*-***************************/
  112. typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
  113. static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
  114. {
  115. DTableDesc dtd;
  116. ZSTD_memcpy(&dtd, table, sizeof(dtd));
  117. return dtd;
  118. }
  119. #if ZSTD_ENABLE_ASM_X86_64_BMI2
  120. static size_t HUF_initDStream(BYTE const* ip) {
  121. BYTE const lastByte = ip[7];
  122. size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
  123. size_t const value = MEM_readLEST(ip) | 1;
  124. assert(bitsConsumed <= 8);
  125. return value << bitsConsumed;
  126. }
  127. typedef struct {
  128. BYTE const* ip[4];
  129. BYTE* op[4];
  130. U64 bits[4];
  131. void const* dt;
  132. BYTE const* ilimit;
  133. BYTE* oend;
  134. BYTE const* iend[4];
  135. } HUF_DecompressAsmArgs;
  136. /**
  137. * Initializes args for the asm decoding loop.
  138. * @returns 0 on success
  139. * 1 if the fallback implementation should be used.
  140. * Or an error code on failure.
  141. */
  142. static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
  143. {
  144. void const* dt = DTable + 1;
  145. U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
  146. const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
  147. BYTE* const oend = (BYTE*)dst + dstSize;
  148. /* The following condition is false on x32 platform,
  149. * but HUF_asm is not compatible with this ABI */
  150. if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
  151. /* strict minimum : jump table + 1 byte per stream */
  152. if (srcSize < 10)
  153. return ERROR(corruption_detected);
  154. /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
  155. * If table log is not correct at this point, fallback to the old decoder.
  156. * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
  157. */
  158. if (dtLog != HUF_DECODER_FAST_TABLELOG)
  159. return 1;
  160. /* Read the jump table. */
  161. {
  162. const BYTE* const istart = (const BYTE*)src;
  163. size_t const length1 = MEM_readLE16(istart);
  164. size_t const length2 = MEM_readLE16(istart+2);
  165. size_t const length3 = MEM_readLE16(istart+4);
  166. size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
  167. args->iend[0] = istart + 6; /* jumpTable */
  168. args->iend[1] = args->iend[0] + length1;
  169. args->iend[2] = args->iend[1] + length2;
  170. args->iend[3] = args->iend[2] + length3;
  171. /* HUF_initDStream() requires this, and this small of an input
  172. * won't benefit from the ASM loop anyways.
  173. * length1 must be >= 16 so that ip[0] >= ilimit before the loop
  174. * starts.
  175. */
  176. if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
  177. return 1;
  178. if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
  179. }
  180. /* ip[] contains the position that is currently loaded into bits[]. */
  181. args->ip[0] = args->iend[1] - sizeof(U64);
  182. args->ip[1] = args->iend[2] - sizeof(U64);
  183. args->ip[2] = args->iend[3] - sizeof(U64);
  184. args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
  185. /* op[] contains the output pointers. */
  186. args->op[0] = (BYTE*)dst;
  187. args->op[1] = args->op[0] + (dstSize+3)/4;
  188. args->op[2] = args->op[1] + (dstSize+3)/4;
  189. args->op[3] = args->op[2] + (dstSize+3)/4;
  190. /* No point to call the ASM loop for tiny outputs. */
  191. if (args->op[3] >= oend)
  192. return 1;
  193. /* bits[] is the bit container.
  194. * It is read from the MSB down to the LSB.
  195. * It is shifted left as it is read, and zeros are
  196. * shifted in. After the lowest valid bit a 1 is
  197. * set, so that CountTrailingZeros(bits[]) can be used
  198. * to count how many bits we've consumed.
  199. */
  200. args->bits[0] = HUF_initDStream(args->ip[0]);
  201. args->bits[1] = HUF_initDStream(args->ip[1]);
  202. args->bits[2] = HUF_initDStream(args->ip[2]);
  203. args->bits[3] = HUF_initDStream(args->ip[3]);
  204. /* If ip[] >= ilimit, it is guaranteed to be safe to
  205. * reload bits[]. It may be beyond its section, but is
  206. * guaranteed to be valid (>= istart).
  207. */
  208. args->ilimit = ilimit;
  209. args->oend = oend;
  210. args->dt = dt;
  211. return 0;
  212. }
  213. static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
  214. {
  215. /* Validate that we haven't overwritten. */
  216. if (args->op[stream] > segmentEnd)
  217. return ERROR(corruption_detected);
  218. /* Validate that we haven't read beyond iend[].
  219. * Note that ip[] may be < iend[] because the MSB is
  220. * the next bit to read, and we may have consumed 100%
  221. * of the stream, so down to iend[i] - 8 is valid.
  222. */
  223. if (args->ip[stream] < args->iend[stream] - 8)
  224. return ERROR(corruption_detected);
  225. /* Construct the BIT_DStream_t. */
  226. bit->bitContainer = MEM_readLE64(args->ip[stream]);
  227. bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
  228. bit->start = (const char*)args->iend[0];
  229. bit->limitPtr = bit->start + sizeof(size_t);
  230. bit->ptr = (const char*)args->ip[stream];
  231. return 0;
  232. }
  233. #endif
  234. #ifndef HUF_FORCE_DECOMPRESS_X2
  235. /*-***************************/
  236. /* single-symbol decoding */
  237. /*-***************************/
  238. typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
  239. /**
  240. * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
  241. * a time.
  242. */
  243. static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
  244. U64 D4;
  245. if (MEM_isLittleEndian()) {
  246. D4 = (symbol << 8) + nbBits;
  247. } else {
  248. D4 = symbol + (nbBits << 8);
  249. }
  250. D4 *= 0x0001000100010001ULL;
  251. return D4;
  252. }
  253. /**
  254. * Increase the tableLog to targetTableLog and rescales the stats.
  255. * If tableLog > targetTableLog this is a no-op.
  256. * @returns New tableLog
  257. */
  258. static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
  259. {
  260. if (tableLog > targetTableLog)
  261. return tableLog;
  262. if (tableLog < targetTableLog) {
  263. U32 const scale = targetTableLog - tableLog;
  264. U32 s;
  265. /* Increase the weight for all non-zero probability symbols by scale. */
  266. for (s = 0; s < nbSymbols; ++s) {
  267. huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
  268. }
  269. /* Update rankVal to reflect the new weights.
  270. * All weights except 0 get moved to weight + scale.
  271. * Weights [1, scale] are empty.
  272. */
  273. for (s = targetTableLog; s > scale; --s) {
  274. rankVal[s] = rankVal[s - scale];
  275. }
  276. for (s = scale; s > 0; --s) {
  277. rankVal[s] = 0;
  278. }
  279. }
  280. return targetTableLog;
  281. }
  282. typedef struct {
  283. U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
  284. U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
  285. U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
  286. BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
  287. BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
  288. } HUF_ReadDTableX1_Workspace;
  289. size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
  290. {
  291. return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
  292. }
  293. size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
  294. {
  295. U32 tableLog = 0;
  296. U32 nbSymbols = 0;
  297. size_t iSize;
  298. void* const dtPtr = DTable + 1;
  299. HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
  300. HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
  301. DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
  302. if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
  303. DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
  304. /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
  305. iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
  306. if (HUF_isError(iSize)) return iSize;
  307. /* Table header */
  308. { DTableDesc dtd = HUF_getDTableDesc(DTable);
  309. U32 const maxTableLog = dtd.maxTableLog + 1;
  310. U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
  311. tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
  312. if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
  313. dtd.tableType = 0;
  314. dtd.tableLog = (BYTE)tableLog;
  315. ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
  316. }
  317. /* Compute symbols and rankStart given rankVal:
  318. *
  319. * rankVal already contains the number of values of each weight.
  320. *
  321. * symbols contains the symbols ordered by weight. First are the rankVal[0]
  322. * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
  323. * symbols[0] is filled (but unused) to avoid a branch.
  324. *
  325. * rankStart contains the offset where each rank belongs in the DTable.
  326. * rankStart[0] is not filled because there are no entries in the table for
  327. * weight 0.
  328. */
  329. {
  330. int n;
  331. int nextRankStart = 0;
  332. int const unroll = 4;
  333. int const nLimit = (int)nbSymbols - unroll + 1;
  334. for (n=0; n<(int)tableLog+1; n++) {
  335. U32 const curr = nextRankStart;
  336. nextRankStart += wksp->rankVal[n];
  337. wksp->rankStart[n] = curr;
  338. }
  339. for (n=0; n < nLimit; n += unroll) {
  340. int u;
  341. for (u=0; u < unroll; ++u) {
  342. size_t const w = wksp->huffWeight[n+u];
  343. wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
  344. }
  345. }
  346. for (; n < (int)nbSymbols; ++n) {
  347. size_t const w = wksp->huffWeight[n];
  348. wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
  349. }
  350. }
  351. /* fill DTable
  352. * We fill all entries of each weight in order.
  353. * That way length is a constant for each iteration of the outer loop.
  354. * We can switch based on the length to a different inner loop which is
  355. * optimized for that particular case.
  356. */
  357. {
  358. U32 w;
  359. int symbol=wksp->rankVal[0];
  360. int rankStart=0;
  361. for (w=1; w<tableLog+1; ++w) {
  362. int const symbolCount = wksp->rankVal[w];
  363. int const length = (1 << w) >> 1;
  364. int uStart = rankStart;
  365. BYTE const nbBits = (BYTE)(tableLog + 1 - w);
  366. int s;
  367. int u;
  368. switch (length) {
  369. case 1:
  370. for (s=0; s<symbolCount; ++s) {
  371. HUF_DEltX1 D;
  372. D.byte = wksp->symbols[symbol + s];
  373. D.nbBits = nbBits;
  374. dt[uStart] = D;
  375. uStart += 1;
  376. }
  377. break;
  378. case 2:
  379. for (s=0; s<symbolCount; ++s) {
  380. HUF_DEltX1 D;
  381. D.byte = wksp->symbols[symbol + s];
  382. D.nbBits = nbBits;
  383. dt[uStart+0] = D;
  384. dt[uStart+1] = D;
  385. uStart += 2;
  386. }
  387. break;
  388. case 4:
  389. for (s=0; s<symbolCount; ++s) {
  390. U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
  391. MEM_write64(dt + uStart, D4);
  392. uStart += 4;
  393. }
  394. break;
  395. case 8:
  396. for (s=0; s<symbolCount; ++s) {
  397. U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
  398. MEM_write64(dt + uStart, D4);
  399. MEM_write64(dt + uStart + 4, D4);
  400. uStart += 8;
  401. }
  402. break;
  403. default:
  404. for (s=0; s<symbolCount; ++s) {
  405. U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
  406. for (u=0; u < length; u += 16) {
  407. MEM_write64(dt + uStart + u + 0, D4);
  408. MEM_write64(dt + uStart + u + 4, D4);
  409. MEM_write64(dt + uStart + u + 8, D4);
  410. MEM_write64(dt + uStart + u + 12, D4);
  411. }
  412. assert(u == length);
  413. uStart += length;
  414. }
  415. break;
  416. }
  417. symbol += symbolCount;
  418. rankStart += symbolCount * length;
  419. }
  420. }
  421. return iSize;
  422. }
  423. FORCE_INLINE_TEMPLATE BYTE
  424. HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
  425. {
  426. size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
  427. BYTE const c = dt[val].byte;
  428. BIT_skipBits(Dstream, dt[val].nbBits);
  429. return c;
  430. }
  /* HUF_DECODE_SYMBOLX1_x(ptr, DStreamPtr) :
   * Decode one symbol from `DStreamPtr` and store it at `*ptr`, advancing `ptr`.
   * These macros rely on two names being visible at the point of use :
   * `dt` (decoding table) and `dtLog` (its log2 size).
   *   _0 : always decodes
   *   _1 : decodes only if MEM_64bits() || HUF_TABLELOG_MAX <= 12
   *   _2 : decodes only if MEM_64bits()
   *
   * Each macro is wrapped in do { } while (0) so that it behaves as exactly
   * one statement : a bare `if (...) stmt` expansion would silently capture a
   * following `else` at the call site. Arguments are parenthesized.
   * Usage is unchanged : `HUF_DECODE_SYMBOLX1_1(op, &bitD);`
   */
  #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
      do { *(ptr)++ = HUF_decodeSymbolX1((DStreamPtr), dt, dtLog); } while (0)

  #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
      do { \
          if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
              HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
      } while (0)

  #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
      do { \
          if (MEM_64bits()) \
              HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
      } while (0)
  439. HINT_INLINE size_t
  440. HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
  441. {
  442. BYTE* const pStart = p;
  443. /* up to 4 symbols at a time */
  444. if ((pEnd - p) > 3) {
  445. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
  446. HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
  447. HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
  448. HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
  449. HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
  450. }
  451. } else {
  452. BIT_reloadDStream(bitDPtr);
  453. }
  454. /* [0-3] symbols remaining */
  455. if (MEM_32bits())
  456. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
  457. HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
  458. /* no more data to retrieve from bitstream, no need to reload */
  459. while (p < pEnd)
  460. HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
  461. return pEnd-pStart;
  462. }
  463. FORCE_INLINE_TEMPLATE size_t
  464. HUF_decompress1X1_usingDTable_internal_body(
  465. void* dst, size_t dstSize,
  466. const void* cSrc, size_t cSrcSize,
  467. const HUF_DTable* DTable)
  468. {
  469. BYTE* op = (BYTE*)dst;
  470. BYTE* const oend = op + dstSize;
  471. const void* dtPtr = DTable + 1;
  472. const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
  473. BIT_DStream_t bitD;
  474. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  475. U32 const dtLog = dtd.tableLog;
  476. CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
  477. HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
  478. if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
  479. return dstSize;
  480. }
  481. FORCE_INLINE_TEMPLATE size_t
  482. HUF_decompress4X1_usingDTable_internal_body(
  483. void* dst, size_t dstSize,
  484. const void* cSrc, size_t cSrcSize,
  485. const HUF_DTable* DTable)
  486. {
  487. /* Check */
  488. if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
  489. { const BYTE* const istart = (const BYTE*) cSrc;
  490. BYTE* const ostart = (BYTE*) dst;
  491. BYTE* const oend = ostart + dstSize;
  492. BYTE* const olimit = oend - 3;
  493. const void* const dtPtr = DTable + 1;
  494. const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
  495. /* Init */
  496. BIT_DStream_t bitD1;
  497. BIT_DStream_t bitD2;
  498. BIT_DStream_t bitD3;
  499. BIT_DStream_t bitD4;
  500. size_t const length1 = MEM_readLE16(istart);
  501. size_t const length2 = MEM_readLE16(istart+2);
  502. size_t const length3 = MEM_readLE16(istart+4);
  503. size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
  504. const BYTE* const istart1 = istart + 6; /* jumpTable */
  505. const BYTE* const istart2 = istart1 + length1;
  506. const BYTE* const istart3 = istart2 + length2;
  507. const BYTE* const istart4 = istart3 + length3;
  508. const size_t segmentSize = (dstSize+3) / 4;
  509. BYTE* const opStart2 = ostart + segmentSize;
  510. BYTE* const opStart3 = opStart2 + segmentSize;
  511. BYTE* const opStart4 = opStart3 + segmentSize;
  512. BYTE* op1 = ostart;
  513. BYTE* op2 = opStart2;
  514. BYTE* op3 = opStart3;
  515. BYTE* op4 = opStart4;
  516. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  517. U32 const dtLog = dtd.tableLog;
  518. U32 endSignal = 1;
  519. if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
  520. if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
  521. CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
  522. CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
  523. CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
  524. CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
  525. /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
  526. if ((size_t)(oend - op4) >= sizeof(size_t)) {
  527. for ( ; (endSignal) & (op4 < olimit) ; ) {
  528. HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
  529. HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
  530. HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
  531. HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
  532. HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
  533. HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
  534. HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
  535. HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
  536. HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
  537. HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
  538. HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
  539. HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
  540. HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
  541. HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
  542. HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
  543. HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
  544. endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
  545. endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
  546. endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
  547. endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
  548. }
  549. }
  550. /* check corruption */
  551. /* note : should not be necessary : op# advance in lock step, and we control op4.
  552. * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
  553. if (op1 > opStart2) return ERROR(corruption_detected);
  554. if (op2 > opStart3) return ERROR(corruption_detected);
  555. if (op3 > opStart4) return ERROR(corruption_detected);
  556. /* note : op4 supposed already verified within main loop */
  557. /* finish bitStreams one by one */
  558. HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
  559. HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
  560. HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
  561. HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
  562. /* check */
  563. { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
  564. if (!endCheck) return ERROR(corruption_detected); }
  565. /* decoded size */
  566. return dstSize;
  567. }
  568. }
  #if HUF_NEED_BMI2_FUNCTION
  /* Instantiation of the 4-stream X1 decoding body compiled with
   * BMI2_TARGET_ATTRIBUTE. Returns whatever the body returns. */
  static BMI2_TARGET_ATTRIBUTE size_t
  HUF_decompress4X1_usingDTable_internal_bmi2(
            void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
  {
      return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
  }
  #endif
  #if HUF_NEED_DEFAULT_FUNCTION
  /* Instantiation of the 4-stream X1 decoding body without any target
   * attribute. Returns whatever the body returns. */
  static size_t
  HUF_decompress4X1_usingDTable_internal_default(
            void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
  {
      return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
  }
  #endif
  #if ZSTD_ENABLE_ASM_X86_64_BMI2

  /* Decoding loop implemented outside of this file; reads and updates `args`. */
  HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

  /* HUF_decompress4X1_usingDTable_internal_bmi2_asm() :
   * 4-stream X1 decoding built around the asm loop :
   *  1. prepare the loop arguments; when the set-up asks for the fallback,
   *     hand everything over to HUF_decompress4X1_usingDTable_internal_bmi2()
   *  2. run the asm loop
   *  3. decode the tail of each stream with the C decoder, and require each
   *     output segment to be filled exactly.
   * @return dstSize on success, or an error code.
   */
  static HUF_ASM_X86_64_BMI2_ATTRS
  size_t
  HUF_decompress4X1_usingDTable_internal_bmi2_asm(
            void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
  {
      void const* const table = DTable + 1;
      const BYTE* const ifloor = (const BYTE*)cSrc + 6;   /* first byte after the jump table */
      BYTE* const oend = (BYTE*)dst + dstSize;
      HUF_DecompressAsmArgs args;

      {   size_t const initResult = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
          FORWARD_IF_ERROR(initResult, "Failed to init asm args");
          if (initResult != 0) {
              /* the set-up requested the fallback decoder */
              return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
          }
      }

      assert(args.ip[0] >= args.ilimit);
      HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);

      /* Our loop guarantees that ip[] >= ilimit and that we haven't
       * overwritten any op[].
       */
      assert(args.ip[0] >= ifloor);
      assert(args.ip[1] >= ifloor);
      assert(args.ip[2] >= ifloor);
      assert(args.ip[3] >= ifloor);
      assert(args.op[3] <= oend);
      (void)ifloor;   /* only referenced by the assertions */

      /* finish bit streams one by one. */
      {   size_t const segmentSize = (dstSize+3) / 4;
          BYTE* segmentEnd = (BYTE*)dst;
          int i;
          for (i = 0; i < 4; ++i) {
              BIT_DStream_t bit;
              size_t const room = (size_t)(oend - segmentEnd);
              /* the last segment may be shorter : clamp to the end of dst */
              segmentEnd = (segmentSize <= room) ? segmentEnd + segmentSize : oend;
              FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
              /* Decompress and validate that we've produced exactly the expected length. */
              args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)table, HUF_DECODER_FAST_TABLELOG);
              if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
          }
      }

      /* decoded size */
      return dstSize;
  }
  #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
/* Function-pointer type matching the (dst, dstSize, cSrc, cSrcSize, DTable)
 * signature used by the `_default`, `_bmi2` and `_bmi2_asm` decoder variants. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);
/* Provides HUF_decompress1X1_usingDTable_internal(), which is called below with
 * a trailing `bmi2` argument.
 * NOTE(review): HUF_DGEN is defined outside this excerpt; presumably it emits
 * the variants wrapping the matching `_body` function - confirm against the macro. */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
  639. static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
  640. size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
  641. {
  642. #if DYNAMIC_BMI2
  643. if (bmi2) {
  644. # if ZSTD_ENABLE_ASM_X86_64_BMI2
  645. return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
  646. # else
  647. return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
  648. # endif
  649. }
  650. #else
  651. (void)bmi2;
  652. #endif
  653. #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
  654. return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
  655. #else
  656. return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
  657. #endif
  658. }
  659. size_t HUF_decompress1X1_usingDTable(
  660. void* dst, size_t dstSize,
  661. const void* cSrc, size_t cSrcSize,
  662. const HUF_DTable* DTable)
  663. {
  664. DTableDesc dtd = HUF_getDTableDesc(DTable);
  665. if (dtd.tableType != 0) return ERROR(GENERIC);
  666. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  667. }
  668. size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  669. const void* cSrc, size_t cSrcSize,
  670. void* workSpace, size_t wkspSize)
  671. {
  672. const BYTE* ip = (const BYTE*) cSrc;
  673. size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
  674. if (HUF_isError(hSize)) return hSize;
  675. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  676. ip += hSize; cSrcSize -= hSize;
  677. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  678. }
  679. size_t HUF_decompress4X1_usingDTable(
  680. void* dst, size_t dstSize,
  681. const void* cSrc, size_t cSrcSize,
  682. const HUF_DTable* DTable)
  683. {
  684. DTableDesc dtd = HUF_getDTableDesc(DTable);
  685. if (dtd.tableType != 0) return ERROR(GENERIC);
  686. return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  687. }
  688. static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  689. const void* cSrc, size_t cSrcSize,
  690. void* workSpace, size_t wkspSize, int bmi2)
  691. {
  692. const BYTE* ip = (const BYTE*) cSrc;
  693. size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  694. if (HUF_isError(hSize)) return hSize;
  695. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  696. ip += hSize; cSrcSize -= hSize;
  697. return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  698. }
  699. size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  700. const void* cSrc, size_t cSrcSize,
  701. void* workSpace, size_t wkspSize)
  702. {
  703. return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
  704. }
  705. #endif /* HUF_FORCE_DECOMPRESS_X2 */
  706. #ifndef HUF_FORCE_DECOMPRESS_X1
  707. /* *************************/
  708. /* double-symbols decoding */
  709. /* *************************/
/* One cell of the X2 decoding table :
 * `sequence` stores up to 2 output bytes, `nbBits` is the number of bits to skip
 * in the bit stream, `length` is the number of output bytes the cell produces
 * (set from `level`, i.e. 1 or 2, by HUF_buildDEltX2U32()). */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; } sortedSymbol_t;    /* one entry of the list of symbols sorted by weight */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];    /* one U32 per weight (indexed by weight) */
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];  /* one rankValCol_t per number of already consumed bits */
  714. /**
  715. * Constructs a HUF_DEltX2 in a U32.
  716. */
  717. static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
  718. {
  719. U32 seq;
  720. DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
  721. DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
  722. DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
  723. DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
  724. if (MEM_isLittleEndian()) {
  725. seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
  726. return seq + (nbBits << 16) + ((U32)level << 24);
  727. } else {
  728. seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
  729. return (seq << 16) + (nbBits << 8) + (U32)level;
  730. }
  731. }
  732. /**
  733. * Constructs a HUF_DEltX2.
  734. */
  735. static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
  736. {
  737. HUF_DEltX2 DElt;
  738. U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
  739. DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
  740. ZSTD_memcpy(&DElt, &val, sizeof(val));
  741. return DElt;
  742. }
  743. /**
  744. * Constructs 2 HUF_DEltX2s and packs them into a U64.
  745. */
  746. static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
  747. {
  748. U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
  749. return (U64)DElt + ((U64)DElt << 32);
  750. }
  751. /**
  752. * Fills the DTable rank with all the symbols from [begin, end) that are each
  753. * nbBits long.
  754. *
  755. * @param DTableRank The start of the rank in the DTable.
  756. * @param begin The first symbol to fill (inclusive).
  757. * @param end The last symbol to fill (exclusive).
  758. * @param nbBits Each symbol is nbBits long.
  759. * @param tableLog The table log.
  760. * @param baseSeq If level == 1 { 0 } else { the first level symbol }
  761. * @param level The level in the table. Must be 1 or 2.
  762. */
  763. static void HUF_fillDTableX2ForWeight(
  764. HUF_DEltX2* DTableRank,
  765. sortedSymbol_t const* begin, sortedSymbol_t const* end,
  766. U32 nbBits, U32 tableLog,
  767. U16 baseSeq, int const level)
  768. {
  769. U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
  770. const sortedSymbol_t* ptr;
  771. assert(level >= 1 && level <= 2);
  772. switch (length) {
  773. case 1:
  774. for (ptr = begin; ptr != end; ++ptr) {
  775. HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
  776. *DTableRank++ = DElt;
  777. }
  778. break;
  779. case 2:
  780. for (ptr = begin; ptr != end; ++ptr) {
  781. HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
  782. DTableRank[0] = DElt;
  783. DTableRank[1] = DElt;
  784. DTableRank += 2;
  785. }
  786. break;
  787. case 4:
  788. for (ptr = begin; ptr != end; ++ptr) {
  789. U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
  790. ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
  791. ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
  792. DTableRank += 4;
  793. }
  794. break;
  795. case 8:
  796. for (ptr = begin; ptr != end; ++ptr) {
  797. U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
  798. ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
  799. ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
  800. ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
  801. ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
  802. DTableRank += 8;
  803. }
  804. break;
  805. default:
  806. for (ptr = begin; ptr != end; ++ptr) {
  807. U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
  808. HUF_DEltX2* const DTableRankEnd = DTableRank + length;
  809. for (; DTableRank != DTableRankEnd; DTableRank += 8) {
  810. ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
  811. ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
  812. ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
  813. ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
  814. }
  815. }
  816. break;
  817. }
  818. }
  819. /* HUF_fillDTableX2Level2() :
  820. * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
  821. static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
  822. const U32* rankVal, const int minWeight, const int maxWeight1,
  823. const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
  824. U32 nbBitsBaseline, U16 baseSeq)
  825. {
  826. /* Fill skipped values (all positions up to rankVal[minWeight]).
  827. * These are positions only get a single symbol because the combined weight
  828. * is too large.
  829. */
  830. if (minWeight>1) {
  831. U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
  832. U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
  833. int const skipSize = rankVal[minWeight];
  834. assert(length > 1);
  835. assert((U32)skipSize < length);
  836. switch (length) {
  837. case 2:
  838. assert(skipSize == 1);
  839. ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
  840. break;
  841. case 4:
  842. assert(skipSize <= 4);
  843. ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
  844. ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
  845. break;
  846. default:
  847. {
  848. int i;
  849. for (i = 0; i < skipSize; i += 8) {
  850. ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
  851. ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
  852. ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
  853. ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
  854. }
  855. }
  856. }
  857. }
  858. /* Fill each of the second level symbols by weight. */
  859. {
  860. int w;
  861. for (w = minWeight; w < maxWeight1; ++w) {
  862. int const begin = rankStart[w];
  863. int const end = rankStart[w+1];
  864. U32 const nbBits = nbBitsBaseline - w;
  865. U32 const totalBits = nbBits + consumedBits;
  866. HUF_fillDTableX2ForWeight(
  867. DTable + rankVal[w],
  868. sortedSymbols + begin, sortedSymbols + end,
  869. totalBits, targetLog,
  870. baseSeq, /* level */ 2);
  871. }
  872. }
  873. }
  874. static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
  875. const sortedSymbol_t* sortedList,
  876. const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
  877. const U32 nbBitsBaseline)
  878. {
  879. U32* const rankVal = rankValOrigin[0];
  880. const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
  881. const U32 minBits = nbBitsBaseline - maxWeight;
  882. int w;
  883. int const wEnd = (int)maxWeight + 1;
  884. /* Fill DTable in order of weight. */
  885. for (w = 1; w < wEnd; ++w) {
  886. int const begin = (int)rankStart[w];
  887. int const end = (int)rankStart[w+1];
  888. U32 const nbBits = nbBitsBaseline - w;
  889. if (targetLog-nbBits >= minBits) {
  890. /* Enough room for a second symbol. */
  891. int start = rankVal[w];
  892. U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
  893. int minWeight = nbBits + scaleLog;
  894. int s;
  895. if (minWeight < 1) minWeight = 1;
  896. /* Fill the DTable for every symbol of weight w.
  897. * These symbols get at least 1 second symbol.
  898. */
  899. for (s = begin; s != end; ++s) {
  900. HUF_fillDTableX2Level2(
  901. DTable + start, targetLog, nbBits,
  902. rankValOrigin[nbBits], minWeight, wEnd,
  903. sortedList, rankStart,
  904. nbBitsBaseline, sortedList[s].symbol);
  905. start += length;
  906. }
  907. } else {
  908. /* Only a single symbol. */
  909. HUF_fillDTableX2ForWeight(
  910. DTable + rankVal[w],
  911. sortedList + begin, sortedList + end,
  912. nbBits, targetLog,
  913. /* baseSeq */ 0, /* level */ 1);
  914. }
  915. }
  916. }
/* Scratch memory laid over the caller-provided workspace
 * by HUF_readDTableX2_wksp_bmi2(). */
typedef struct {
    rankValCol_t rankVal[HUF_TABLELOG_MAX];   /* per-weight start positions; column 0 is built first, column `consumed` is column 0 shifted right by `consumed` */
    U32 rankStats[HUF_TABLELOG_MAX + 1];      /* number of symbols of each weight, filled by HUF_readStats_wksp() */
    U32 rankStart0[HUF_TABLELOG_MAX + 3];     /* per-weight start index within sortedSymbol; accessed through rankStart = rankStart0 + 1 */
    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];   /* symbols sorted by weight */
    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; /* weight of each symbol, filled by HUF_readStats_wksp() */
    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];      /* workspace handed to HUF_readStats_wksp() */
} HUF_ReadDTableX2_Workspace;
  925. size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
  926. const void* src, size_t srcSize,
  927. void* workSpace, size_t wkspSize)
  928. {
  929. return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
  930. }
/* HUF_readDTableX2_wksp_bmi2() :
 * Reads symbol weights from `src` (through HUF_readStats_wksp()) and builds
 * the X2 decoding table stored right after the DTable header cell.
 * `workSpace` must provide at least sizeof(HUF_ReadDTableX2_Workspace) bytes,
 * otherwise ERROR(GENERIC) is returned.
 * `bmi2` is forwarded to HUF_readStats_wksp().
 * @return : the value returned by HUF_readStats_wksp() (callers use it as the
 *           size of the table description within `src`), or an error code. */
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog, maxW, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;

    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);

    /* rankStart[] is offset by one cell inside rankStart0[] */
    rankStart = wksp->rankStart0 + 1;
    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzers complain ... */

    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
    /* cap the table size at HUF_DECODER_FAST_TABLELOG when the code depth allows it */
    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;

    /* find maxWeight */
    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 curr = nextRankStart;
            nextRankStart += wksp->rankStats[w];
            rankStart[w] = curr;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        rankStart[maxW+1] = nextRankStart;
    }

    /* sort symbols by weight */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = wksp->weightList[s];
            U32 const r = rankStart[w]++;   /* after this loop, rankStart[w] holds the end of weight w, i.e. the start of weight w+1 */
            wksp->sortedSymbol[r].symbol = (BYTE)s;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = wksp->rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 curr = nextRankVal;
                nextRankVal += wksp->rankStats[w] << (w+rescale);
                rankVal0[w] = curr;
        }   }
        /* derive one column per number of consumed bits, by shifting column 0 */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = wksp->rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    /* note : rankStart0 is passed here, i.e. rankStart shifted by one cell */
    HUF_fillDTableX2(dt, maxTableLog,
                   wksp->sortedSymbol,
                   wksp->rankStart0, wksp->rankVal, maxW,
                   tableLog+1);

    /* record the table log actually used, and mark the table as X2 (tableType 1) */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
  1003. FORCE_INLINE_TEMPLATE U32
  1004. HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
  1005. {
  1006. size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
  1007. ZSTD_memcpy(op, &dt[val].sequence, 2);
  1008. BIT_skipBits(DStream, dt[val].nbBits);
  1009. return dt[val].length;
  1010. }
  1011. FORCE_INLINE_TEMPLATE U32
  1012. HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
  1013. {
  1014. size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
  1015. ZSTD_memcpy(op, &dt[val].sequence, 1);
  1016. if (dt[val].length==1) {
  1017. BIT_skipBits(DStream, dt[val].nbBits);
  1018. } else {
  1019. if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
  1020. BIT_skipBits(DStream, dt[val].nbBits);
  1021. if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
  1022. /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
  1023. DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
  1024. }
  1025. }
  1026. return 1;
  1027. }
/* Decoding helpers : each one expects `dt` and `dtLog` to be in scope where it
 * is expanded, and advances `ptr` by the value returned by HUF_decodeSymbolX2().
 * _0 : decodes unconditionally. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* _1 : decodes only when MEM_64bits() is true or HUF_TABLELOG_MAX <= 12. */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* _2 : decodes only when MEM_64bits() is true. */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* HUF_decodeStreamX2() :
 * Decodes from `bitDPtr` into [p, pEnd), using table `dt` of log size `dtLog`.
 * Runs an unrolled loop while enough output room remains, then decodes with a
 * reload before each symbol, then without reloading, and finally emits a last
 * single byte if one is still missing.
 * @return : number of bytes written, i.e. (final p) - (initial p). */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
        if (dtLog <= 11 && MEM_64bits()) {
            /* up to 10 symbols at a time */
            /* note : `&` (not `&&`) : the stream is reloaded on every evaluation of the condition */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        } else {
            /* up to 8 symbols at a time */
            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
            }
        }
    } else {
        /* too little room for the unrolled loops : just reload the stream */
        BIT_reloadDStream(bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    if ((size_t)(pEnd - p) >= 2) {
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

        while (p <= pEnd-2)
            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
    }

    /* a single byte may remain : it needs the dedicated last-symbol decoder */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
  1075. FORCE_INLINE_TEMPLATE size_t
  1076. HUF_decompress1X2_usingDTable_internal_body(
  1077. void* dst, size_t dstSize,
  1078. const void* cSrc, size_t cSrcSize,
  1079. const HUF_DTable* DTable)
  1080. {
  1081. BIT_DStream_t bitD;
  1082. /* Init */
  1083. CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
  1084. /* decode */
  1085. { BYTE* const ostart = (BYTE*) dst;
  1086. BYTE* const oend = ostart + dstSize;
  1087. const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
  1088. const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
  1089. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1090. HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
  1091. }
  1092. /* check */
  1093. if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
  1094. /* decoded size */
  1095. return dstSize;
  1096. }
/* HUF_decompress4X2_usingDTable_internal_body() :
 * Decodes 4 interleaved X2 streams from `cSrc` into exactly `dstSize` bytes of `dst`.
 * `cSrc` starts with a 6-byte jump table : three little-endian 16-bit values giving
 * the sizes of streams 1-3 ; stream 4 takes whatever input remains.
 * `dst` is split into 4 segments of (dstSize+3)/4 bytes, the last one ending at `oend`.
 * @return : dstSize, or an error code (corruption_detected on inconsistent sizes,
 *           segment overrun, or streams not fully consumed at the end). */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - (sizeof(size_t)-1);   /* main loop stops once op4 reaches this limit */
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 wraps around (unsigned) when the 3 declared sizes exceed the input */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        if ((size_t)(oend - op4) >= sizeof(size_t)) {
            /* runs until any stream stops reporting BIT_DStream_unfinished, or op4 reaches olimit */
            for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
                /* this variant decodes the streams two by two */
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
                /* this variant interleaves the 4 streams symbol by symbol */
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
                endSignal = (U32)LIKELY((U32)
                            (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                        & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
            }
        }

        /* check corruption : no stream may have written past the start of the next segment */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be fully consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
  1206. #if HUF_NEED_BMI2_FUNCTION
  1207. static BMI2_TARGET_ATTRIBUTE
  1208. size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
  1209. size_t cSrcSize, HUF_DTable const* DTable) {
  1210. return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
  1211. }
  1212. #endif
  1213. #if HUF_NEED_DEFAULT_FUNCTION
  1214. static
  1215. size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
  1216. size_t cSrcSize, HUF_DTable const* DTable) {
  1217. return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
  1218. }
  1219. #endif
  1220. #if ZSTD_ENABLE_ASM_X86_64_BMI2
  1221. HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
  1222. static HUF_ASM_X86_64_BMI2_ATTRS size_t
  1223. HUF_decompress4X2_usingDTable_internal_bmi2_asm(
  1224. void* dst, size_t dstSize,
  1225. const void* cSrc, size_t cSrcSize,
  1226. const HUF_DTable* DTable) {
  1227. void const* dt = DTable + 1;
  1228. const BYTE* const iend = (const BYTE*)cSrc + 6;
  1229. BYTE* const oend = (BYTE*)dst + dstSize;
  1230. HUF_DecompressAsmArgs args;
  1231. {
  1232. size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
  1233. FORWARD_IF_ERROR(ret, "Failed to init asm args");
  1234. if (ret != 0)
  1235. return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
  1236. }
  1237. assert(args.ip[0] >= args.ilimit);
  1238. HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
  1239. /* note : op4 already verified within main loop */
  1240. assert(args.ip[0] >= iend);
  1241. assert(args.ip[1] >= iend);
  1242. assert(args.ip[2] >= iend);
  1243. assert(args.ip[3] >= iend);
  1244. assert(args.op[3] <= oend);
  1245. (void)iend;
  1246. /* finish bitStreams one by one */
  1247. {
  1248. size_t const segmentSize = (dstSize+3) / 4;
  1249. BYTE* segmentEnd = (BYTE*)dst;
  1250. int i;
  1251. for (i = 0; i < 4; ++i) {
  1252. BIT_DStream_t bit;
  1253. if (segmentSize <= (size_t)(oend - segmentEnd))
  1254. segmentEnd += segmentSize;
  1255. else
  1256. segmentEnd = oend;
  1257. FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
  1258. args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
  1259. if (args.op[i] != segmentEnd)
  1260. return ERROR(corruption_detected);
  1261. }
  1262. }
  1263. /* decoded size */
  1264. return dstSize;
  1265. }
  1266. #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
  1267. static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
  1268. size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
  1269. {
  1270. #if DYNAMIC_BMI2
  1271. if (bmi2) {
  1272. # if ZSTD_ENABLE_ASM_X86_64_BMI2
  1273. return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
  1274. # else
  1275. return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
  1276. # endif
  1277. }
  1278. #else
  1279. (void)bmi2;
  1280. #endif
  1281. #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
  1282. return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
  1283. #else
  1284. return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
  1285. #endif
  1286. }
/* Provides HUF_decompress1X2_usingDTable_internal(), which is called below with
 * a trailing `bmi2` argument.
 * NOTE(review): HUF_DGEN is defined outside this excerpt; presumably it emits
 * the variants wrapping the matching `_body` function - confirm against the macro. */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
  1288. size_t HUF_decompress1X2_usingDTable(
  1289. void* dst, size_t dstSize,
  1290. const void* cSrc, size_t cSrcSize,
  1291. const HUF_DTable* DTable)
  1292. {
  1293. DTableDesc dtd = HUF_getDTableDesc(DTable);
  1294. if (dtd.tableType != 1) return ERROR(GENERIC);
  1295. return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1296. }
  1297. size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  1298. const void* cSrc, size_t cSrcSize,
  1299. void* workSpace, size_t wkspSize)
  1300. {
  1301. const BYTE* ip = (const BYTE*) cSrc;
  1302. size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
  1303. workSpace, wkspSize);
  1304. if (HUF_isError(hSize)) return hSize;
  1305. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  1306. ip += hSize; cSrcSize -= hSize;
  1307. return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  1308. }
  1309. size_t HUF_decompress4X2_usingDTable(
  1310. void* dst, size_t dstSize,
  1311. const void* cSrc, size_t cSrcSize,
  1312. const HUF_DTable* DTable)
  1313. {
  1314. DTableDesc dtd = HUF_getDTableDesc(DTable);
  1315. if (dtd.tableType != 1) return ERROR(GENERIC);
  1316. return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1317. }
  1318. static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  1319. const void* cSrc, size_t cSrcSize,
  1320. void* workSpace, size_t wkspSize, int bmi2)
  1321. {
  1322. const BYTE* ip = (const BYTE*) cSrc;
  1323. size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
  1324. workSpace, wkspSize);
  1325. if (HUF_isError(hSize)) return hSize;
  1326. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  1327. ip += hSize; cSrcSize -= hSize;
  1328. return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  1329. }
  1330. size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  1331. const void* cSrc, size_t cSrcSize,
  1332. void* workSpace, size_t wkspSize)
  1333. {
  1334. return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
  1335. }
  1336. #endif /* HUF_FORCE_DECOMPRESS_X1 */
  1337. /* ***********************************/
  1338. /* Universal decompression selectors */
  1339. /* ***********************************/
  1340. size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
  1341. const void* cSrc, size_t cSrcSize,
  1342. const HUF_DTable* DTable)
  1343. {
  1344. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1345. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1346. (void)dtd;
  1347. assert(dtd.tableType == 0);
  1348. return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1349. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1350. (void)dtd;
  1351. assert(dtd.tableType == 1);
  1352. return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1353. #else
  1354. return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
  1355. HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1356. #endif
  1357. }
  1358. size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
  1359. const void* cSrc, size_t cSrcSize,
  1360. const HUF_DTable* DTable)
  1361. {
  1362. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1363. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1364. (void)dtd;
  1365. assert(dtd.tableType == 0);
  1366. return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1367. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1368. (void)dtd;
  1369. assert(dtd.tableType == 1);
  1370. return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1371. #else
  1372. return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
  1373. HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  1374. #endif
  1375. }
  1376. #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
  1377. typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
  1378. static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
  1379. {
  1380. /* single, double, quad */
  1381. {{0,0}, {1,1}}, /* Q==0 : impossible */
  1382. {{0,0}, {1,1}}, /* Q==1 : impossible */
  1383. {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
  1384. {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
  1385. {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
  1386. {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
  1387. {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
  1388. {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
  1389. {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
  1390. {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
  1391. {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
  1392. {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
  1393. {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
  1394. {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
  1395. {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
  1396. {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
  1397. };
  1398. #endif
  1399. /** HUF_selectDecoder() :
  1400. * Tells which decoder is likely to decode faster,
  1401. * based on a set of pre-computed metrics.
  1402. * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
  1403. * Assumption : 0 < dstSize <= 128 KB */
  1404. U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
  1405. {
  1406. assert(dstSize > 0);
  1407. assert(dstSize <= 128*1024);
  1408. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1409. (void)dstSize;
  1410. (void)cSrcSize;
  1411. return 0;
  1412. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1413. (void)dstSize;
  1414. (void)cSrcSize;
  1415. return 1;
  1416. #else
  1417. /* decoder timing evaluation */
  1418. { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
  1419. U32 const D256 = (U32)(dstSize >> 8);
  1420. U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
  1421. U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
  1422. DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
  1423. return DTime1 < DTime0;
  1424. }
  1425. #endif
  1426. }
  1427. size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
  1428. size_t dstSize, const void* cSrc,
  1429. size_t cSrcSize, void* workSpace,
  1430. size_t wkspSize)
  1431. {
  1432. /* validation checks */
  1433. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1434. if (cSrcSize == 0) return ERROR(corruption_detected);
  1435. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1436. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1437. (void)algoNb;
  1438. assert(algoNb == 0);
  1439. return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  1440. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1441. (void)algoNb;
  1442. assert(algoNb == 1);
  1443. return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  1444. #else
  1445. return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1446. cSrcSize, workSpace, wkspSize):
  1447. HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  1448. #endif
  1449. }
  1450. }
  1451. size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  1452. const void* cSrc, size_t cSrcSize,
  1453. void* workSpace, size_t wkspSize)
  1454. {
  1455. /* validation checks */
  1456. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1457. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  1458. if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  1459. if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  1460. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1461. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1462. (void)algoNb;
  1463. assert(algoNb == 0);
  1464. return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1465. cSrcSize, workSpace, wkspSize);
  1466. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1467. (void)algoNb;
  1468. assert(algoNb == 1);
  1469. return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1470. cSrcSize, workSpace, wkspSize);
  1471. #else
  1472. return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1473. cSrcSize, workSpace, wkspSize):
  1474. HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1475. cSrcSize, workSpace, wkspSize);
  1476. #endif
  1477. }
  1478. }
  1479. size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  1480. {
  1481. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1482. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1483. (void)dtd;
  1484. assert(dtd.tableType == 0);
  1485. return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1486. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1487. (void)dtd;
  1488. assert(dtd.tableType == 1);
  1489. return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1490. #else
  1491. return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
  1492. HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1493. #endif
  1494. }
  1495. #ifndef HUF_FORCE_DECOMPRESS_X2
  1496. size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  1497. {
  1498. const BYTE* ip = (const BYTE*) cSrc;
  1499. size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1500. if (HUF_isError(hSize)) return hSize;
  1501. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  1502. ip += hSize; cSrcSize -= hSize;
  1503. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  1504. }
  1505. #endif
  1506. size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  1507. {
  1508. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1509. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1510. (void)dtd;
  1511. assert(dtd.tableType == 0);
  1512. return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1513. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1514. (void)dtd;
  1515. assert(dtd.tableType == 1);
  1516. return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1517. #else
  1518. return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
  1519. HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1520. #endif
  1521. }
  1522. size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  1523. {
  1524. /* validation checks */
  1525. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1526. if (cSrcSize == 0) return ERROR(corruption_detected);
  1527. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1528. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1529. (void)algoNb;
  1530. assert(algoNb == 0);
  1531. return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1532. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1533. (void)algoNb;
  1534. assert(algoNb == 1);
  1535. return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1536. #else
  1537. return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
  1538. HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1539. #endif
  1540. }
  1541. }
  1542. #ifndef ZSTD_NO_UNUSED_FUNCTIONS
  1543. #ifndef HUF_FORCE_DECOMPRESS_X2
  1544. size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
  1545. {
  1546. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1547. return HUF_readDTableX1_wksp(DTable, src, srcSize,
  1548. workSpace, sizeof(workSpace));
  1549. }
  1550. size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  1551. const void* cSrc, size_t cSrcSize)
  1552. {
  1553. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1554. return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  1555. workSpace, sizeof(workSpace));
  1556. }
  1557. size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1558. {
  1559. HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
  1560. return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
  1561. }
  1562. #endif
  1563. #ifndef HUF_FORCE_DECOMPRESS_X1
  1564. size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
  1565. {
  1566. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1567. return HUF_readDTableX2_wksp(DTable, src, srcSize,
  1568. workSpace, sizeof(workSpace));
  1569. }
  1570. size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  1571. const void* cSrc, size_t cSrcSize)
  1572. {
  1573. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1574. return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  1575. workSpace, sizeof(workSpace));
  1576. }
  1577. size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1578. {
  1579. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
  1580. return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  1581. }
  1582. #endif
  1583. #ifndef HUF_FORCE_DECOMPRESS_X2
  1584. size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1585. {
  1586. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1587. return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  1588. workSpace, sizeof(workSpace));
  1589. }
  1590. size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1591. {
  1592. HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
  1593. return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  1594. }
  1595. #endif
  1596. #ifndef HUF_FORCE_DECOMPRESS_X1
  1597. size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  1598. const void* cSrc, size_t cSrcSize)
  1599. {
  1600. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1601. return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  1602. workSpace, sizeof(workSpace));
  1603. }
  1604. size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1605. {
  1606. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
  1607. return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  1608. }
  1609. #endif
  1610. typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
  1611. size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1612. {
  1613. #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
  1614. static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
  1615. #endif
  1616. /* validation checks */
  1617. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1618. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  1619. if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  1620. if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  1621. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1622. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1623. (void)algoNb;
  1624. assert(algoNb == 0);
  1625. return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
  1626. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1627. (void)algoNb;
  1628. assert(algoNb == 1);
  1629. return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
  1630. #else
  1631. return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
  1632. #endif
  1633. }
  1634. }
  1635. size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1636. {
  1637. /* validation checks */
  1638. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1639. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  1640. if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  1641. if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  1642. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1643. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1644. (void)algoNb;
  1645. assert(algoNb == 0);
  1646. return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
  1647. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1648. (void)algoNb;
  1649. assert(algoNb == 1);
  1650. return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
  1651. #else
  1652. return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
  1653. HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
  1654. #endif
  1655. }
  1656. }
  1657. size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  1658. {
  1659. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1660. return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  1661. workSpace, sizeof(workSpace));
  1662. }
  1663. size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  1664. const void* cSrc, size_t cSrcSize)
  1665. {
  1666. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1667. return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  1668. workSpace, sizeof(workSpace));
  1669. }
  1670. #endif