
/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* zstd_decompress_block :
 * this module takes care of decompressing _compressed_ block */

/*-*******************************************************
*  Dependencies
*********************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/compiler.h"    /* prefetch */
#include "../common/cpu.h"         /* bmi2 */
#include "../common/mem.h"         /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h"

/*_*******************************************************
*  Macros
**********************************************************/
/* These two optional macros each force the use of one of the two
 * ZSTD_decompressSequences implementations. At most one of them may be
 * defined at a time.
 */
#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
#endif
/*_*******************************************************
*  Memory operations
**********************************************************/
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }


/*-*************************************************************
*   Block decoding
***************************************************************/
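
/* A compressed frame's block header is 3 bytes, read little-endian :
 *   bit  0    : lastBlock flag
 *   bits 1-2  : blockType (bt_raw, bt_rle, bt_compressed, bt_reserved)
 *   bits 3-23 : block size (for bt_rle blocks this is the regenerated size,
 *               while the compressed payload is a single byte)
 */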
/*! ZSTD_getcBlockSize() :
 *  Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr)
{
    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");

    {   U32 const cBlockHeader = MEM_readLE24(src);
        U32 const cSize = cBlockHeader >> 3;
        bpPtr->lastBlock = cBlockHeader & 1;
        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
        bpPtr->origSize = cSize;   /* only useful for RLE */
        if (bpPtr->blockType == bt_rle) return 1;
        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
        return cSize;
    }
}
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
{
    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
    {
        /* room for litbuffer to fit without read faulting */
        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_in_dst;
    }
    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
    {
        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
        if (splitImmediately) {
            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
        }
        else {
            /* initially this will be stored entirely in dst during huffman decoding; it will partially be shifted to litExtraBuffer afterwards */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
        }
        dctx->litBufferLocation = ZSTD_split;
    }
    else
    {
        /* fits entirely within litExtraBuffer, so no split is necessary */
        dctx->litBuffer = dctx->litExtraBuffer;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    }
}
/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity, const streaming_operation streaming);
/*! ZSTD_decodeLiteralsBlock() :
 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
 * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
 * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
 * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
 *
 * @return : nb of bytes read from src (< srcSize )
 *  note : symbol not declared but exposed for fullbench */
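/* Literals section header layout (see the lhlCode switch cases below) :
 *   bits 0-1 : literals block type (raw/basic, rle, compressed, repeat/treeless)
 *   bits 2-3 : size format, which selects how many bits encode the regenerated
 *              size (and, for compressed blocks, the compressed size)
 * Depending on block type and size format, the header is 1 to 5 bytes long. */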
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
                          void* dst, size_t dstCapacity, const streaming_operation streaming)
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

    {   const BYTE* const istart = (const BYTE*) src;
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

        switch(litEncType)
        {
        case set_repeat:
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
            ZSTD_FALLTHROUGH;

        case set_compressed:
            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;
                U32 const lhc = MEM_readLE32(istart);
                size_t hufSuccess;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    } else {
                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    }
                } else {
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace));
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
                    }
                }
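                /* When the literals buffer is split, the Huffman output currently occupies
                 * the tail of dst.  Save the last ZSTD_LITBUFFEREXTRASIZE bytes into
                 * litExtraBuffer and shift the remaining literals within dst, leaving a
                 * WILDCOPY_OVERLENGTH margin so the literals stay ahead of the sequence
                 * copies that will overwrite this region. */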
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
                }

                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                return litCSize + lhSize;
            }
        case set_basic:
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                    if (dctx->litBufferLocation == ZSTD_split)
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    }
                    else
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
                    }
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                dctx->litBufferEnd = dctx->litPtr + litSize;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                return lhSize+litSize;
            }
        case set_rle:
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
                }
                else
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
                }
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;
            }
        default:
            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
}
/* Default FSE distribution tables.
 * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
 * They were generated programmatically with following method :
 * - start from default distributions, present in /lib/common/zstd_internal.h
 * - generate tables normally, using ZSTD_buildFSETable()
 * - printout the content of tables
 * - prettify output, report below, test with fuzzer to ensure it's correct */

/* Default FSE distribution table for Literal Lengths */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
     /* nextState, nbAddBits, nbBits, baseVal */
     {  0,  0,  4,    0},  { 16,  0,  4,    0},
     { 32,  0,  5,    1},  {  0,  0,  5,    3},
     {  0,  0,  5,    4},  {  0,  0,  5,    6},
     {  0,  0,  5,    7},  {  0,  0,  5,    9},
     {  0,  0,  5,   10},  {  0,  0,  5,   12},
     {  0,  0,  6,   14},  {  0,  1,  5,   16},
     {  0,  1,  5,   20},  {  0,  1,  5,   22},
     {  0,  2,  5,   28},  {  0,  3,  5,   32},
     {  0,  4,  5,   48},  { 32,  6,  5,   64},
     {  0,  7,  5,  128},  {  0,  8,  6,  256},
     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
     { 32,  0,  4,    0},  {  0,  0,  4,    1},
     {  0,  0,  5,    2},  { 32,  0,  5,    4},
     {  0,  0,  5,    5},  { 32,  0,  5,    7},
     {  0,  0,  5,    8},  { 32,  0,  5,   10},
     {  0,  0,  5,   11},  {  0,  0,  6,   13},
     { 32,  1,  5,   16},  {  0,  1,  5,   18},
     { 32,  1,  5,   22},  {  0,  2,  5,   24},
     { 32,  3,  5,   32},  {  0,  3,  5,   40},
     {  0,  6,  4,   64},  { 16,  6,  4,   64},
     { 32,  7,  5,  128},  {  0,  9,  6,  512},
     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
     { 16,  0,  4,    1},  { 32,  0,  5,    2},
     { 32,  0,  5,    3},  { 32,  0,  5,    5},
     { 32,  0,  5,    6},  { 32,  0,  5,    8},
     { 32,  0,  5,    9},  { 32,  0,  5,   11},
     { 32,  0,  5,   12},  {  0,  0,  6,   15},
     { 32,  1,  5,   18},  { 32,  1,  5,   20},
     { 32,  2,  5,   24},  { 32,  2,  5,   28},
     { 32,  3,  5,   40},  { 32,  4,  5,   48},
     {  0, 16,  6,65536},  {  0, 15,  6,32768},
     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
};   /* LL_defaultDTable */
/* Default FSE distribution table for Offset Codes */
static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  5,    0},     {  0,  6,  4,   61},
    {  0,  9,  5,  509},     {  0, 15,  5,32765},
    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
    {  0,  5,  5,   29},     {  0,  8,  4,  253},
    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
    {  0,  2,  5,    1},     { 16,  7,  4,  125},
    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
    {  0, 19,  5,524285},    {  0,  1,  5,    1},
    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
};   /* OF_defaultDTable */
/* Default FSE distribution table for Match Lengths */
static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  6,    3},  {  0,  0,  4,    4},
    { 32,  0,  5,    5},  {  0,  0,  5,    6},
    {  0,  0,  5,    8},  {  0,  0,  5,    9},
    {  0,  0,  5,   11},  {  0,  0,  6,   13},
    {  0,  0,  6,   16},  {  0,  0,  6,   19},
    {  0,  0,  6,   22},  {  0,  0,  6,   25},
    {  0,  0,  6,   28},  {  0,  0,  6,   31},
    {  0,  0,  6,   34},  {  0,  1,  6,   37},
    {  0,  1,  6,   41},  {  0,  2,  6,   47},
    {  0,  3,  6,   59},  {  0,  4,  6,   83},
    {  0,  7,  6,  131},  {  0,  9,  6,  515},
    { 16,  0,  4,    4},  {  0,  0,  4,    5},
    { 32,  0,  5,    6},  {  0,  0,  5,    7},
    { 32,  0,  5,    9},  {  0,  0,  5,   10},
    {  0,  0,  6,   12},  {  0,  0,  6,   15},
    {  0,  0,  6,   18},  {  0,  0,  6,   21},
    {  0,  0,  6,   24},  {  0,  0,  6,   27},
    {  0,  0,  6,   30},  {  0,  0,  6,   33},
    {  0,  1,  6,   35},  {  0,  1,  6,   39},
    {  0,  2,  6,   43},  {  0,  3,  6,   51},
    {  0,  4,  6,   67},  {  0,  5,  6,   99},
    {  0,  8,  6,  259},  { 32,  0,  4,    4},
    { 48,  0,  4,    4},  { 16,  0,  4,    5},
    { 32,  0,  5,    7},  { 32,  0,  5,    8},
    { 32,  0,  5,   10},  { 32,  0,  5,   11},
    {  0,  0,  6,   14},  {  0,  0,  6,   17},
    {  0,  0,  6,   20},  {  0,  0,  6,   23},
    {  0,  0,  6,   26},  {  0,  0,  6,   29},
    {  0,  0,  6,   32},  {  0, 16,  6,65539},
    {  0, 15,  6,32771},  {  0, 14,  6,16387},
    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
};   /* ML_defaultDTable */
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
{
    void* ptr = dt;
    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
    ZSTD_seqSymbol* const cell = dt + 1;

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->nbBits = 0;
    cell->nextState = 0;
    assert(nbAddBits < 255);
    cell->nbAdditionalBits = nbAddBits;
    cell->baseValue = baseValue;
}
/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)
 * cannot fail if input is valid =>
 * all inputs are presumed validated at this stage */
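/* Table layout : dt[0] stores the ZSTD_seqSymbol_header (tableLog, fastMode),
 * and dt+1 is the actual decoding table of (1<<tableLog) ZSTD_seqSymbol cells. */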
FORCE_INLINE_TEMPLATE
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_seqSymbol* const tableDecode = dt+1;
    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;

    U16* symbolNext = (U16*)wksp;
    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
    U32 highThreshold = tableSize - 1;

    /* Sanity Checks */
    assert(maxSymbolValue <= MaxSeq);
    assert(tableLog <= MaxFSELog);
    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
    (void)wkspSize;

    /* Init, lay down lowprob symbols */
    {   ZSTD_seqSymbol_header DTableH;
        DTableH.tableLog = tableLog;
        DTableH.fastMode = 1;
        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
            U32 s;
            for (s=0; s<maxSV1; s++) {
                if (normalizedCounter[s]==-1) {
                    tableDecode[highThreshold--].baseValue = s;
                    symbolNext[s] = 1;
                } else {
                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
                    assert(normalizedCounter[s]>=0);
                    symbolNext[s] = (U16)normalizedCounter[s];
        }   }   }
        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
    }

    /* Spread symbols */
    assert(tableSize <= 512);
    /* Specialized symbol spreading for the case when there are
     * no low probability (-1 count) symbols. When compressing
     * small blocks we avoid low probability symbols to hit this
     * case, since header decoding speed matters more.
     */
    if (highThreshold == tableSize - 1) {
        size_t const tableMask = tableSize-1;
        size_t const step = FSE_TABLESTEP(tableSize);
        /* First lay down the symbols in order.
         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         * misses since small blocks generally have small table logs, so nearly
         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         * our buffer to handle the over-write.
         */
        {
            U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                pos += n;
            }
        }
        /* Now we spread those positions across the table.
         * The benefit of doing it in two stages is that we avoid the
         * variable size inner loop, which caused lots of branch misses.
         * Now we can run through all the positions without any branch misses.
         * We unroll the loop twice, since that is what empirically worked best.
         */
        {
            size_t position = 0;
            size_t s;
            size_t const unroll = 2;
            assert(tableSize % unroll == 0);   /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableDecode[uPosition].baseValue = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);
        }
    } else {
        U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
            int i;
            int const n = normalizedCounter[s];
            for (i=0; i<n; i++) {
                tableDecode[position].baseValue = s;
                position = (position + step) & tableMask;
                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
        }   }
        assert(position == 0);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
    }

    /* Build Decoding table */
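    /* For each cell : nextState counts prior occurrences of the cell's symbol;
     * the decoder will read nbBits = tableLog - highbit(nextState) bits and
     * land in the state range starting at (nextState << nbBits) - tableSize.
     * baseValue / nbAdditionalBits come from the per-symbol lookup arrays. */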
    {
        U32 u;
        for (u=0; u<tableSize; u++) {
            U32 const symbol = tableDecode[u].baseValue;
            U32 const nextState = symbolNext[symbol]++;
            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
            assert(nbAdditionalBits[symbol] < 255);
            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
            tableDecode[u].baseValue = baseValue[symbol];
        }
    }
}
/* Avoids the FORCE_INLINE of the _body() function. */
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#endif
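
/* ZSTD_buildFSETable_body() is thus instantiated twice : a default build and,
 * when DYNAMIC_BMI2 is enabled, a BMI2-targeted build.  ZSTD_buildFSETable()
 * below selects the variant at runtime based on CPU support. */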
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
        return;
    }
#endif
    (void)bmi2;
    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
/*! ZSTD_buildSeqTable() :
 * @return : nb bytes read from src,
 *           or an error code if it fails */
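/* The four symbolEncodingType_e modes handled below :
 *   set_basic      : use the predefined default distribution table
 *   set_rle        : all sequences use a single symbol, read from the first byte
 *   set_compressed : an FSE-described table follows (parsed by FSE_readNCount)
 *   set_repeat     : reuse the table from the previous block / dictionary
 */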
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
                                 const void* src, size_t srcSize,
                                 const U32* baseValue, const U8* nbAdditionalBits,
                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
                                 int bmi2)
{
    switch(type)
    {
    case set_rle :
        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
        {   U32 const symbol = *(const BYTE*)src;
            U32 const baseline = baseValue[symbol];
            U8 const nbBits = nbAdditionalBits[symbol];
            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
        }
        *DTablePtr = DTableSpace;
        return 1;
    case set_basic :
        *DTablePtr = defaultTable;
        return 0;
    case set_repeat:
        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
        /* prefetch FSE table if used */
        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
            const void* const pStart = *DTablePtr;
            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
            PREFETCH_AREA(pStart, pSize);
        }
        return 0;
    case set_compressed :
        {   unsigned tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
            *DTablePtr = DTableSpace;
            return headerSize;
        }
    default :
        assert(0);
        RETURN_ERROR(GENERIC, "impossible");
    }
}
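
/* Sequences section header, as parsed by ZSTD_decodeSeqHeaders() below :
 * the number of sequences takes 1-3 bytes (a first byte < 0x80 is the count
 * itself, 0x80-0xFE prefixes a second byte, 0xFF prefixes a 2-byte
 * little-endian count offset by LONGNBSEQ), followed by one byte of encoding
 * types (2 bits each for literal lengths, offsets and match lengths). */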
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
            ip+=2;
        } else {
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, "");   /* minimum possible size: 1 byte for symbol encoding types */
    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }

        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }

        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}
typedef struct {
    size_t litLength;
    size_t matchLength;
    size_t offset;
} seq_t;

typedef struct {
    size_t state;
    const ZSTD_seqSymbol* table;
} ZSTD_fseState;

typedef struct {
    BIT_DStream_t DStream;
    ZSTD_fseState stateLL;
    ZSTD_fseState stateOffb;
    ZSTD_fseState stateML;
    size_t prevOffset[ZSTD_REP_NUM];
} seqState_t;
/*! ZSTD_overlapCopy8() :
 *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
 *  If the offset is < 8 then the offset is spread to at least 8 bytes.
 *
 *  Precondition: *ip <= *op
 *  Postcondition: *op - *ip >= 8
 */
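/* How the offset < 8 case works : the first 4 bytes are copied one at a time,
 * then ip is advanced by dec32table[offset] so the next 4-byte copy continues
 * the repeating pattern, and finally dec64table[offset] pulls ip back so that
 * the new distance (*op - *ip) becomes a multiple of the original offset that
 * is >= 8 (e.g. offset==3 leaves a distance of 9, offset==5 a distance of 10),
 * letting subsequent 8-byte copies proceed without overlapping read and write. */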
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        (*op)[0] = (*ip)[0];
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;
    } else {
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
/*! ZSTD_safecopy() :
 *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
 *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
 *  This function is only called in the uncommon case where the sequence is near the end of the block. It
 *  should be fast for a single long sequence, but can be slow for several short sequences.
 *
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
 *           The src buffer must be before the dst buffer.
 */
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        length -= 8;
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        ip += oend_w - op;
        op += oend_w - op;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_safecopyDstBeforeSrc():
 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    if (length < 8 || diff > -8) {
        /* Handle short lengths, close overlaps, and dst not before src. */
        while (op < oend) *op++ = *ip++;
        return;
    }

    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
        ip += oend - WILDCOPY_OVERLENGTH - op;
        op += oend - WILDCOPY_OVERLENGTH - op;
    }

    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEnd(BYTE* op,
                            BYTE* const oend, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
/* ZSTD_execSequenceEndSplitLitBuffer():
 * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
                            BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
                         const BYTE** litPtr, const BYTE* const litLimit,
                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
                         BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
                         const BYTE** litPtr, const BYTE* const litLimit,
                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
static void
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
    const void* ptr = dt;
    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
                (U32)DStatePtr->state, DTableH->tableLog);
    BIT_reloadDStream(bitD);
    DStatePtr->table = dt + 1;
}

FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
{
    size_t const lowBits = BIT_readBits(bitD, nbBits);
    DStatePtr->state = nextState + lowBits;
}
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
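
/* Long-offset mode applies when offsets may need more extra bits than the
 * bitstream accumulator can safely hold before a reload (mostly relevant for
 * 32-bit builds with large windows) : ZSTD_decodeSequence() then splits the
 * offset read around a BIT_reloadDStream(). */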
  1073. FORCE_INLINE_TEMPLATE seq_t
  1074. ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
  1075. {
  1076. seq_t seq;
  1077. const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
  1078. const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
  1079. const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
  1080. seq.matchLength = mlDInfo->baseValue;
  1081. seq.litLength = llDInfo->baseValue;
  1082. { U32 const ofBase = ofDInfo->baseValue;
  1083. BYTE const llBits = llDInfo->nbAdditionalBits;
  1084. BYTE const mlBits = mlDInfo->nbAdditionalBits;
  1085. BYTE const ofBits = ofDInfo->nbAdditionalBits;
  1086. BYTE const totalBits = llBits+mlBits+ofBits;
  1087. U16 const llNext = llDInfo->nextState;
  1088. U16 const mlNext = mlDInfo->nextState;
  1089. U16 const ofNext = ofDInfo->nextState;
  1090. U32 const llnbBits = llDInfo->nbBits;
  1091. U32 const mlnbBits = mlDInfo->nbBits;
  1092. U32 const ofnbBits = ofDInfo->nbBits;
  1093. /*
  1094. * As gcc has better branch and block analyzers, sometimes it is only
  1095. * valuable to mark likelyness for clang, it gives around 3-4% of
  1096. * performance.
  1097. */
        /* sequence */
        {   size_t offset;
#if defined(__clang__)
            if (LIKELY(ofBits > 1)) {
#else
            if (ofBits > 1) {
#endif
                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
                assert(ofBits <= MaxOff);
                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                    BIT_reloadDStream(&seqState->DStream);
                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
                } else {
                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                }
                seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset;
            } else {
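                /* Repeat-offset path (ofBits <= 1). Per the zstd format, offset
                 * values 1-3 select one of the three previous offsets; when
                 * litLength == 0 the selection shifts by one, and the last slot
                 * instead means prevOffset[0] - 1. */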
                U32 const ll0 = (llDInfo->baseValue == 0);
                if (LIKELY((ofBits == 0))) {
                    offset = seqState->prevOffset[ll0];
                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
                    seqState->prevOffset[0] = offset;
                } else {
                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                        seqState->prevOffset[1] = seqState->prevOffset[0];
                        seqState->prevOffset[0] = offset = temp;
            }   }   }
            seq.offset = offset;
        }

#if defined(__clang__)
        if (UNLIKELY(mlBits > 0))
#else
        if (mlBits > 0)
#endif
            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);

        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
            BIT_reloadDStream(&seqState->DStream);
        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
            BIT_reloadDStream(&seqState->DStream);
        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

#if defined(__clang__)
        if (UNLIKELY(llBits > 0))
#else
        if (llBits > 0)
#endif
            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);

        if (MEM_32bits())
            BIT_reloadDStream(&seqState->DStream);

        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
    }

    return seq;
}

#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
{
    size_t const windowSize = dctx->fParams.windowSize;
    /* No dictionary used. */
    if (dctx->dictContentEndForFuzzing == NULL) return 0;
    /* Dictionary is our prefix. */
    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
    /* Dictionary is not our ext-dict. */
    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
    /* Dictionary is not within our window size. */
    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
    /* Dictionary is active. */
    return 1;
}

MEM_STATIC void ZSTD_assertValidSequence(
        ZSTD_DCtx const* dctx,
        BYTE const* op, BYTE const* oend,
        seq_t const seq,
        BYTE const* prefixStart, BYTE const* virtualStart)
{
#if DEBUGLEVEL >= 1
    size_t const windowSize = dctx->fParams.windowSize;
    size_t const sequenceSize = seq.litLength + seq.matchLength;
    BYTE const* const oLitEnd = op + seq.litLength;
    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
    assert(op <= oend);
    assert((size_t)(oend - op) >= sequenceSize);
    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
        /* Offset must be within the dictionary. */
        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
        assert(seq.offset <= windowSize + dictSize);
    } else {
        /* Offset must be within our window. */
        assert(seq.offset <= windowSize);
    }
#else
    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
#endif
}
#endif

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* litBufferEnd = dctx->litBufferEnd;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
    (void)frame;

    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);

        /* decompress without overrunning litPtr begins */
        {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            /* Align the decompression loop to 32 + 16 bytes.
             *
             * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
             * speed swings based on the alignment of the decompression loop. This
             * performance swing is caused by parts of the decompression loop falling
             * out of the DSB. The entire decompression loop should fit in the DSB;
             * when it can't we get much worse performance. You can measure whether
             * you've hit the good case or the bad case with this perf command for
             * some compressed file test.zst:
             *
             *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
             *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
             *
             * If you see most cycles served out of the MITE you've hit the bad case.
             * If you see most cycles served out of the DSB you've hit the good case.
             * If it is pretty even then you may be in an okay case.
             *
             * This issue has been reproduced on the following CPUs:
             *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
             *               Use Instruments->Counters to get DSB/MITE cycles.
             *               I never got performance swings, but I was able to
             *               go from the good case of mostly DSB to half of the
             *               cycles served from MITE.
             *   - Coffeelake: Intel i9-9900k
             *   - Coffeelake: Intel i7-9700k
             *
             * I haven't been able to reproduce the instability or DSB misses on any
             * of the following CPUs:
             *   - Haswell
             *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
             *   - Skylake
             *
             * Alignment is done for each of the three major decompression loops:
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
             *   - ZSTD_decompressSequences_body
             * Alignment choices are made to minimize large swings on bad cases and influence on performance
             * from changes external to this code, rather than to overoptimize on the current commit.
             *
             * If you are seeing performance instability, this script can help test.
             * It tests on 4 commits in zstd where I saw performance change.
             *
             *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
             */
#if defined(__GNUC__) && defined(__x86_64__)
            __asm__(".p2align 6");
#  if __GNUC__ >= 7
            /* good for gcc-7, gcc-9, and gcc-11 */
            __asm__("nop");
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 4");
#    if __GNUC__ == 8 || __GNUC__ == 10
            /* good for gcc-8 and gcc-10 */
            __asm__("nop");
            __asm__(".p2align 3");
#    endif
#  endif
#endif

            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                    return oneSeqSize;
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                op += oneSeqSize;
                if (UNLIKELY(!--nbSeq))
                    break;
                BIT_reloadDStream(&(seqState.DStream));
                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            }

            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
            if (nbSeq > 0) {
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequence.litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                {
                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                    assert(!ZSTD_isError(oneSeqSize));
                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                        return oneSeqSize;
                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                    op += oneSeqSize;
                    if (--nbSeq)
                        BIT_reloadDStream(&(seqState.DStream));
                }
            }
        }

        if (nbSeq > 0) /* there is remaining lit from extra buffer */
        {
#if defined(__GNUC__) && defined(__x86_64__)
            __asm__(".p2align 6");
            __asm__("nop");
#  if __GNUC__ != 7
            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
            __asm__(".p2align 4");
            __asm__("nop");
            __asm__(".p2align 3");
#  elif __GNUC__ >= 11
            __asm__(".p2align 3");
#  else
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 3");
#  endif
#endif
            for (; ; ) {
                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                    return oneSeqSize;
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                op += oneSeqSize;
                if (UNLIKELY(!--nbSeq))
                    break;
                BIT_reloadDStream(&(seqState.DStream));
            }
        }

        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
    {
        size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
        litPtr = dctx->litExtraBuffer;
        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    }
    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
    return op-ostart;
}

FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_body");
    (void)frame;

    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);
#if defined(__GNUC__) && defined(__x86_64__)
        __asm__(".p2align 6");
        __asm__("nop");
#  if __GNUC__ >= 7
        __asm__(".p2align 5");
        __asm__("nop");
        __asm__(".p2align 3");
#  else
        __asm__(".p2align 4");
        __asm__("nop");
        __asm__(".p2align 3");
#  endif
#endif
        for ( ; ; ) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
            assert(!ZSTD_isError(oneSeqSize));
            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                return oneSeqSize;
            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
            op += oneSeqSize;
            if (UNLIKELY(!--nbSeq))
                break;
            BIT_reloadDStream(&(seqState.DStream));
        }

        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
    return op-ostart;
}

static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}

static size_t
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
                                               void* dst, size_t maxDstSize,
                                         const void* seqStart, size_t seqSize, int nbSeq,
                                         const ZSTD_longOffset_e isLongOffset,
                                         const int frame)
{
    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT

FORCE_INLINE_TEMPLATE size_t
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
                   const BYTE* const prefixStart, const BYTE* const dictEnd)
{
    prefetchPos += sequence.litLength;
    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
        const BYTE* const match = matchBase + prefetchPos - sequence.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
                                                                                * No consequence though : memory address is only used for prefetching, not for dereferencing */
        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
    }
    return prefetchPos + sequence.matchLength;
}

/* This decoding function employs prefetching
 * to reduce latency impact of cache misses.
 * It's generally employed when the block contains a significant portion of long-distance matches
 * or when coupled with a "cold" dictionary */
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
                               ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* litBufferEnd = dctx->litBufferEnd;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    (void)frame;

    /* Regen sequences */
    if (nbSeq) {
#define STORED_SEQS 8
#define STORED_SEQS_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS STORED_SEQS
        seq_t sequences[STORED_SEQS];
        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
        seqState_t seqState;
        int seqNb;
        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */

        dctx->fseEntropy = 1;
        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        assert(dst != NULL);
        assert(iend >= ip);
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

        /* prepare in advance */
        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
            sequences[seqNb] = sequence;
        }
        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");

        /* decompress without stomping litBuffer */
        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            size_t oneSeqSize;

            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
            {
                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;

                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
                sequences[seqNb & STORED_SEQS_MASK] = sequence;
                op += oneSeqSize;
            }
            else
            {
                /* lit buffer is either wholly contained in first or second split, or not split at all */
                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;

                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
                sequences[seqNb & STORED_SEQS_MASK] = sequence;
                op += oneSeqSize;
            }
        }
        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");

        /* finish queue */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
            {
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequence->litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                {
                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                    assert(!ZSTD_isError(oneSeqSize));
                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                    op += oneSeqSize;
                }
            }
            else
            {
                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                op += oneSeqSize;
            }
        }

        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
    {
        size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
        litPtr = dctx->litExtraBuffer;
        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
    }
    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
    return op-ostart;
}

static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */


#if DYNAMIC_BMI2

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

#endif /* DYNAMIC_BMI2 */

typedef size_t (*ZSTD_decompressSequences_t)(
                            ZSTD_DCtx* dctx,
                            void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
                            const ZSTD_longOffset_e isLongOffset,
                            const int frame);

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                   const void* seqStart, size_t seqSize, int nbSeq,
                   const ZSTD_longOffset_e isLongOffset,
                   const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static size_t
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                                 const void* seqStart, size_t seqSize, int nbSeq,
                                 const ZSTD_longOffset_e isLongOffset,
                                 const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decompressSequencesLong() :
 * decompression function triggered when a minimum share of offsets is considered "long",
 * aka out of cache.
 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                             const void* seqStart, size_t seqSize, int nbSeq,
                             const ZSTD_longOffset_e isLongOffset,
                             const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
/* ZSTD_getLongOffsetsShare() :
 * condition : offTable must be valid
 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
 *           compared to maximum possible of (1<<OffFSELog) */
static unsigned
ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
{
    const void* ptr = offTable;
    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
    const ZSTD_seqSymbol* table = offTable + 1;
    U32 const max = 1 << tableLog;
    U32 u, total = 0;
    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);

    assert(max <= (1 << OffFSELog));  /* max not too large */
    for (u=0; u<max; u++) {
        if (table[u].nbAdditionalBits > 22) total += 1;
    }

    assert(tableLog <= OffFSELog);
    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */

    return total;
}
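/* Note: the returned "share" is therefore a count out of (1<<OffFSELog) table cells,
 * so the caller's thresholds of 7 and 20 correspond to roughly 2.7% and 7.8%. */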
#endif

size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                      void* dst, size_t dstCapacity,
                const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
{   /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
    /* isLongOffset must be true if there are long offsets.
     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
     * We don't expect that to be the case in 64-bit mode.
     * In block mode, window size is not known, so we have to be conservative.
     * (note: but it could be evaluated from current-lowLimit)
     */
    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
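    /* (With the usual constants, STREAM_ACCUMULATOR_MIN is 25 on 32-bit builds,
     * so the long-offset path is only taken for frames whose window exceeds 32 MiB.) */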
    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

    /* Decode literals section */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
        if (ZSTD_isError(litCSize)) return litCSize;
        ip += litCSize;
        srcSize -= litCSize;
    }

    /* Build Decoding Tables */
    {
        /* These macros control at build-time which decompressor implementation
         * we use. If neither is defined, we do some inspection and dispatch at
         * runtime.
         */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        int usePrefetchDecoder = dctx->ddictIsCold;
#endif
        int nbSeq;
        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
        if (ZSTD_isError(seqHSize)) return seqHSize;
        ip += seqHSize;
        srcSize -= seqHSize;

        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if ( !usePrefetchDecoder
          && (!frame || (dctx->fParams.windowSize > (1<<24)))
          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
            usePrefetchDecoder = (shareLongOffsets >= minShare);
        }
#endif

        dctx->ddictIsCold = 0;

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if (usePrefetchDecoder)
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
        /* else */
        if (dctx->litBufferLocation == ZSTD_split)
            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
        else
            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif
    }
}

void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
{
    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
        dctx->dictEnd = dctx->previousDstEnd;
        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
        dctx->prefixStart = dst;
        dctx->previousDstEnd = dst;
    }
}


size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    size_t dSize;
    ZSTD_checkContinuity(dctx, dst, dstCapacity);
    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
    dctx->previousDstEnd = (char*)dst + dSize;
    return dSize;
}
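
/* Usage sketch (illustrative only, not part of this file): the block-level entry
 * point above is typically driven through the public block API declared in zstd.h,
 * roughly like this:
 *
 *     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *     size_t const initErr = ZSTD_decompressBegin(dctx);     // reset block-level state
 *     size_t const dSize = ZSTD_isError(initErr) ? initErr
 *                        : ZSTD_decompressBlock(dctx, dst, dstCapacity, src, srcSize);
 *     // blocks stored uncompressed by the caller can be registered with
 *     // ZSTD_insertBlock() so that history stays contiguous for later blocks.
 *     ZSTD_freeDCtx(dctx);
 */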