fileio.c 127 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219
  1. /*
  2. * Copyright (c) Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* *************************************
  11. * Compiler Options
  12. ***************************************/
  13. #ifdef _MSC_VER /* Visual */
  14. # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
  15. # pragma warning(disable : 4204) /* non-constant aggregate initializer */
  16. #endif
  17. #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
  18. # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
  19. #endif
  20. /*-*************************************
  21. * Includes
  22. ***************************************/
  23. #include "platform.h" /* Large Files support, SET_BINARY_MODE */
  24. #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
  25. #include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
  26. #include <stdlib.h> /* malloc, free */
  27. #include <string.h> /* strcmp, strlen */
  28. #include <fcntl.h> /* O_WRONLY */
  29. #include <assert.h>
  30. #include <errno.h> /* errno */
  31. #include <limits.h> /* INT_MAX */
  32. #include <signal.h>
  33. #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
  34. #if defined (_MSC_VER)
  35. # include <sys/stat.h>
  36. # include <io.h>
  37. #endif
  38. #include "../lib/common/mem.h" /* U32, U64 */
  39. #include "fileio.h"
  40. #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
  41. #include "../lib/zstd.h"
  42. #include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
  43. #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
  44. # include <zlib.h>
  45. # if !defined(z_const)
  46. # define z_const
  47. # endif
  48. #endif
  49. #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
  50. # include <lzma.h>
  51. #endif
  52. #define LZ4_MAGICNUMBER 0x184D2204
  53. #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
  54. # define LZ4F_ENABLE_OBSOLETE_ENUMS
  55. # include <lz4frame.h>
  56. # include <lz4.h>
  57. #endif
  58. /*-*************************************
  59. * Constants
  60. ***************************************/
#define ADAPT_WINDOWLOG_DEFAULT 23   /* 8 MB, i.e. (1 << 23) bytes */
#define DICTSIZE_MAX (32 MB)   /* protection against large input (attack scenario) ; the MB suffix macro is resolved at point of use */

#define FNSPACE 30   /* NOTE(review): usage is not visible in this part of the file -- confirm meaning before changing */

/* Default file permissions 0666 (modulated by umask) :
 * read + write for user, group and others. */
#if !defined(_WIN32)
/* These macros aren't defined on windows. */
#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
#else
#define DEFAULT_FILE_PERMISSIONS (0666)
#endif
  71. /*-*************************************
  72. * Macros
  73. ***************************************/
/* Size suffixes, written right after a number : `32 MB` expands to `32 *(1 <<20)`.
 * KB and MB multiply by an int constant ; GB multiplies by an unsigned constant. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#undef MAX   /* in case it would be already defined */
/* note : each argument may be evaluated twice, do not pass expressions with side effects */
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/* Display settings consulted by the DISPLAYLEVEL() and DISPLAYUPDATE() macros of this file. */
struct FIO_display_prefs_s {
    int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
    FIO_progressSetting_e progressSetting;   /* FIO_ps_never suppresses DISPLAYUPDATE() output */
};
/* File-wide display state ; starts at displayLevel 2 with FIO_ps_auto.
 * Modified through FIO_setNotificationLevel() and FIO_setProgressSetting(). */
static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};

/* DISPLAY() writes to stderr, DISPLAYOUT() writes to stdout (printf-style arguments). */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__)
/* DISPLAYLEVEL() : DISPLAY() only when current displayLevel is at least `l`.
 * Expands to a brace block, not a single expression. */
#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }

/* Minimum delay between two progress updates : one sixth of a second, in microseconds. */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
/* Timestamp of the last progress update (reset by DELAY_NEXT_UPDATE()). */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

/* READY_FOR_UPDATE() : true when progress display is allowed and more than g_refreshRate has elapsed since last update. */
#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
/* DISPLAYUPDATE() : rate-limited DISPLAY() for progress lines.
 * Requires displayLevel >= l and progressSetting != FIO_ps_never.
 * At displayLevel >= 4, the rate limit is bypassed and stderr is flushed after each message. */
#define DISPLAYUPDATE(l, ...) { \
    if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \
        if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
            DELAY_NEXT_UPDATE(); \
            DISPLAY(__VA_ARGS__); \
            if (g_display_prefs.displayLevel>=4) fflush(stderr); \
    }   }   }
#undef MIN  /* in case it would be already defined */
/* note : each argument may be evaluated twice */
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/* EXM_THROW() :
 * Reports a fatal error on stderr (when displayLevel >= 1), then terminates the process with exit(error).
 * The source location is only printed at displayLevel >= 5.
 * The variadic part is a printf-style format followed by its arguments.
 * Expands to a brace block : it never returns to the caller. */
#define EXM_THROW(error, ...) \
{ \
    DISPLAYLEVEL(1, "zstd: "); \
    DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "error %i : ", error); \
    DISPLAYLEVEL(1, __VA_ARGS__); \
    DISPLAYLEVEL(1, " \n"); \
    exit(error); \
}

/* CHECK_V() :
 * Evaluates `f` once, stores its result into `v`,
 * and aborts through EXM_THROW(11, ...) when ZSTD_isError(v) is true.
 * Expands to several statements : do not use as the unbraced body of an if/else. */
#define CHECK_V(v, f) \
    v = f; \
    if (ZSTD_isError(v)) { \
        DISPLAYLEVEL(5, "%s \n", #f); \
        EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \
    }
/* CHECK() : same as CHECK_V(), using a local size_t to hold the result. */
#define CHECK(f) { size_t err; CHECK_V(err, f); }
  116. /*-************************************
  117. * Signal (Ctrl-C trapping)
  118. **************************************/
  119. static const char* g_artefact = NULL;
  120. static void INThandler(int sig)
  121. {
  122. assert(sig==SIGINT); (void)sig;
  123. #if !defined(_MSC_VER)
  124. signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */
  125. #endif
  126. if (g_artefact) {
  127. assert(UTIL_isRegularFile(g_artefact));
  128. remove(g_artefact);
  129. }
  130. DISPLAY("\n");
  131. exit(2);
  132. }
  133. static void addHandler(char const* dstFileName)
  134. {
  135. if (UTIL_isRegularFile(dstFileName)) {
  136. g_artefact = dstFileName;
  137. signal(SIGINT, INThandler);
  138. } else {
  139. g_artefact = NULL;
  140. }
  141. }
  142. /* Idempotent */
  143. static void clearHandler(void)
  144. {
  145. if (g_artefact) signal(SIGINT, SIG_DFL);
  146. g_artefact = NULL;
  147. }
  148. /*-*********************************************************
  149. * Termination signal trapping (Print debug stack trace)
  150. ***********************************************************/
/* Step 1 : when AddressSanitizer is detected and the build did not set BACKTRACE_ENABLE itself,
 * force BACKTRACE_ENABLE to 0. */
#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
#  if (__has_feature(address_sanitizer))
#    define BACKTRACE_ENABLE 0
#  endif /* __has_feature(address_sanitizer) */
#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
#  define BACKTRACE_ENABLE 0
#endif

/* Step 2 : if still undecided, pick a default from the target platform. */
#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
#  if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
     || (defined(__APPLE__) && defined(__MACH__))
#    define BACKTRACE_ENABLE 1
#  else
#    define BACKTRACE_ENABLE 0
#  endif
#endif

/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
  168. #if BACKTRACE_ENABLE
  169. #include <execinfo.h> /* backtrace, backtrace_symbols */
  170. #define MAX_STACK_FRAMES 50
  171. static void ABRThandler(int sig) {
  172. const char* name;
  173. void* addrlist[MAX_STACK_FRAMES];
  174. char** symbollist;
  175. int addrlen, i;
  176. switch (sig) {
  177. case SIGABRT: name = "SIGABRT"; break;
  178. case SIGFPE: name = "SIGFPE"; break;
  179. case SIGILL: name = "SIGILL"; break;
  180. case SIGINT: name = "SIGINT"; break;
  181. case SIGSEGV: name = "SIGSEGV"; break;
  182. default: name = "UNKNOWN";
  183. }
  184. DISPLAY("Caught %s signal, printing stack:\n", name);
  185. /* Retrieve current stack addresses. */
  186. addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
  187. if (addrlen == 0) {
  188. DISPLAY("\n");
  189. return;
  190. }
  191. /* Create readable strings to each frame. */
  192. symbollist = backtrace_symbols(addrlist, addrlen);
  193. /* Print the stack trace, excluding calls handling the signal. */
  194. for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
  195. DISPLAY("%s\n", symbollist[i]);
  196. }
  197. free(symbollist);
  198. /* Reset and raise the signal so default handler runs. */
  199. signal(sig, SIG_DFL);
  200. raise(sig);
  201. }
  202. #endif
  203. void FIO_addAbortHandler()
  204. {
  205. #if BACKTRACE_ENABLE
  206. signal(SIGABRT, ABRThandler);
  207. signal(SIGFPE, ABRThandler);
  208. signal(SIGILL, ABRThandler);
  209. signal(SIGSEGV, ABRThandler);
  210. signal(SIGBUS, ABRThandler);
  211. #endif
  212. }
  213. /*-************************************************************
  214. * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
  215. ***************************************************************/
  216. #if defined(_MSC_VER) && _MSC_VER >= 1400
  217. # define LONG_SEEK _fseeki64
  218. # define LONG_TELL _ftelli64
  219. #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
  220. # define LONG_SEEK fseeko
  221. # define LONG_TELL ftello
  222. #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
  223. # define LONG_SEEK fseeko64
  224. # define LONG_TELL ftello64
  225. #elif defined(_WIN32) && !defined(__DJGPP__)
  226. # include <windows.h>
  227. static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
  228. LARGE_INTEGER off;
  229. DWORD method;
  230. off.QuadPart = offset;
  231. if (origin == SEEK_END)
  232. method = FILE_END;
  233. else if (origin == SEEK_CUR)
  234. method = FILE_CURRENT;
  235. else
  236. method = FILE_BEGIN;
  237. if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
  238. return 0;
  239. else
  240. return -1;
  241. }
  242. static __int64 LONG_TELL(FILE* file) {
  243. LARGE_INTEGER off, newOff;
  244. off.QuadPart = 0;
  245. newOff.QuadPart = 0;
  246. SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
  247. return newOff.QuadPart;
  248. }
  249. #else
  250. # define LONG_SEEK fseek
  251. # define LONG_TELL ftell
  252. #endif
  253. /*-*************************************
  254. * Parameters: FIO_prefs_t
  255. ***************************************/
  256. /* typedef'd to FIO_prefs_t within fileio.h */
/* User preferences for file operations.
 * Allocated and given default values by FIO_createPreferences(),
 * then modified through the FIO_set*() functions of this file. */
struct FIO_prefs_s {

    /* Algorithm preferences */
    FIO_compressionType_t compressionType;
    U32 sparseFileSupport;   /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
    int dictIDFlag;
    int checksumFlag;
    int blockSize;
    int overlapLog;          /* FIO_OVERLAP_LOG_NOTSET when not set */
    U32 adaptiveMode;        /* refused by FIO_setAdaptiveMode() when nbWorkers==0 */
    U32 useRowMatchFinder;
    int rsyncable;           /* refused by FIO_setRsyncable() when nbWorkers==0 */
    int minAdaptLevel;
    int maxAdaptLevel;
    int ldmFlag;             /* stored as 0 or 1 */
    int ldmHashLog;
    int ldmMinMatch;
    int ldmBucketSizeLog;    /* FIO_LDM_PARAM_NOTSET when not set */
    int ldmHashRateLog;      /* FIO_LDM_PARAM_NOTSET when not set */
    size_t streamSrcSize;
    size_t targetCBlockSize;
    int srcSizeHint;         /* capped to INT_MAX by FIO_setSrcSizeHint() */
    int testMode;            /* stored as 0 or 1 */
    ZSTD_paramSwitch_e literalCompressionMode;

    /* IO preferences */
    U32 removeSrcFile;       /* stored as 0 or 1 */
    U32 overwrite;

    /* Computation resources preferences */
    unsigned memLimit;
    int nbWorkers;           /* 0 is treated as single-thread mode by the setters of this file */

    int excludeCompressedFiles;
    int patchFromMode;       /* stored as 0 or 1 */
    int contentSize;         /* stored as 0 or 1 */
    int allowBlockDevices;   /* when non-zero, FIO_openSrcFile() accepts block devices as input */
};
  291. /*-*************************************
  292. * Parameters: FIO_ctx_t
  293. ***************************************/
  294. /* typedef'd to FIO_ctx_t within fileio.h */
/* Context of a file operation session.
 * Allocated and given initial values by FIO_createContext(). */
struct FIO_ctx_s {

    /* file i/o info */
    int nbFilesTotal;      /* set by FIO_setNbFilesTotal() ; starts at 1 */
    int hasStdinInput;     /* set to 1 by FIO_determineHasStdinInput() when stdinmark is among the inputs */
    int hasStdoutOutput;   /* set by FIO_setHasStdoutOutput() */

    /* file i/o state */
    int currFileIdx;
    int nbFilesProcessed;
    size_t totalBytesInput;
    size_t totalBytesOutput;
};
  306. /*-*************************************
  307. * Parameters: Initialization
  308. ***************************************/
  309. #define FIO_OVERLAP_LOG_NOTSET 9999
  310. #define FIO_LDM_PARAM_NOTSET 9999
  311. FIO_prefs_t* FIO_createPreferences(void)
  312. {
  313. FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
  314. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  315. ret->compressionType = FIO_zstdCompression;
  316. ret->overwrite = 0;
  317. ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  318. ret->dictIDFlag = 1;
  319. ret->checksumFlag = 1;
  320. ret->removeSrcFile = 0;
  321. ret->memLimit = 0;
  322. ret->nbWorkers = 1;
  323. ret->blockSize = 0;
  324. ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
  325. ret->adaptiveMode = 0;
  326. ret->rsyncable = 0;
  327. ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
  328. ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
  329. ret->ldmFlag = 0;
  330. ret->ldmHashLog = 0;
  331. ret->ldmMinMatch = 0;
  332. ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
  333. ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
  334. ret->streamSrcSize = 0;
  335. ret->targetCBlockSize = 0;
  336. ret->srcSizeHint = 0;
  337. ret->testMode = 0;
  338. ret->literalCompressionMode = ZSTD_ps_auto;
  339. ret->excludeCompressedFiles = 0;
  340. ret->allowBlockDevices = 0;
  341. return ret;
  342. }
  343. FIO_ctx_t* FIO_createContext(void)
  344. {
  345. FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
  346. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  347. ret->currFileIdx = 0;
  348. ret->hasStdinInput = 0;
  349. ret->hasStdoutOutput = 0;
  350. ret->nbFilesTotal = 1;
  351. ret->nbFilesProcessed = 0;
  352. ret->totalBytesInput = 0;
  353. ret->totalBytesOutput = 0;
  354. return ret;
  355. }
  356. void FIO_freePreferences(FIO_prefs_t* const prefs)
  357. {
  358. free(prefs);
  359. }
  360. void FIO_freeContext(FIO_ctx_t* const fCtx)
  361. {
  362. free(fCtx);
  363. }
  364. /*-*************************************
  365. * Parameters: Display Options
  366. ***************************************/
  367. void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
  368. void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
  369. /*-*************************************
  370. * Parameters: Setters
  371. ***************************************/
  372. /* FIO_prefs_t functions */
  373. void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
  374. void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
  375. void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; }
  376. void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
  377. void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
  378. void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); }
  379. void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
  380. void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
  381. #ifndef ZSTD_MULTITHREAD
  382. if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
  383. #endif
  384. prefs->nbWorkers = nbWorkers;
  385. }
  386. void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
  387. void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
  388. void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
  389. if (blockSize && prefs->nbWorkers==0)
  390. DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
  391. prefs->blockSize = blockSize;
  392. }
  393. void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
  394. if (overlapLog && prefs->nbWorkers==0)
  395. DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
  396. prefs->overlapLog = overlapLog;
  397. }
  398. void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) {
  399. if ((adapt>0) && (prefs->nbWorkers==0))
  400. EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
  401. prefs->adaptiveMode = adapt;
  402. }
  403. void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
  404. prefs->useRowMatchFinder = useRowMatchFinder;
  405. }
  406. void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
  407. if ((rsyncable>0) && (prefs->nbWorkers==0))
  408. EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
  409. prefs->rsyncable = rsyncable;
  410. }
  411. void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
  412. prefs->streamSrcSize = streamSrcSize;
  413. }
  414. void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
  415. prefs->targetCBlockSize = targetCBlockSize;
  416. }
  417. void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
  418. prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
  419. }
  420. void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
  421. prefs->testMode = (testMode!=0);
  422. }
  423. void FIO_setLiteralCompressionMode(
  424. FIO_prefs_t* const prefs,
  425. ZSTD_paramSwitch_e mode) {
  426. prefs->literalCompressionMode = mode;
  427. }
  428. void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
  429. {
  430. #ifndef ZSTD_NOCOMPRESS
  431. assert(minCLevel >= ZSTD_minCLevel());
  432. #endif
  433. prefs->minAdaptLevel = minCLevel;
  434. }
  435. void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
  436. {
  437. prefs->maxAdaptLevel = maxCLevel;
  438. }
  439. void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
  440. prefs->ldmFlag = (ldmFlag>0);
  441. }
  442. void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
  443. prefs->ldmHashLog = ldmHashLog;
  444. }
  445. void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
  446. prefs->ldmMinMatch = ldmMinMatch;
  447. }
  448. void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
  449. prefs->ldmBucketSizeLog = ldmBucketSizeLog;
  450. }
  451. void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
  452. prefs->ldmHashRateLog = ldmHashRateLog;
  453. }
  454. void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
  455. {
  456. prefs->patchFromMode = value != 0;
  457. }
  458. void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
  459. {
  460. prefs->contentSize = value != 0;
  461. }
  462. /* FIO_ctx_t functions */
  463. void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
  464. fCtx->hasStdoutOutput = value;
  465. }
  466. void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
  467. {
  468. fCtx->nbFilesTotal = value;
  469. }
  470. void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
  471. size_t i = 0;
  472. for ( ; i < filenames->tableSize; ++i) {
  473. if (!strcmp(stdinmark, filenames->fileNames[i])) {
  474. fCtx->hasStdinInput = 1;
  475. return;
  476. }
  477. }
  478. }
  479. /*-*************************************
  480. * Functions
  481. ***************************************/
  482. /** FIO_removeFile() :
  483. * @result : Unlink `fileName`, even if it's read-only */
  484. static int FIO_removeFile(const char* path)
  485. {
  486. stat_t statbuf;
  487. if (!UTIL_stat(path, &statbuf)) {
  488. DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
  489. return 0;
  490. }
  491. if (!UTIL_isRegularFileStat(&statbuf)) {
  492. DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
  493. return 0;
  494. }
  495. #if defined(_WIN32) || defined(WIN32)
  496. /* windows doesn't allow remove read-only files,
  497. * so try to make it writable first */
  498. if (!(statbuf.st_mode & _S_IWRITE)) {
  499. UTIL_chmod(path, &statbuf, _S_IWRITE);
  500. }
  501. #endif
  502. return remove(path);
  503. }
  504. /** FIO_openSrcFile() :
  505. * condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
  506. * @result : FILE* to `srcFileName`, or NULL if it fails */
  507. static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName)
  508. {
  509. stat_t statbuf;
  510. int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
  511. assert(srcFileName != NULL);
  512. if (!strcmp (srcFileName, stdinmark)) {
  513. DISPLAYLEVEL(4,"Using stdin for input \n");
  514. SET_BINARY_MODE(stdin);
  515. return stdin;
  516. }
  517. if (!UTIL_stat(srcFileName, &statbuf)) {
  518. DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
  519. srcFileName, strerror(errno));
  520. return NULL;
  521. }
  522. if (!UTIL_isRegularFileStat(&statbuf)
  523. && !UTIL_isFIFOStat(&statbuf)
  524. && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf))
  525. ) {
  526. DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
  527. srcFileName);
  528. return NULL;
  529. }
  530. { FILE* const f = fopen(srcFileName, "rb");
  531. if (f == NULL)
  532. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  533. return f;
  534. }
  535. }
  536. /** FIO_openDstFile() :
  537. * condition : `dstFileName` must be non-NULL.
  538. * @result : FILE* to `dstFileName`, or NULL if it fails */
  539. static FILE*
  540. FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
  541. const char* srcFileName, const char* dstFileName,
  542. const int mode)
  543. {
  544. if (prefs->testMode) return NULL; /* do not open file in test mode */
  545. assert(dstFileName != NULL);
  546. if (!strcmp (dstFileName, stdoutmark)) {
  547. DISPLAYLEVEL(4,"Using stdout for output \n");
  548. SET_BINARY_MODE(stdout);
  549. if (prefs->sparseFileSupport == 1) {
  550. prefs->sparseFileSupport = 0;
  551. DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
  552. }
  553. return stdout;
  554. }
  555. /* ensure dst is not the same as src */
  556. if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
  557. DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
  558. return NULL;
  559. }
  560. if (prefs->sparseFileSupport == 1) {
  561. prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  562. }
  563. if (UTIL_isRegularFile(dstFileName)) {
  564. /* Check if destination file already exists */
  565. #if !defined(_WIN32)
  566. /* this test does not work on Windows :
  567. * `NUL` and `nul` are detected as regular files */
  568. if (!strcmp(dstFileName, nulmark)) {
  569. EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
  570. dstFileName);
  571. }
  572. #endif
  573. if (!prefs->overwrite) {
  574. if (g_display_prefs.displayLevel <= 1) {
  575. /* No interaction possible */
  576. DISPLAY("zstd: %s already exists; not overwritten \n",
  577. dstFileName);
  578. return NULL;
  579. }
  580. DISPLAY("zstd: %s already exists; ", dstFileName);
  581. if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
  582. return NULL;
  583. }
  584. /* need to unlink */
  585. FIO_removeFile(dstFileName);
  586. }
  587. {
  588. #if defined(_WIN32)
  589. /* Windows requires opening the file as a "binary" file to avoid
  590. * mangling. This macro doesn't exist on unix. */
  591. const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
  592. const int fd = _open(dstFileName, openflags, mode);
  593. FILE* f = NULL;
  594. if (fd != -1) {
  595. f = _fdopen(fd, "wb");
  596. }
  597. #else
  598. const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
  599. const int fd = open(dstFileName, openflags, mode);
  600. FILE* f = NULL;
  601. if (fd != -1) {
  602. f = fdopen(fd, "wb");
  603. }
  604. #endif
  605. if (f == NULL) {
  606. DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
  607. }
  608. return f;
  609. }
  610. }
  611. /*! FIO_createDictBuffer() :
  612. * creates a buffer, pointed by `*bufferPtr`,
  613. * loads `filename` content into it, up to DICTSIZE_MAX bytes.
  614. * @return : loaded size
  615. * if fileName==NULL, returns 0 and a NULL pointer
  616. */
  617. static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
  618. {
  619. FILE* fileHandle;
  620. U64 fileSize;
  621. stat_t statbuf;
  622. assert(bufferPtr != NULL);
  623. *bufferPtr = NULL;
  624. if (fileName == NULL) return 0;
  625. DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
  626. if (!UTIL_stat(fileName, &statbuf)) {
  627. EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
  628. }
  629. if (!UTIL_isRegularFileStat(&statbuf)) {
  630. EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
  631. }
  632. fileHandle = fopen(fileName, "rb");
  633. if (fileHandle == NULL) {
  634. EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
  635. }
  636. fileSize = UTIL_getFileSizeStat(&statbuf);
  637. {
  638. size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
  639. if (fileSize > dictSizeMax) {
  640. EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
  641. fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
  642. }
  643. }
  644. *bufferPtr = malloc((size_t)fileSize);
  645. if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
  646. { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
  647. if (readSize != fileSize) {
  648. EXM_THROW(35, "Error reading dictionary file %s : %s",
  649. fileName, strerror(errno));
  650. }
  651. }
  652. fclose(fileHandle);
  653. return (size_t)fileSize;
  654. }
  655. /* FIO_checkFilenameCollisions() :
  656. * Checks for and warns if there are any files that would have the same output path
  657. */
  658. int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
  659. const char **filenameTableSorted, *prevElem, *filename;
  660. unsigned u;
  661. filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
  662. if (!filenameTableSorted) {
  663. DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
  664. return 1;
  665. }
  666. for (u = 0; u < nbFiles; ++u) {
  667. filename = strrchr(filenameTable[u], PATH_SEP);
  668. if (filename == NULL) {
  669. filenameTableSorted[u] = filenameTable[u];
  670. } else {
  671. filenameTableSorted[u] = filename+1;
  672. }
  673. }
  674. qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
  675. prevElem = filenameTableSorted[0];
  676. for (u = 1; u < nbFiles; ++u) {
  677. if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
  678. DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
  679. }
  680. prevElem = filenameTableSorted[u];
  681. }
  682. free((void*)filenameTableSorted);
  683. return 0;
  684. }
  685. static const char*
  686. extractFilename(const char* path, char separator)
  687. {
  688. const char* search = strrchr(path, separator);
  689. if (search == NULL) return path;
  690. return search+1;
  691. }
  692. /* FIO_createFilename_fromOutDir() :
  693. * Takes a source file name and specified output directory, and
  694. * allocates memory for and returns a pointer to final path.
  695. * This function never returns an error (it may abort() in case of pb)
  696. */
  697. static char*
  698. FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
  699. {
  700. const char* filenameStart;
  701. char separator;
  702. char* result;
  703. #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
  704. separator = '\\';
  705. #else
  706. separator = '/';
  707. #endif
  708. filenameStart = extractFilename(path, separator);
  709. #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
  710. filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
  711. #endif
  712. result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1);
  713. if (!result) {
  714. EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
  715. }
  716. memcpy(result, outDirName, strlen(outDirName));
  717. if (outDirName[strlen(outDirName)-1] == separator) {
  718. memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart));
  719. } else {
  720. memcpy(result + strlen(outDirName), &separator, 1);
  721. memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart));
  722. }
  723. return result;
  724. }
  725. /* FIO_highbit64() :
  726. * gives position of highest bit.
  727. * note : only works for v > 0 !
  728. */
  729. static unsigned FIO_highbit64(unsigned long long v)
  730. {
  731. unsigned count = 0;
  732. assert(v != 0);
  733. v >>= 1;
  734. while (v) { v >>= 1; count++; }
  735. return count;
  736. }
  737. static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
  738. unsigned long long const dictSize,
  739. unsigned long long const maxSrcFileSize)
  740. {
  741. unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
  742. unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
  743. if (maxSize == UTIL_FILESIZE_UNKNOWN)
  744. EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
  745. assert(maxSize != UTIL_FILESIZE_UNKNOWN);
  746. if (maxSize > maxWindowSize)
  747. EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
  748. FIO_setMemLimit(prefs, (unsigned)maxSize);
  749. }
  750. /* FIO_removeMultiFilesWarning() :
  751. * Returns 1 if the console should abort, 0 if console should proceed.
  752. * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts.
  753. *
  754. * If -f is specified, or there is just 1 file, zstd will always proceed as usual.
  755. * If --rm is specified, there will be a prompt asking for user confirmation.
  756. * If -f is specified with --rm, zstd will proceed as usual
  757. * If -q is specified with --rm, zstd will abort pre-emptively
  758. * If neither flag is specified, zstd will prompt the user for confirmation to proceed.
  759. * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q).
  760. * However, if the output is stdout, we will always abort rather than displaying the warning prompt.
  761. */
  762. static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff)
  763. {
  764. int error = 0;
  765. if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) {
  766. if (g_display_prefs.displayLevel <= displayLevelCutoff) {
  767. if (prefs->removeSrcFile) {
  768. DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s\n", outFileName);
  769. error = 1;
  770. }
  771. } else {
  772. if (!strcmp(outFileName, stdoutmark)) {
  773. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
  774. } else {
  775. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
  776. }
  777. DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate the original directory tree. \n")
  778. if (prefs->removeSrcFile) {
  779. if (fCtx->hasStdoutOutput) {
  780. DISPLAYLEVEL(1, "Aborting. Use -f if you really want to delete the files and output to stdout\n");
  781. error = 1;
  782. } else {
  783. error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
  784. }
  785. }
  786. }
  787. }
  788. return error;
  789. }
  790. #ifndef ZSTD_NOCOMPRESS
  791. /* **********************************************************************
  792. * Compression
  793. ************************************************************************/
  794. typedef struct {
  795. FILE* srcFile;
  796. FILE* dstFile;
  797. void* srcBuffer;
  798. size_t srcBufferSize;
  799. void* dstBuffer;
  800. size_t dstBufferSize;
  801. void* dictBuffer;
  802. size_t dictBufferSize;
  803. const char* dictFileName;
  804. ZSTD_CStream* cctx;
  805. } cRess_t;
  806. /** ZSTD_cycleLog() :
  807. * condition for correct operation : hashLog > 1 */
  808. static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
  809. {
  810. U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
  811. assert(hashLog > 1);
  812. return hashLog - btScale;
  813. }
  814. static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
  815. ZSTD_compressionParameters* comprParams,
  816. unsigned long long const dictSize,
  817. unsigned long long const maxSrcFileSize,
  818. int cLevel)
  819. {
  820. unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
  821. ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
  822. FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
  823. if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
  824. DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
  825. comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
  826. if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
  827. if (!prefs->ldmFlag)
  828. DISPLAYLEVEL(1, "long mode automatically triggered\n");
  829. FIO_setLdmFlag(prefs, 1);
  830. }
  831. if (cParams.strategy >= ZSTD_btopt) {
  832. DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
  833. DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
  834. DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n");
  835. DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
  836. DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
  837. }
  838. }
  839. static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
  840. const char* dictFileName, unsigned long long const maxSrcFileSize,
  841. int cLevel, ZSTD_compressionParameters comprParams) {
  842. cRess_t ress;
  843. memset(&ress, 0, sizeof(ress));
  844. DISPLAYLEVEL(6, "FIO_createCResources \n");
  845. ress.cctx = ZSTD_createCCtx();
  846. if (ress.cctx == NULL)
  847. EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
  848. strerror(errno));
  849. ress.srcBufferSize = ZSTD_CStreamInSize();
  850. ress.srcBuffer = malloc(ress.srcBufferSize);
  851. ress.dstBufferSize = ZSTD_CStreamOutSize();
  852. /* need to update memLimit before calling createDictBuffer
  853. * because of memLimit check inside it */
  854. if (prefs->patchFromMode) {
  855. unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
  856. FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
  857. }
  858. ress.dstBuffer = malloc(ress.dstBufferSize);
  859. ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
  860. if (!ress.srcBuffer || !ress.dstBuffer)
  861. EXM_THROW(31, "allocation error : not enough memory");
  862. /* Advanced parameters, including dictionary */
  863. if (dictFileName && (ress.dictBuffer==NULL))
  864. EXM_THROW(32, "allocation error : can't create dictBuffer");
  865. ress.dictFileName = dictFileName;
  866. if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
  867. comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
  868. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */
  869. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
  870. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
  871. /* compression level */
  872. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
  873. /* max compressed block size */
  874. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
  875. /* source size hint */
  876. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
  877. /* long distance matching */
  878. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
  879. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
  880. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
  881. if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
  882. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
  883. }
  884. if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
  885. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
  886. }
  887. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
  888. /* compression parameters */
  889. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
  890. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
  891. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
  892. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
  893. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
  894. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
  895. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
  896. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
  897. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
  898. /* multi-threading */
  899. #ifdef ZSTD_MULTITHREAD
  900. DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
  901. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
  902. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
  903. if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
  904. DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
  905. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
  906. }
  907. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
  908. #endif
  909. /* dictionary */
  910. if (prefs->patchFromMode) {
  911. CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
  912. } else {
  913. CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
  914. }
  915. return ress;
  916. }
  917. static void FIO_freeCResources(const cRess_t* const ress)
  918. {
  919. free(ress->srcBuffer);
  920. free(ress->dstBuffer);
  921. free(ress->dictBuffer);
  922. ZSTD_freeCStream(ress->cctx); /* never fails */
  923. }
  924. #ifdef ZSTD_GZCOMPRESS
  925. static unsigned long long
  926. FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
  927. const char* srcFileName, U64 const srcFileSize,
  928. int compressionLevel, U64* readsize)
  929. {
  930. unsigned long long inFileSize = 0, outFileSize = 0;
  931. z_stream strm;
  932. if (compressionLevel > Z_BEST_COMPRESSION)
  933. compressionLevel = Z_BEST_COMPRESSION;
  934. strm.zalloc = Z_NULL;
  935. strm.zfree = Z_NULL;
  936. strm.opaque = Z_NULL;
  937. { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
  938. 15 /* maxWindowLogSize */ + 16 /* gzip only */,
  939. 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
  940. if (ret != Z_OK) {
  941. EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
  942. } }
  943. strm.next_in = 0;
  944. strm.avail_in = 0;
  945. strm.next_out = (Bytef*)ress->dstBuffer;
  946. strm.avail_out = (uInt)ress->dstBufferSize;
  947. while (1) {
  948. int ret;
  949. if (strm.avail_in == 0) {
  950. size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
  951. if (inSize == 0) break;
  952. inFileSize += inSize;
  953. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  954. strm.avail_in = (uInt)inSize;
  955. }
  956. ret = deflate(&strm, Z_NO_FLUSH);
  957. if (ret != Z_OK)
  958. EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
  959. { size_t const cSize = ress->dstBufferSize - strm.avail_out;
  960. if (cSize) {
  961. if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
  962. EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
  963. outFileSize += cSize;
  964. strm.next_out = (Bytef*)ress->dstBuffer;
  965. strm.avail_out = (uInt)ress->dstBufferSize;
  966. } }
  967. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  968. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
  969. (unsigned)(inFileSize>>20),
  970. (double)outFileSize/inFileSize*100)
  971. } else {
  972. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
  973. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  974. (double)outFileSize/inFileSize*100);
  975. } }
  976. while (1) {
  977. int const ret = deflate(&strm, Z_FINISH);
  978. { size_t const cSize = ress->dstBufferSize - strm.avail_out;
  979. if (cSize) {
  980. if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
  981. EXM_THROW(75, "Write error : %s ", strerror(errno));
  982. outFileSize += cSize;
  983. strm.next_out = (Bytef*)ress->dstBuffer;
  984. strm.avail_out = (uInt)ress->dstBufferSize;
  985. } }
  986. if (ret == Z_STREAM_END) break;
  987. if (ret != Z_BUF_ERROR)
  988. EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
  989. }
  990. { int const ret = deflateEnd(&strm);
  991. if (ret != Z_OK) {
  992. EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
  993. } }
  994. *readsize = inFileSize;
  995. return outFileSize;
  996. }
  997. #endif
  998. #ifdef ZSTD_LZMACOMPRESS
  999. static unsigned long long
  1000. FIO_compressLzmaFrame(cRess_t* ress,
  1001. const char* srcFileName, U64 const srcFileSize,
  1002. int compressionLevel, U64* readsize, int plain_lzma)
  1003. {
  1004. unsigned long long inFileSize = 0, outFileSize = 0;
  1005. lzma_stream strm = LZMA_STREAM_INIT;
  1006. lzma_action action = LZMA_RUN;
  1007. lzma_ret ret;
  1008. if (compressionLevel < 0) compressionLevel = 0;
  1009. if (compressionLevel > 9) compressionLevel = 9;
  1010. if (plain_lzma) {
  1011. lzma_options_lzma opt_lzma;
  1012. if (lzma_lzma_preset(&opt_lzma, compressionLevel))
  1013. EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
  1014. ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
  1015. if (ret != LZMA_OK)
  1016. EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
  1017. } else {
  1018. ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
  1019. if (ret != LZMA_OK)
  1020. EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
  1021. }
  1022. strm.next_in = 0;
  1023. strm.avail_in = 0;
  1024. strm.next_out = (BYTE*)ress->dstBuffer;
  1025. strm.avail_out = ress->dstBufferSize;
  1026. while (1) {
  1027. if (strm.avail_in == 0) {
  1028. size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
  1029. if (inSize == 0) action = LZMA_FINISH;
  1030. inFileSize += inSize;
  1031. strm.next_in = (BYTE const*)ress->srcBuffer;
  1032. strm.avail_in = inSize;
  1033. }
  1034. ret = lzma_code(&strm, action);
  1035. if (ret != LZMA_OK && ret != LZMA_STREAM_END)
  1036. EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
  1037. { size_t const compBytes = ress->dstBufferSize - strm.avail_out;
  1038. if (compBytes) {
  1039. if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
  1040. EXM_THROW(85, "Write error : %s", strerror(errno));
  1041. outFileSize += compBytes;
  1042. strm.next_out = (BYTE*)ress->dstBuffer;
  1043. strm.avail_out = ress->dstBufferSize;
  1044. } }
  1045. if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
  1046. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
  1047. (unsigned)(inFileSize>>20),
  1048. (double)outFileSize/inFileSize*100)
  1049. else
  1050. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
  1051. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1052. (double)outFileSize/inFileSize*100);
  1053. if (ret == LZMA_STREAM_END) break;
  1054. }
  1055. lzma_end(&strm);
  1056. *readsize = inFileSize;
  1057. return outFileSize;
  1058. }
  1059. #endif
  1060. #ifdef ZSTD_LZ4COMPRESS
  1061. #if LZ4_VERSION_NUMBER <= 10600
  1062. #define LZ4F_blockLinked blockLinked
  1063. #define LZ4F_max64KB max64KB
  1064. #endif
  1065. static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
  1066. static unsigned long long
  1067. FIO_compressLz4Frame(cRess_t* ress,
  1068. const char* srcFileName, U64 const srcFileSize,
  1069. int compressionLevel, int checksumFlag,
  1070. U64* readsize)
  1071. {
  1072. const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
  1073. unsigned long long inFileSize = 0, outFileSize = 0;
  1074. LZ4F_preferences_t prefs;
  1075. LZ4F_compressionContext_t ctx;
  1076. LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
  1077. if (LZ4F_isError(errorCode))
  1078. EXM_THROW(31, "zstd: failed to create lz4 compression context");
  1079. memset(&prefs, 0, sizeof(prefs));
  1080. assert(blockSize <= ress->srcBufferSize);
  1081. prefs.autoFlush = 1;
  1082. prefs.compressionLevel = compressionLevel;
  1083. prefs.frameInfo.blockMode = LZ4F_blockLinked;
  1084. prefs.frameInfo.blockSizeID = LZ4F_max64KB;
  1085. prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
  1086. #if LZ4_VERSION_NUMBER >= 10600
  1087. prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
  1088. #endif
  1089. assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
  1090. {
  1091. size_t readSize;
  1092. size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
  1093. if (LZ4F_isError(headerSize))
  1094. EXM_THROW(33, "File header generation failed : %s",
  1095. LZ4F_getErrorName(headerSize));
  1096. if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
  1097. EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
  1098. outFileSize += headerSize;
  1099. /* Read first block */
  1100. readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
  1101. inFileSize += readSize;
  1102. /* Main Loop */
  1103. while (readSize>0) {
  1104. size_t const outSize = LZ4F_compressUpdate(ctx,
  1105. ress->dstBuffer, ress->dstBufferSize,
  1106. ress->srcBuffer, readSize, NULL);
  1107. if (LZ4F_isError(outSize))
  1108. EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
  1109. srcFileName, LZ4F_getErrorName(outSize));
  1110. outFileSize += outSize;
  1111. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  1112. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
  1113. (unsigned)(inFileSize>>20),
  1114. (double)outFileSize/inFileSize*100)
  1115. } else {
  1116. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
  1117. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1118. (double)outFileSize/inFileSize*100);
  1119. }
  1120. /* Write Block */
  1121. { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
  1122. if (sizeCheck != outSize)
  1123. EXM_THROW(36, "Write error : %s", strerror(errno));
  1124. }
  1125. /* Read next block */
  1126. readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
  1127. inFileSize += readSize;
  1128. }
  1129. if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
  1130. /* End of Stream mark */
  1131. headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
  1132. if (LZ4F_isError(headerSize))
  1133. EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
  1134. srcFileName, LZ4F_getErrorName(headerSize));
  1135. { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
  1136. if (sizeCheck != headerSize)
  1137. EXM_THROW(39, "Write error : %s (cannot write end of stream)",
  1138. strerror(errno));
  1139. }
  1140. outFileSize += headerSize;
  1141. }
  1142. *readsize = inFileSize;
  1143. LZ4F_freeCompressionContext(ctx);
  1144. return outFileSize;
  1145. }
  1146. #endif
  1147. static unsigned long long
  1148. FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
  1149. FIO_prefs_t* const prefs,
  1150. const cRess_t* ressPtr,
  1151. const char* srcFileName, U64 fileSize,
  1152. int compressionLevel, U64* readsize)
  1153. {
  1154. cRess_t const ress = *ressPtr;
  1155. FILE* const srcFile = ress.srcFile;
  1156. FILE* const dstFile = ress.dstFile;
  1157. U64 compressedfilesize = 0;
  1158. ZSTD_EndDirective directive = ZSTD_e_continue;
  1159. U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
  1160. /* stats */
  1161. ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
  1162. ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
  1163. typedef enum { noChange, slower, faster } speedChange_e;
  1164. speedChange_e speedChange = noChange;
  1165. unsigned flushWaiting = 0;
  1166. unsigned inputPresented = 0;
  1167. unsigned inputBlocked = 0;
  1168. unsigned lastJobID = 0;
  1169. UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
  1170. DISPLAYLEVEL(6, "compression using zstd format \n");
  1171. /* init */
  1172. if (fileSize != UTIL_FILESIZE_UNKNOWN) {
  1173. pledgedSrcSize = fileSize;
  1174. CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
  1175. } else if (prefs->streamSrcSize > 0) {
  1176. /* unknown source size; use the declared stream size */
  1177. pledgedSrcSize = prefs->streamSrcSize;
  1178. CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
  1179. }
  1180. {
  1181. int windowLog;
  1182. UTIL_HumanReadableSize_t windowSize;
  1183. CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
  1184. if (windowLog == 0) {
  1185. const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
  1186. windowLog = cParams.windowLog;
  1187. }
  1188. windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
  1189. DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
  1190. }
  1191. (void)srcFileName;
  1192. /* Main compression loop */
  1193. do {
  1194. size_t stillToFlush;
  1195. /* Fill input Buffer */
  1196. size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
  1197. ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
  1198. DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
  1199. *readsize += inSize;
  1200. if ((inSize == 0) || (*readsize == fileSize))
  1201. directive = ZSTD_e_end;
  1202. stillToFlush = 1;
  1203. while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
  1204. || (directive == ZSTD_e_end && stillToFlush != 0) ) {
  1205. size_t const oldIPos = inBuff.pos;
  1206. ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
  1207. size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
  1208. CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
  1209. /* count stats */
  1210. inputPresented++;
  1211. if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
  1212. if (!toFlushNow) flushWaiting = 1;
  1213. /* Write compressed stream */
  1214. DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
  1215. (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
  1216. if (outBuff.pos) {
  1217. size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
  1218. if (sizeCheck != outBuff.pos)
  1219. EXM_THROW(25, "Write error : %s (cannot write compressed block)",
  1220. strerror(errno));
  1221. compressedfilesize += outBuff.pos;
  1222. }
  1223. /* display notification; and adapt compression level */
  1224. if (READY_FOR_UPDATE()) {
  1225. ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
  1226. double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
  1227. UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
  1228. UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
  1229. UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
  1230. /* display progress notifications */
  1231. if (g_display_prefs.displayLevel >= 3) {
  1232. DISPLAYUPDATE(3, "\r(L%i) Buffered :%6.*f%4s - Consumed :%6.*f%4s - Compressed :%6.*f%4s => %.2f%% ",
  1233. compressionLevel,
  1234. buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
  1235. consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
  1236. produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
  1237. cShare );
  1238. } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) {
  1239. /* Require level 2 or forcibly displayed progress counter for summarized updates */
  1240. DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */
  1241. if (fCtx->nbFilesTotal > 1) {
  1242. size_t srcFileNameSize = strlen(srcFileName);
  1243. /* Ensure that the string we print is roughly the same size each time */
  1244. if (srcFileNameSize > 18) {
  1245. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  1246. DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ",
  1247. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
  1248. } else {
  1249. DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ",
  1250. fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
  1251. }
  1252. }
  1253. DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
  1254. if (fileSize != UTIL_FILESIZE_UNKNOWN)
  1255. DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
  1256. DISPLAYLEVEL(1, " ==> %2.f%%", cShare);
  1257. DELAY_NEXT_UPDATE();
  1258. }
  1259. /* adaptive mode : statistics measurement and speed correction */
  1260. if (prefs->adaptiveMode) {
  1261. /* check output speed */
  1262. if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
  1263. unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
  1264. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
  1265. assert(zfp.produced >= previous_zfp_update.produced);
  1266. assert(prefs->nbWorkers >= 1);
  1267. /* test if compression is blocked
  1268. * either because output is slow and all buffers are full
  1269. * or because input is slow and no job can start while waiting for at least one buffer to be filled.
  1270. * note : exclude starting part, since currentJobID > 1 */
  1271. if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
  1272. && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
  1273. ) {
  1274. DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
  1275. speedChange = slower;
  1276. }
  1277. previous_zfp_update = zfp;
  1278. if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
  1279. && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
  1280. ) {
  1281. DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
  1282. speedChange = slower;
  1283. }
  1284. flushWaiting = 0;
  1285. }
  1286. /* course correct only if there is at least one new job completed */
  1287. if (zfp.currentJobID > lastJobID) {
  1288. DISPLAYLEVEL(6, "compression level adaptation check \n")
  1289. /* check input speed */
  1290. if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
  1291. if (inputBlocked <= 0) {
  1292. DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
  1293. speedChange = slower;
  1294. } else if (speedChange == noChange) {
  1295. unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
  1296. unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
  1297. unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
  1298. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
  1299. previous_zfp_correction = zfp;
  1300. assert(inputPresented > 0);
  1301. DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
  1302. inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
  1303. (unsigned)newlyIngested, (unsigned)newlyConsumed,
  1304. (unsigned)newlyFlushed, (unsigned)newlyProduced);
  1305. if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
  1306. && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
  1307. && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
  1308. ) {
  1309. DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
  1310. newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
  1311. speedChange = faster;
  1312. }
  1313. }
  1314. inputBlocked = 0;
  1315. inputPresented = 0;
  1316. }
  1317. if (speedChange == slower) {
  1318. DISPLAYLEVEL(6, "slower speed , higher compression \n")
  1319. compressionLevel ++;
  1320. if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
  1321. if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
  1322. compressionLevel += (compressionLevel == 0); /* skip 0 */
  1323. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1324. }
  1325. if (speedChange == faster) {
  1326. DISPLAYLEVEL(6, "faster speed , lighter compression \n")
  1327. compressionLevel --;
  1328. if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
  1329. compressionLevel -= (compressionLevel == 0); /* skip 0 */
  1330. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1331. }
  1332. speedChange = noChange;
  1333. lastJobID = zfp.currentJobID;
  1334. } /* if (zfp.currentJobID > lastJobID) */
  1335. } /* if (g_adaptiveMode) */
  1336. } /* if (READY_FOR_UPDATE()) */
  1337. } /* while ((inBuff.pos != inBuff.size) */
  1338. } while (directive != ZSTD_e_end);
  1339. if (ferror(srcFile)) {
  1340. EXM_THROW(26, "Read error : I/O error");
  1341. }
  1342. if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
  1343. EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
  1344. (unsigned long long)*readsize, (unsigned long long)fileSize);
  1345. }
  1346. return compressedfilesize;
  1347. }
  1348. /*! FIO_compressFilename_internal() :
  1349. * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
  1350. * @return : 0 : compression completed correctly,
  1351. * 1 : missing or pb opening srcFileName
  1352. */
  1353. static int
  1354. FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
  1355. FIO_prefs_t* const prefs,
  1356. cRess_t ress,
  1357. const char* dstFileName, const char* srcFileName,
  1358. int compressionLevel)
  1359. {
  1360. UTIL_time_t const timeStart = UTIL_getTime();
  1361. clock_t const cpuStart = clock();
  1362. U64 readsize = 0;
  1363. U64 compressedfilesize = 0;
  1364. U64 const fileSize = UTIL_getFileSize(srcFileName);
  1365. DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
  1366. /* compression format selection */
  1367. switch (prefs->compressionType) {
  1368. default:
  1369. case FIO_zstdCompression:
  1370. compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
  1371. break;
  1372. case FIO_gzipCompression:
  1373. #ifdef ZSTD_GZCOMPRESS
  1374. compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
  1375. #else
  1376. (void)compressionLevel;
  1377. EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
  1378. srcFileName);
  1379. #endif
  1380. break;
  1381. case FIO_xzCompression:
  1382. case FIO_lzmaCompression:
  1383. #ifdef ZSTD_LZMACOMPRESS
  1384. compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
  1385. #else
  1386. (void)compressionLevel;
  1387. EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
  1388. srcFileName);
  1389. #endif
  1390. break;
  1391. case FIO_lz4Compression:
  1392. #ifdef ZSTD_LZ4COMPRESS
  1393. compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
  1394. #else
  1395. (void)compressionLevel;
  1396. EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
  1397. srcFileName);
  1398. #endif
  1399. break;
  1400. }
  1401. /* Status */
  1402. fCtx->totalBytesInput += (size_t)readsize;
  1403. fCtx->totalBytesOutput += (size_t)compressedfilesize;
  1404. DISPLAYLEVEL(2, "\r%79s\r", "");
  1405. if (g_display_prefs.displayLevel >= 2 &&
  1406. !fCtx->hasStdoutOutput &&
  1407. (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
  1408. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
  1409. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
  1410. if (readsize == 0) {
  1411. DISPLAYLEVEL(2,"%-20s : (%6.*f%4s => %6.*f%4s, %s) \n",
  1412. srcFileName,
  1413. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1414. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1415. dstFileName);
  1416. } else {
  1417. DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%4s => %6.*f%4s, %s) \n",
  1418. srcFileName,
  1419. (double)compressedfilesize / (double)readsize * 100,
  1420. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1421. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1422. dstFileName);
  1423. }
  1424. }
  1425. /* Elapsed Time and CPU Load */
  1426. { clock_t const cpuEnd = clock();
  1427. double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
  1428. U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
  1429. double const timeLength_s = (double)timeLength_ns / 1000000000;
  1430. double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
  1431. DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
  1432. srcFileName, timeLength_s, cpuLoad_pct);
  1433. }
  1434. return 0;
  1435. }
  1436. /*! FIO_compressFilename_dstFile() :
  1437. * open dstFileName, or pass-through if ress.dstFile != NULL,
  1438. * then start compression with FIO_compressFilename_internal().
  1439. * Manages source removal (--rm) and file permissions transfer.
  1440. * note : ress.srcFile must be != NULL,
  1441. * so reach this function through FIO_compressFilename_srcFile().
  1442. * @return : 0 : compression completed correctly,
  1443. * 1 : pb
  1444. */
  1445. static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
  1446. FIO_prefs_t* const prefs,
  1447. cRess_t ress,
  1448. const char* dstFileName,
  1449. const char* srcFileName,
  1450. int compressionLevel)
  1451. {
  1452. int closeDstFile = 0;
  1453. int result;
  1454. stat_t statbuf;
  1455. int transferMTime = 0;
  1456. assert(ress.srcFile != NULL);
  1457. if (ress.dstFile == NULL) {
  1458. int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
  1459. if ( strcmp (srcFileName, stdinmark)
  1460. && UTIL_stat(srcFileName, &statbuf)
  1461. && UTIL_isRegularFileStat(&statbuf) ) {
  1462. dstFilePermissions = statbuf.st_mode;
  1463. transferMTime = 1;
  1464. }
  1465. closeDstFile = 1;
  1466. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
  1467. ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
  1468. if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
  1469. /* Must only be added after FIO_openDstFile() succeeds.
  1470. * Otherwise we may delete the destination file if it already exists,
  1471. * and the user presses Ctrl-C when asked if they wish to overwrite.
  1472. */
  1473. addHandler(dstFileName);
  1474. }
  1475. result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1476. if (closeDstFile) {
  1477. FILE* const dstFile = ress.dstFile;
  1478. ress.dstFile = NULL;
  1479. clearHandler();
  1480. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
  1481. if (fclose(dstFile)) { /* error closing dstFile */
  1482. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  1483. result=1;
  1484. }
  1485. if (transferMTime) {
  1486. UTIL_utime(dstFileName, &statbuf);
  1487. }
  1488. if ( (result != 0) /* operation failure */
  1489. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  1490. ) {
  1491. FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
  1492. }
  1493. }
  1494. return result;
  1495. }
  1496. /* List used to compare file extensions (used with --exclude-compressed flag)
  1497. * Different from the suffixList and should only apply to ZSTD compress operationResult
  1498. */
  1499. static const char *compressedFileExtensions[] = {
  1500. ZSTD_EXTENSION,
  1501. TZSTD_EXTENSION,
  1502. GZ_EXTENSION,
  1503. TGZ_EXTENSION,
  1504. LZMA_EXTENSION,
  1505. XZ_EXTENSION,
  1506. TXZ_EXTENSION,
  1507. LZ4_EXTENSION,
  1508. TLZ4_EXTENSION,
  1509. NULL
  1510. };
  1511. /*! FIO_compressFilename_srcFile() :
  1512. * @return : 0 : compression completed correctly,
  1513. * 1 : missing or pb opening srcFileName
  1514. */
  1515. static int
  1516. FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
  1517. FIO_prefs_t* const prefs,
  1518. cRess_t ress,
  1519. const char* dstFileName,
  1520. const char* srcFileName,
  1521. int compressionLevel)
  1522. {
  1523. int result;
  1524. DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
  1525. /* ensure src is not a directory */
  1526. if (UTIL_isDirectory(srcFileName)) {
  1527. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  1528. return 1;
  1529. }
  1530. /* ensure src is not the same as dict (if present) */
  1531. if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) {
  1532. DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
  1533. return 1;
  1534. }
  1535. /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
  1536. * YES => ZSTD will skip compression of the file and will return 0.
  1537. * NO => ZSTD will resume with compress operation.
  1538. */
  1539. if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
  1540. DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
  1541. return 0;
  1542. }
  1543. ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
  1544. if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
  1545. result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1546. fclose(ress.srcFile);
  1547. ress.srcFile = NULL;
  1548. if ( prefs->removeSrcFile /* --rm */
  1549. && result == 0 /* success */
  1550. && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
  1551. ) {
  1552. /* We must clear the handler, since after this point calling it would
  1553. * delete both the source and destination files.
  1554. */
  1555. clearHandler();
  1556. if (FIO_removeFile(srcFileName))
  1557. EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
  1558. }
  1559. return result;
  1560. }
  /* checked_index() :
   * @return options[index], after asserting that `index` is within the `length` entries of `options`.
   * The check only exists in builds where assert() is enabled.
   */
  static const char*
  checked_index(const char* options[], size_t length, size_t index)
  {
      /* `length` is only read by the assert below :
       * mark it as used, to avoid a warning in builds where assert() is compiled out */
      (void) length;
      assert(index < length);
      return *(options + index);
  }

  /* INDEX() : bounds-asserted lookup into a true array of strings (the size is derived with sizeof) */
  #define INDEX(table, pos) checked_index((table), sizeof(table) / sizeof(char*), (pos))
  1568. void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) {
  1569. static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
  1570. LZMA_EXTENSION, LZ4_EXTENSION};
  1571. static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
  1572. static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
  1573. static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
  1574. static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
  1575. assert(g_display_prefs.displayLevel >= 4);
  1576. DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
  1577. DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
  1578. DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
  1579. DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
  1580. DISPLAY(" --block-size=%d", prefs->blockSize);
  1581. if (prefs->adaptiveMode)
  1582. DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
  1583. DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
  1584. DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
  1585. if (prefs->streamSrcSize)
  1586. DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
  1587. if (prefs->srcSizeHint)
  1588. DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
  1589. if (prefs->targetCBlockSize)
  1590. DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
  1591. DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
  1592. DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
  1593. DISPLAY(" --threads=%d", prefs->nbWorkers);
  1594. DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
  1595. DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
  1596. DISPLAY("\n");
  1597. }
  1598. #undef INDEX
  1599. int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
  1600. const char* srcFileName, const char* dictFileName,
  1601. int compressionLevel, ZSTD_compressionParameters comprParams)
  1602. {
  1603. cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
  1604. int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1605. #define DISPLAY_LEVEL_DEFAULT 2
  1606. FIO_freeCResources(&ress);
  1607. return result;
  1608. }
  /* FIO_determineCompressedName() :
   * create a destination filename for compressed srcFileName :
   * the source name (or, when `outDirName` is provided, the name produced by
   * FIO_createFilename_fromOutDir()) followed by `suffix`.
   * @return a pointer to it.
   *  The name lives in a function-static buffer : it is overwritten (and possibly
   *  reallocated) by the next call, and must not be freed by the caller.
   * This function never returns an error (it may abort() in case of pb)
   */
  static const char*
  FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
  {
      static size_t dfnbCapacity = 0;
      static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
      char* outDirFilename = NULL;
      size_t sfnSize = strlen(srcFileName);
      size_t const srcSuffixLen = strlen(suffix);

      if (outDirName) {
          outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
          /* validate the pointer BEFORE strlen() dereferences it */
          assert(outDirFilename != NULL);
          sfnSize = strlen(outDirFilename);
      }

      if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
          /* resize buffer for dstName (previous content is not needed, hence free + malloc) */
          free(dstFileNameBuffer);
          dfnbCapacity = sfnSize + srcSuffixLen + 30;
          dstFileNameBuffer = (char*)malloc(dfnbCapacity);
          if (!dstFileNameBuffer) {
              EXM_THROW(30, "zstd: %s", strerror(errno));
          }
      }
      assert(dstFileNameBuffer != NULL);

      /* base name first (without its terminating null) ... */
      if (outDirFilename) {
          memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
          free(outDirFilename);
      } else {
          memcpy(dstFileNameBuffer, srcFileName, sfnSize);
      }
      /* ... then the suffix */
      memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
      return dstFileNameBuffer;
  }
  /* FIO_getLargestFileSize() :
   * @return : the largest value reported by UTIL_getFileSize()
   *           over the `nbFiles` first entries of `inFileNames` (0 when nbFiles == 0).
   */
  static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
  {
      unsigned long long largestSize = 0;
      size_t fileNb = 0;
      while (fileNb < nbFiles) {
          unsigned long long const candidate = UTIL_getFileSize(inFileNames[fileNb]);
          if (candidate > largestSize) largestSize = candidate;
          fileNb++;
      }
      return largestSize;
  }
  1656. /* FIO_compressMultipleFilenames() :
  1657. * compress nbFiles files
  1658. * into either one destination (outFileName),
  1659. * or into one file each (outFileName == NULL, but suffix != NULL),
  1660. * or into a destination folder (specified with -O)
  1661. */
  1662. int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
  1663. FIO_prefs_t* const prefs,
  1664. const char** inFileNamesTable,
  1665. const char* outMirroredRootDirName,
  1666. const char* outDirName,
  1667. const char* outFileName, const char* suffix,
  1668. const char* dictFileName, int compressionLevel,
  1669. ZSTD_compressionParameters comprParams)
  1670. {
  1671. int status;
  1672. int error = 0;
  1673. cRess_t ress = FIO_createCResources(prefs, dictFileName,
  1674. FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
  1675. compressionLevel, comprParams);
  1676. /* init */
  1677. assert(outFileName != NULL || suffix != NULL);
  1678. if (outFileName != NULL) { /* output into a single destination (stdout typically) */
  1679. if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  1680. FIO_freeCResources(&ress);
  1681. return 1;
  1682. }
  1683. ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  1684. if (ress.dstFile == NULL) { /* could not open outFileName */
  1685. error = 1;
  1686. } else {
  1687. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1688. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
  1689. if (!status) fCtx->nbFilesProcessed++;
  1690. error |= status;
  1691. }
  1692. if (fclose(ress.dstFile))
  1693. EXM_THROW(29, "Write error (%s) : cannot properly close %s",
  1694. strerror(errno), outFileName);
  1695. ress.dstFile = NULL;
  1696. }
  1697. } else {
  1698. if (outMirroredRootDirName)
  1699. UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  1700. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1701. const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
  1702. const char* dstFileName = NULL;
  1703. if (outMirroredRootDirName) {
  1704. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  1705. if (validMirroredDirName) {
  1706. dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
  1707. free(validMirroredDirName);
  1708. } else {
  1709. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
  1710. error=1;
  1711. continue;
  1712. }
  1713. } else {
  1714. dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
  1715. }
  1716. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1717. if (!status) fCtx->nbFilesProcessed++;
  1718. error |= status;
  1719. }
  1720. if (outDirName)
  1721. FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
  1722. }
  1723. if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) {
  1724. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
  1725. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
  1726. DISPLAYLEVEL(2, "\r%79s\r", "");
  1727. DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n",
  1728. fCtx->nbFilesProcessed,
  1729. (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
  1730. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1731. hr_osize.precision, hr_osize.value, hr_osize.suffix);
  1732. }
  1733. FIO_freeCResources(&ress);
  1734. return error;
  1735. }
  1736. #endif /* #ifndef ZSTD_NOCOMPRESS */
  1737. #ifndef ZSTD_NODECOMPRESS
  1738. /* **************************************************************************
  1739. * Decompression
  1740. ***************************************************************************/
  1741. typedef struct {
  1742. void* srcBuffer;
  1743. size_t srcBufferSize;
  1744. size_t srcBufferLoaded;
  1745. void* dstBuffer;
  1746. size_t dstBufferSize;
  1747. ZSTD_DStream* dctx;
  1748. FILE* dstFile;
  1749. } dRess_t;
  1750. static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
  1751. {
  1752. dRess_t ress;
  1753. memset(&ress, 0, sizeof(ress));
  1754. if (prefs->patchFromMode)
  1755. FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
  1756. /* Allocation */
  1757. ress.dctx = ZSTD_createDStream();
  1758. if (ress.dctx==NULL)
  1759. EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
  1760. CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
  1761. CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
  1762. ress.srcBufferSize = ZSTD_DStreamInSize();
  1763. ress.srcBuffer = malloc(ress.srcBufferSize);
  1764. ress.dstBufferSize = ZSTD_DStreamOutSize();
  1765. ress.dstBuffer = malloc(ress.dstBufferSize);
  1766. if (!ress.srcBuffer || !ress.dstBuffer)
  1767. EXM_THROW(61, "Allocation error : not enough memory");
  1768. /* dictionary */
  1769. { void* dictBuffer;
  1770. size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
  1771. CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
  1772. free(dictBuffer);
  1773. }
  1774. return ress;
  1775. }
  1776. static void FIO_freeDResources(dRess_t ress)
  1777. {
  1778. CHECK( ZSTD_freeDStream(ress.dctx) );
  1779. free(ress.srcBuffer);
  1780. free(ress.dstBuffer);
  1781. }
  1782. /** FIO_fwriteSparse() :
  1783. * @return : storedSkips,
  1784. * argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
  1785. static unsigned
  1786. FIO_fwriteSparse(FILE* file,
  1787. const void* buffer, size_t bufferSize,
  1788. const FIO_prefs_t* const prefs,
  1789. unsigned storedSkips)
  1790. {
  1791. const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
  1792. size_t bufferSizeT = bufferSize / sizeof(size_t);
  1793. const size_t* const bufferTEnd = bufferT + bufferSizeT;
  1794. const size_t* ptrT = bufferT;
  1795. static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* check every 32 KB */
  1796. if (prefs->testMode) return 0; /* do not output anything in test mode */
  1797. if (!prefs->sparseFileSupport) { /* normal write */
  1798. size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
  1799. if (sizeCheck != bufferSize)
  1800. EXM_THROW(70, "Write error : cannot write decoded block : %s",
  1801. strerror(errno));
  1802. return 0;
  1803. }
  1804. /* avoid int overflow */
  1805. if (storedSkips > 1 GB) {
  1806. if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0)
  1807. EXM_THROW(91, "1 GB skip error (sparse file support)");
  1808. storedSkips -= 1 GB;
  1809. }
  1810. while (ptrT < bufferTEnd) {
  1811. size_t nb0T;
  1812. /* adjust last segment if < 32 KB */
  1813. size_t seg0SizeT = segmentSizeT;
  1814. if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
  1815. bufferSizeT -= seg0SizeT;
  1816. /* count leading zeroes */
  1817. for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
  1818. storedSkips += (unsigned)(nb0T * sizeof(size_t));
  1819. if (nb0T != seg0SizeT) { /* not all 0s */
  1820. size_t const nbNon0ST = seg0SizeT - nb0T;
  1821. /* skip leading zeros */
  1822. if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
  1823. EXM_THROW(92, "Sparse skip error ; try --no-sparse");
  1824. storedSkips = 0;
  1825. /* write the rest */
  1826. if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
  1827. EXM_THROW(93, "Write error : cannot write decoded block : %s",
  1828. strerror(errno));
  1829. }
  1830. ptrT += seg0SizeT;
  1831. }
  1832. { static size_t const maskT = sizeof(size_t)-1;
  1833. if (bufferSize & maskT) {
  1834. /* size not multiple of sizeof(size_t) : implies end of block */
  1835. const char* const restStart = (const char*)bufferTEnd;
  1836. const char* restPtr = restStart;
  1837. const char* const restEnd = (const char*)buffer + bufferSize;
  1838. assert(restEnd > restStart && restEnd < restStart + sizeof(size_t));
  1839. for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
  1840. storedSkips += (unsigned) (restPtr - restStart);
  1841. if (restPtr != restEnd) {
  1842. /* not all remaining bytes are 0 */
  1843. size_t const restSize = (size_t)(restEnd - restPtr);
  1844. if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
  1845. EXM_THROW(92, "Sparse skip error ; try --no-sparse");
  1846. if (fwrite(restPtr, 1, restSize, file) != restSize)
  1847. EXM_THROW(95, "Write error : cannot write end of decoded block : %s",
  1848. strerror(errno));
  1849. storedSkips = 0;
  1850. } } }
  1851. return storedSkips;
  1852. }
  1853. static void
  1854. FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
  1855. {
  1856. if (prefs->testMode) assert(storedSkips == 0);
  1857. if (storedSkips>0) {
  1858. assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */
  1859. (void)prefs; /* assert can be disabled, in which case prefs becomes unused */
  1860. if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
  1861. EXM_THROW(69, "Final skip error (sparse file support)");
  1862. /* last zero must be explicitly written,
  1863. * so that skipped ones get implicitly translated as zero by FS */
  1864. { const char lastZeroByte[1] = { 0 };
  1865. if (fwrite(lastZeroByte, 1, 1, file) != 1)
  1866. EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
  1867. } }
  1868. }
  1869. /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
  1870. @return : 0 (no error) */
  1871. static int FIO_passThrough(const FIO_prefs_t* const prefs,
  1872. FILE* foutput, FILE* finput,
  1873. void* buffer, size_t bufferSize,
  1874. size_t alreadyLoaded)
  1875. {
  1876. size_t const blockSize = MIN(64 KB, bufferSize);
  1877. size_t readFromInput;
  1878. unsigned storedSkips = 0;
  1879. /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
  1880. { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
  1881. if (sizeCheck != alreadyLoaded) {
  1882. DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
  1883. return 1;
  1884. } }
  1885. do {
  1886. readFromInput = fread(buffer, 1, blockSize, finput);
  1887. storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
  1888. } while (readFromInput == blockSize);
  1889. if (ferror(finput)) {
  1890. DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
  1891. return 1;
  1892. }
  1893. assert(feof(finput));
  1894. FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
  1895. return 0;
  1896. }
  1897. /* FIO_zstdErrorHelp() :
  1898. * detailed error message when requested window size is too large */
  1899. static void
  1900. FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
  1901. const dRess_t* ress,
  1902. size_t err, const char* srcFileName)
  1903. {
  1904. ZSTD_frameHeader header;
  1905. /* Help message only for one specific error */
  1906. if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
  1907. return;
  1908. /* Try to decode the frame header */
  1909. err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
  1910. if (err == 0) {
  1911. unsigned long long const windowSize = header.windowSize;
  1912. unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
  1913. assert(prefs->memLimit > 0);
  1914. DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
  1915. srcFileName, windowSize, prefs->memLimit);
  1916. if (windowLog <= ZSTD_WINDOWLOG_MAX) {
  1917. unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
  1918. assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
  1919. DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
  1920. srcFileName, windowLog, windowMB);
  1921. return;
  1922. } }
  1923. DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
  1924. srcFileName, ZSTD_WINDOWLOG_MAX);
  1925. }
  1926. /** FIO_decompressFrame() :
  1927. * @return : size of decoded zstd frame, or an error code
  1928. */
  1929. #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
  1930. static unsigned long long
  1931. FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
  1932. const FIO_prefs_t* const prefs,
  1933. const char* srcFileName,
  1934. U64 alreadyDecoded) /* for multi-frames streams */
  1935. {
  1936. U64 frameSize = 0;
  1937. U32 storedSkips = 0;
  1938. /* display last 20 characters only */
  1939. { size_t const srcFileLength = strlen(srcFileName);
  1940. if (srcFileLength>20) srcFileName += srcFileLength-20;
  1941. }
  1942. ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
  1943. /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
  1944. { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
  1945. if (ress->srcBufferLoaded < toDecode) {
  1946. size_t const toRead = toDecode - ress->srcBufferLoaded;
  1947. void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
  1948. ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
  1949. } }
  1950. /* Main decompression Loop */
  1951. while (1) {
  1952. ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
  1953. ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
  1954. size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
  1955. const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2;
  1956. UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
  1957. if (ZSTD_isError(readSizeHint)) {
  1958. DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
  1959. srcFileName, ZSTD_getErrorName(readSizeHint));
  1960. FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
  1961. return FIO_ERROR_FRAME_DECODING;
  1962. }
  1963. /* Write block */
  1964. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
  1965. frameSize += outBuff.pos;
  1966. if (fCtx->nbFilesTotal > 1) {
  1967. size_t srcFileNameSize = strlen(srcFileName);
  1968. if (srcFileNameSize > 18) {
  1969. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  1970. DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
  1971. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
  1972. } else {
  1973. DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
  1974. fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
  1975. }
  1976. } else {
  1977. DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... ",
  1978. srcFileName, hrs.precision, hrs.value, hrs.suffix);
  1979. }
  1980. if (inBuff.pos > 0) {
  1981. memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
  1982. ress->srcBufferLoaded -= inBuff.pos;
  1983. }
  1984. if (readSizeHint == 0) break; /* end of frame */
  1985. /* Fill input buffer */
  1986. { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */
  1987. if (ress->srcBufferLoaded < toDecode) {
  1988. size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */
  1989. void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
  1990. size_t const readSize = fread(startPosition, 1, toRead, finput);
  1991. if (readSize==0) {
  1992. DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
  1993. srcFileName);
  1994. return FIO_ERROR_FRAME_DECODING;
  1995. }
  1996. ress->srcBufferLoaded += readSize;
  1997. } } }
  1998. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  1999. return frameSize;
  2000. }
  2001. #ifdef ZSTD_GZDECOMPRESS
  2002. static unsigned long long
  2003. FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile,
  2004. const FIO_prefs_t* const prefs,
  2005. const char* srcFileName)
  2006. {
  2007. unsigned long long outFileSize = 0;
  2008. z_stream strm;
  2009. int flush = Z_NO_FLUSH;
  2010. int decodingError = 0;
  2011. unsigned storedSkips = 0;
  2012. strm.zalloc = Z_NULL;
  2013. strm.zfree = Z_NULL;
  2014. strm.opaque = Z_NULL;
  2015. strm.next_in = 0;
  2016. strm.avail_in = 0;
  2017. /* see http://www.zlib.net/manual.html */
  2018. if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
  2019. return FIO_ERROR_FRAME_DECODING;
  2020. strm.next_out = (Bytef*)ress->dstBuffer;
  2021. strm.avail_out = (uInt)ress->dstBufferSize;
  2022. strm.avail_in = (uInt)ress->srcBufferLoaded;
  2023. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  2024. for ( ; ; ) {
  2025. int ret;
  2026. if (strm.avail_in == 0) {
  2027. ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
  2028. if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
  2029. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  2030. strm.avail_in = (uInt)ress->srcBufferLoaded;
  2031. }
  2032. ret = inflate(&strm, flush);
  2033. if (ret == Z_BUF_ERROR) {
  2034. DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
  2035. decodingError = 1; break;
  2036. }
  2037. if (ret != Z_OK && ret != Z_STREAM_END) {
  2038. DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
  2039. decodingError = 1; break;
  2040. }
  2041. { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
  2042. if (decompBytes) {
  2043. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
  2044. outFileSize += decompBytes;
  2045. strm.next_out = (Bytef*)ress->dstBuffer;
  2046. strm.avail_out = (uInt)ress->dstBufferSize;
  2047. }
  2048. }
  2049. if (ret == Z_STREAM_END) break;
  2050. }
  2051. if (strm.avail_in > 0)
  2052. memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
  2053. ress->srcBufferLoaded = strm.avail_in;
  2054. if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
  2055. && (decodingError==0) ) {
  2056. DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
  2057. decodingError = 1;
  2058. }
  2059. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2060. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2061. }
  2062. #endif
  2063. #ifdef ZSTD_LZMADECOMPRESS
  2064. static unsigned long long
  2065. FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
  2066. const FIO_prefs_t* const prefs,
  2067. const char* srcFileName, int plain_lzma)
  2068. {
  2069. unsigned long long outFileSize = 0;
  2070. lzma_stream strm = LZMA_STREAM_INIT;
  2071. lzma_action action = LZMA_RUN;
  2072. lzma_ret initRet;
  2073. int decodingError = 0;
  2074. unsigned storedSkips = 0;
  2075. strm.next_in = 0;
  2076. strm.avail_in = 0;
  2077. if (plain_lzma) {
  2078. initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
  2079. } else {
  2080. initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
  2081. }
  2082. if (initRet != LZMA_OK) {
  2083. DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
  2084. plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
  2085. srcFileName, initRet);
  2086. return FIO_ERROR_FRAME_DECODING;
  2087. }
  2088. strm.next_out = (BYTE*)ress->dstBuffer;
  2089. strm.avail_out = ress->dstBufferSize;
  2090. strm.next_in = (BYTE const*)ress->srcBuffer;
  2091. strm.avail_in = ress->srcBufferLoaded;
  2092. for ( ; ; ) {
  2093. lzma_ret ret;
  2094. if (strm.avail_in == 0) {
  2095. ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
  2096. if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
  2097. strm.next_in = (BYTE const*)ress->srcBuffer;
  2098. strm.avail_in = ress->srcBufferLoaded;
  2099. }
  2100. ret = lzma_code(&strm, action);
  2101. if (ret == LZMA_BUF_ERROR) {
  2102. DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
  2103. decodingError = 1; break;
  2104. }
  2105. if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
  2106. DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
  2107. srcFileName, ret);
  2108. decodingError = 1; break;
  2109. }
  2110. { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
  2111. if (decompBytes) {
  2112. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
  2113. outFileSize += decompBytes;
  2114. strm.next_out = (BYTE*)ress->dstBuffer;
  2115. strm.avail_out = ress->dstBufferSize;
  2116. } }
  2117. if (ret == LZMA_STREAM_END) break;
  2118. }
  2119. if (strm.avail_in > 0)
  2120. memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
  2121. ress->srcBufferLoaded = strm.avail_in;
  2122. lzma_end(&strm);
  2123. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2124. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2125. }
  2126. #endif
  2127. #ifdef ZSTD_LZ4DECOMPRESS
  2128. static unsigned long long
  2129. FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
  2130. const FIO_prefs_t* const prefs,
  2131. const char* srcFileName)
  2132. {
  2133. unsigned long long filesize = 0;
  2134. LZ4F_errorCode_t nextToLoad;
  2135. LZ4F_decompressionContext_t dCtx;
  2136. LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
  2137. int decodingError = 0;
  2138. unsigned storedSkips = 0;
  2139. if (LZ4F_isError(errorCode)) {
  2140. DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
  2141. return FIO_ERROR_FRAME_DECODING;
  2142. }
  2143. /* Init feed with magic number (already consumed from FILE* sFile) */
  2144. { size_t inSize = 4;
  2145. size_t outSize= 0;
  2146. MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
  2147. nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
  2148. if (LZ4F_isError(nextToLoad)) {
  2149. DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
  2150. srcFileName, LZ4F_getErrorName(nextToLoad));
  2151. LZ4F_freeDecompressionContext(dCtx);
  2152. return FIO_ERROR_FRAME_DECODING;
  2153. } }
  2154. /* Main Loop */
  2155. for (;nextToLoad;) {
  2156. size_t readSize;
  2157. size_t pos = 0;
  2158. size_t decodedBytes = ress->dstBufferSize;
  2159. /* Read input */
  2160. if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
  2161. readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
  2162. if (!readSize) break; /* reached end of file or stream */
  2163. while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
  2164. /* Decode Input (at least partially) */
  2165. size_t remaining = readSize - pos;
  2166. decodedBytes = ress->dstBufferSize;
  2167. nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
  2168. if (LZ4F_isError(nextToLoad)) {
  2169. DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
  2170. srcFileName, LZ4F_getErrorName(nextToLoad));
  2171. decodingError = 1; nextToLoad = 0; break;
  2172. }
  2173. pos += remaining;
  2174. /* Write Block */
  2175. if (decodedBytes) {
  2176. UTIL_HumanReadableSize_t hrs;
  2177. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decodedBytes, prefs, storedSkips);
  2178. filesize += decodedBytes;
  2179. hrs = UTIL_makeHumanReadableSize(filesize);
  2180. DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
  2181. }
  2182. if (!nextToLoad) break;
  2183. }
  2184. }
  2185. /* can be out because readSize == 0, which could be an fread() error */
  2186. if (ferror(srcFile)) {
  2187. DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
  2188. decodingError=1;
  2189. }
  2190. if (nextToLoad!=0) {
  2191. DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
  2192. decodingError=1;
  2193. }
  2194. LZ4F_freeDecompressionContext(dCtx);
  2195. ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
  2196. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2197. return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
  2198. }
  2199. #endif
  2200. /** FIO_decompressFrames() :
  2201. * Find and decode frames inside srcFile
  2202. * srcFile presumed opened and valid
  2203. * @return : 0 : OK
  2204. * 1 : error
  2205. */
  2206. static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
  2207. dRess_t ress, FILE* srcFile,
  2208. const FIO_prefs_t* const prefs,
  2209. const char* dstFileName, const char* srcFileName)
  2210. {
  2211. unsigned readSomething = 0;
  2212. unsigned long long filesize = 0;
  2213. assert(srcFile != NULL);
  2214. /* for each frame */
  2215. for ( ; ; ) {
  2216. /* check magic number -> version */
  2217. size_t const toRead = 4;
  2218. const BYTE* const buf = (const BYTE*)ress.srcBuffer;
  2219. if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */
  2220. ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
  2221. (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
  2222. if (ress.srcBufferLoaded==0) {
  2223. if (readSomething==0) { /* srcFile is empty (which is invalid) */
  2224. DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
  2225. return 1;
  2226. } /* else, just reached frame boundary */
  2227. break; /* no more input */
  2228. }
  2229. readSomething = 1; /* there is at least 1 byte in srcFile */
  2230. if (ress.srcBufferLoaded < toRead) {
  2231. DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
  2232. return 1;
  2233. }
  2234. if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
  2235. unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize);
  2236. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2237. filesize += frameSize;
  2238. } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
  2239. #ifdef ZSTD_GZDECOMPRESS
  2240. unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName);
  2241. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2242. filesize += frameSize;
  2243. #else
  2244. DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
  2245. return 1;
  2246. #endif
  2247. } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
  2248. || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
  2249. #ifdef ZSTD_LZMADECOMPRESS
  2250. unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD);
  2251. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2252. filesize += frameSize;
  2253. #else
  2254. DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
  2255. return 1;
  2256. #endif
  2257. } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
  2258. #ifdef ZSTD_LZ4DECOMPRESS
  2259. unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName);
  2260. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2261. filesize += frameSize;
  2262. #else
  2263. DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
  2264. return 1;
  2265. #endif
  2266. } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */
  2267. return FIO_passThrough(prefs,
  2268. ress.dstFile, srcFile,
  2269. ress.srcBuffer, ress.srcBufferSize,
  2270. ress.srcBufferLoaded);
  2271. } else {
  2272. DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
  2273. return 1;
  2274. } } /* for each frame */
  2275. /* Final Status */
  2276. fCtx->totalBytesOutput += (size_t)filesize;
  2277. DISPLAYLEVEL(2, "\r%79s\r", "");
  2278. /* No status message in pipe mode (stdin - stdout) or multi-files mode */
  2279. if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) ||
  2280. g_display_prefs.displayLevel >= 3 ||
  2281. g_display_prefs.progressSetting == FIO_ps_always) {
  2282. DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize);
  2283. }
  2284. return 0;
  2285. }
  2286. /** FIO_decompressDstFile() :
  2287. open `dstFileName`,
  2288. or path-through if ress.dstFile is already != 0,
  2289. then start decompression process (FIO_decompressFrames()).
  2290. @return : 0 : OK
  2291. 1 : operation aborted
  2292. */
  2293. static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
  2294. FIO_prefs_t* const prefs,
  2295. dRess_t ress, FILE* srcFile,
  2296. const char* dstFileName, const char* srcFileName)
  2297. {
  2298. int result;
  2299. stat_t statbuf;
  2300. int releaseDstFile = 0;
  2301. int transferMTime = 0;
  2302. if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
  2303. int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
  2304. if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
  2305. && UTIL_stat(srcFileName, &statbuf)
  2306. && UTIL_isRegularFileStat(&statbuf) ) {
  2307. dstFilePermissions = statbuf.st_mode;
  2308. transferMTime = 1;
  2309. }
  2310. releaseDstFile = 1;
  2311. ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
  2312. if (ress.dstFile==NULL) return 1;
  2313. /* Must only be added after FIO_openDstFile() succeeds.
  2314. * Otherwise we may delete the destination file if it already exists,
  2315. * and the user presses Ctrl-C when asked if they wish to overwrite.
  2316. */
  2317. addHandler(dstFileName);
  2318. }
  2319. result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
  2320. if (releaseDstFile) {
  2321. FILE* const dstFile = ress.dstFile;
  2322. clearHandler();
  2323. ress.dstFile = NULL;
  2324. if (fclose(dstFile)) {
  2325. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  2326. result = 1;
  2327. }
  2328. if (transferMTime) {
  2329. UTIL_utime(dstFileName, &statbuf);
  2330. }
  2331. if ( (result != 0) /* operation failure */
  2332. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  2333. ) {
  2334. FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
  2335. }
  2336. }
  2337. return result;
  2338. }
  2339. /** FIO_decompressSrcFile() :
  2340. Open `srcFileName`, transfer control to decompressDstFile()
  2341. @return : 0 : OK
  2342. 1 : error
  2343. */
  2344. static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
  2345. {
  2346. FILE* srcFile;
  2347. int result;
  2348. if (UTIL_isDirectory(srcFileName)) {
  2349. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  2350. return 1;
  2351. }
  2352. srcFile = FIO_openSrcFile(prefs, srcFileName);
  2353. if (srcFile==NULL) return 1;
  2354. ress.srcBufferLoaded = 0;
  2355. result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName);
  2356. /* Close file */
  2357. if (fclose(srcFile)) {
  2358. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
  2359. return 1;
  2360. }
  2361. if ( prefs->removeSrcFile /* --rm */
  2362. && (result==0) /* decompression successful */
  2363. && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
  2364. /* We must clear the handler, since after this point calling it would
  2365. * delete both the source and destination files.
  2366. */
  2367. clearHandler();
  2368. if (FIO_removeFile(srcFileName)) {
  2369. /* failed to remove src file */
  2370. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  2371. return 1;
  2372. } }
  2373. return result;
  2374. }
  2375. int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
  2376. const char* dstFileName, const char* srcFileName,
  2377. const char* dictFileName)
  2378. {
  2379. dRess_t const ress = FIO_createDResources(prefs, dictFileName);
  2380. int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2381. FIO_freeDResources(ress);
  2382. return decodingError;
  2383. }
  2384. static const char *suffixList[] = {
  2385. ZSTD_EXTENSION,
  2386. TZSTD_EXTENSION,
  2387. #ifndef ZSTD_NODECOMPRESS
  2388. ZSTD_ALT_EXTENSION,
  2389. #endif
  2390. #ifdef ZSTD_GZDECOMPRESS
  2391. GZ_EXTENSION,
  2392. TGZ_EXTENSION,
  2393. #endif
  2394. #ifdef ZSTD_LZMADECOMPRESS
  2395. LZMA_EXTENSION,
  2396. XZ_EXTENSION,
  2397. TXZ_EXTENSION,
  2398. #endif
  2399. #ifdef ZSTD_LZ4DECOMPRESS
  2400. LZ4_EXTENSION,
  2401. TLZ4_EXTENSION,
  2402. #endif
  2403. NULL
  2404. };
  2405. static const char *suffixListStr =
  2406. ZSTD_EXTENSION "/" TZSTD_EXTENSION
  2407. #ifdef ZSTD_GZDECOMPRESS
  2408. "/" GZ_EXTENSION "/" TGZ_EXTENSION
  2409. #endif
  2410. #ifdef ZSTD_LZMADECOMPRESS
  2411. "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
  2412. #endif
  2413. #ifdef ZSTD_LZ4DECOMPRESS
  2414. "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
  2415. #endif
  2416. ;
  2417. /* FIO_determineDstName() :
  2418. * create a destination filename from a srcFileName.
  2419. * @return a pointer to it.
  2420. * @return == NULL if there is an error */
  2421. static const char*
  2422. FIO_determineDstName(const char* srcFileName, const char* outDirName)
  2423. {
  2424. static size_t dfnbCapacity = 0;
  2425. static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
  2426. size_t dstFileNameEndPos;
  2427. char* outDirFilename = NULL;
  2428. const char* dstSuffix = "";
  2429. size_t dstSuffixLen = 0;
  2430. size_t sfnSize = strlen(srcFileName);
  2431. size_t srcSuffixLen;
  2432. const char* const srcSuffix = strrchr(srcFileName, '.');
  2433. if (srcSuffix == NULL) {
  2434. DISPLAYLEVEL(1,
  2435. "zstd: %s: unknown suffix (%s expected). "
  2436. "Can't derive the output file name. "
  2437. "Specify it with -o dstFileName. Ignoring.\n",
  2438. srcFileName, suffixListStr);
  2439. return NULL;
  2440. }
  2441. srcSuffixLen = strlen(srcSuffix);
  2442. {
  2443. const char** matchedSuffixPtr;
  2444. for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
  2445. if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
  2446. break;
  2447. }
  2448. }
  2449. /* check suffix is authorized */
  2450. if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
  2451. DISPLAYLEVEL(1,
  2452. "zstd: %s: unknown suffix (%s expected). "
  2453. "Can't derive the output file name. "
  2454. "Specify it with -o dstFileName. Ignoring.\n",
  2455. srcFileName, suffixListStr);
  2456. return NULL;
  2457. }
  2458. if ((*matchedSuffixPtr)[1] == 't') {
  2459. dstSuffix = ".tar";
  2460. dstSuffixLen = strlen(dstSuffix);
  2461. }
  2462. }
  2463. if (outDirName) {
  2464. outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
  2465. sfnSize = strlen(outDirFilename);
  2466. assert(outDirFilename != NULL);
  2467. }
  2468. if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
  2469. /* allocate enough space to write dstFilename into it */
  2470. free(dstFileNameBuffer);
  2471. dfnbCapacity = sfnSize + 20;
  2472. dstFileNameBuffer = (char*)malloc(dfnbCapacity);
  2473. if (dstFileNameBuffer==NULL)
  2474. EXM_THROW(74, "%s : not enough memory for dstFileName",
  2475. strerror(errno));
  2476. }
  2477. /* return dst name == src name truncated from suffix */
  2478. assert(dstFileNameBuffer != NULL);
  2479. dstFileNameEndPos = sfnSize - srcSuffixLen;
  2480. if (outDirFilename) {
  2481. memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
  2482. free(outDirFilename);
  2483. } else {
  2484. memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
  2485. }
  2486. /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
  2487. * extension on decompression. Also writes terminating null. */
  2488. strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
  2489. return dstFileNameBuffer;
  2490. /* note : dstFileNameBuffer memory is not going to be free */
  2491. }
  2492. int
  2493. FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
  2494. FIO_prefs_t* const prefs,
  2495. const char** srcNamesTable,
  2496. const char* outMirroredRootDirName,
  2497. const char* outDirName, const char* outFileName,
  2498. const char* dictFileName)
  2499. {
  2500. int status;
  2501. int error = 0;
  2502. dRess_t ress = FIO_createDResources(prefs, dictFileName);
  2503. if (outFileName) {
  2504. if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  2505. FIO_freeDResources(ress);
  2506. return 1;
  2507. }
  2508. if (!prefs->testMode) {
  2509. ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  2510. if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
  2511. }
  2512. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
  2513. status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
  2514. if (!status) fCtx->nbFilesProcessed++;
  2515. error |= status;
  2516. }
  2517. if ((!prefs->testMode) && (fclose(ress.dstFile)))
  2518. EXM_THROW(72, "Write error : %s : cannot properly close output file",
  2519. strerror(errno));
  2520. } else {
  2521. if (outMirroredRootDirName)
  2522. UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  2523. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
  2524. const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
  2525. const char* dstFileName = NULL;
  2526. if (outMirroredRootDirName) {
  2527. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  2528. if (validMirroredDirName) {
  2529. dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
  2530. free(validMirroredDirName);
  2531. } else {
  2532. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
  2533. }
  2534. } else {
  2535. dstFileName = FIO_determineDstName(srcFileName, outDirName);
  2536. }
  2537. if (dstFileName == NULL) { error=1; continue; }
  2538. status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2539. if (!status) fCtx->nbFilesProcessed++;
  2540. error |= status;
  2541. }
  2542. if (outDirName)
  2543. FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
  2544. }
  2545. if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0)
  2546. DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput);
  2547. FIO_freeDResources(ress);
  2548. return error;
  2549. }
  2550. /* **************************************************************************
  2551. * .zst file info (--list command)
  2552. ***************************************************************************/
  2553. typedef struct {
  2554. U64 decompressedSize;
  2555. U64 compressedSize;
  2556. U64 windowSize;
  2557. int numActualFrames;
  2558. int numSkippableFrames;
  2559. int decompUnavailable;
  2560. int usesCheck;
  2561. U32 nbFiles;
  2562. } fileInfo_t;
  2563. typedef enum {
  2564. info_success=0,
  2565. info_frame_error=1,
  2566. info_not_zstd=2,
  2567. info_file_error=3,
  2568. info_truncated_input=4,
  2569. } InfoError;
  2570. #define ERROR_IF(c,n,...) { \
  2571. if (c) { \
  2572. DISPLAYLEVEL(1, __VA_ARGS__); \
  2573. DISPLAYLEVEL(1, " \n"); \
  2574. return n; \
  2575. } \
  2576. }
  2577. static InfoError
  2578. FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
  2579. {
  2580. /* begin analyzing frame */
  2581. for ( ; ; ) {
  2582. BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
  2583. size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
  2584. if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
  2585. if ( feof(srcFile)
  2586. && (numBytesRead == 0)
  2587. && (info->compressedSize > 0)
  2588. && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
  2589. unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
  2590. unsigned long long file_size = (unsigned long long) info->compressedSize;
  2591. ERROR_IF(file_position != file_size, info_truncated_input,
  2592. "Error: seeked to position %llu, which is beyond file size of %llu\n",
  2593. file_position,
  2594. file_size);
  2595. break; /* correct end of file => success */
  2596. }
  2597. ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
  2598. ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
  2599. }
  2600. { U32 const magicNumber = MEM_readLE32(headerBuffer);
  2601. /* Zstandard frame */
  2602. if (magicNumber == ZSTD_MAGICNUMBER) {
  2603. ZSTD_frameHeader header;
  2604. U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
  2605. if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
  2606. || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
  2607. info->decompUnavailable = 1;
  2608. } else {
  2609. info->decompressedSize += frameContentSize;
  2610. }
  2611. ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
  2612. info_frame_error, "Error: could not decode frame header");
  2613. info->windowSize = header.windowSize;
  2614. /* move to the end of the frame header */
  2615. { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
  2616. ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
  2617. ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
  2618. info_frame_error, "Error: could not move to end of frame header");
  2619. }
  2620. /* skip all blocks in the frame */
  2621. { int lastBlock = 0;
  2622. do {
  2623. BYTE blockHeaderBuffer[3];
  2624. ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
  2625. info_frame_error, "Error while reading block header");
  2626. { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
  2627. U32 const blockTypeID = (blockHeader >> 1) & 3;
  2628. U32 const isRLE = (blockTypeID == 1);
  2629. U32 const isWrongBlock = (blockTypeID == 3);
  2630. long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
  2631. ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
  2632. lastBlock = blockHeader & 1;
  2633. ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
  2634. info_frame_error, "Error: could not skip to end of block");
  2635. }
  2636. } while (lastBlock != 1);
  2637. }
  2638. /* check if checksum is used */
  2639. { BYTE const frameHeaderDescriptor = headerBuffer[4];
  2640. int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
  2641. if (contentChecksumFlag) {
  2642. info->usesCheck = 1;
  2643. ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
  2644. info_frame_error, "Error: could not skip past checksum");
  2645. } }
  2646. info->numActualFrames++;
  2647. }
  2648. /* Skippable frame */
  2649. else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
  2650. U32 const frameSize = MEM_readLE32(headerBuffer + 4);
  2651. long const seek = (long)(8 + frameSize - numBytesRead);
  2652. ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
  2653. info_frame_error, "Error: could not find end of skippable frame");
  2654. info->numSkippableFrames++;
  2655. }
  2656. /* unknown content */
  2657. else {
  2658. return info_not_zstd;
  2659. }
  2660. } /* magic number analysis */
  2661. } /* end analyzing frames */
  2662. return info_success;
  2663. }
  2664. static InfoError
  2665. getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
  2666. {
  2667. InfoError status;
  2668. FILE* const srcFile = FIO_openSrcFile(NULL, inFileName);
  2669. ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
  2670. info->compressedSize = UTIL_getFileSize(inFileName);
  2671. status = FIO_analyzeFrames(info, srcFile);
  2672. fclose(srcFile);
  2673. info->nbFiles = 1;
  2674. return status;
  2675. }
  2676. /** getFileInfo() :
  2677. * Reads information from file, stores in *info
  2678. * @return : InfoError status
  2679. */
  2680. static InfoError
  2681. getFileInfo(fileInfo_t* info, const char* srcFileName)
  2682. {
  2683. ERROR_IF(!UTIL_isRegularFile(srcFileName),
  2684. info_file_error, "Error : %s is not a file", srcFileName);
  2685. return getFileInfo_fileConfirmed(info, srcFileName);
  2686. }
  2687. static void
  2688. displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
  2689. {
  2690. UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
  2691. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
  2692. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
  2693. double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
  2694. const char* const checkString = (info->usesCheck ? "XXH64" : "None");
  2695. if (displayLevel <= 2) {
  2696. if (!info->decompUnavailable) {
  2697. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
  2698. info->numSkippableFrames + info->numActualFrames,
  2699. info->numSkippableFrames,
  2700. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2701. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2702. ratio, checkString, inFileName);
  2703. } else {
  2704. DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
  2705. info->numSkippableFrames + info->numActualFrames,
  2706. info->numSkippableFrames,
  2707. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2708. checkString, inFileName);
  2709. }
  2710. } else {
  2711. DISPLAYOUT("%s \n", inFileName);
  2712. DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
  2713. if (info->numSkippableFrames)
  2714. DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
  2715. DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
  2716. window_hrs.precision, window_hrs.value, window_hrs.suffix,
  2717. (unsigned long long)info->windowSize);
  2718. DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
  2719. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2720. (unsigned long long)info->compressedSize);
  2721. if (!info->decompUnavailable) {
  2722. DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
  2723. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2724. (unsigned long long)info->decompressedSize);
  2725. DISPLAYOUT("Ratio: %.4f\n", ratio);
  2726. }
  2727. DISPLAYOUT("Check: %s\n", checkString);
  2728. DISPLAYOUT("\n");
  2729. }
  2730. }
  2731. static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
  2732. {
  2733. fileInfo_t total;
  2734. memset(&total, 0, sizeof(total));
  2735. total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
  2736. total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
  2737. total.compressedSize = fi1.compressedSize + fi2.compressedSize;
  2738. total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
  2739. total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
  2740. total.usesCheck = fi1.usesCheck & fi2.usesCheck;
  2741. total.nbFiles = fi1.nbFiles + fi2.nbFiles;
  2742. return total;
  2743. }
  2744. static int
  2745. FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
  2746. {
  2747. fileInfo_t info;
  2748. memset(&info, 0, sizeof(info));
  2749. { InfoError const error = getFileInfo(&info, inFileName);
  2750. switch (error) {
  2751. case info_frame_error:
  2752. /* display error, but provide output */
  2753. DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
  2754. break;
  2755. case info_not_zstd:
  2756. DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
  2757. if (displayLevel > 2) DISPLAYOUT("\n");
  2758. return 1;
  2759. case info_file_error:
  2760. /* error occurred while opening the file */
  2761. if (displayLevel > 2) DISPLAYOUT("\n");
  2762. return 1;
  2763. case info_truncated_input:
  2764. DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
  2765. if (displayLevel > 2) DISPLAYOUT("\n");
  2766. return 1;
  2767. case info_success:
  2768. default:
  2769. break;
  2770. }
  2771. displayInfo(inFileName, &info, displayLevel);
  2772. *total = FIO_addFInfo(*total, info);
  2773. assert(error == info_success || error == info_frame_error);
  2774. return (int)error;
  2775. }
  2776. }
  2777. int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
  2778. {
  2779. /* ensure no specified input is stdin (needs fseek() capability) */
  2780. { unsigned u;
  2781. for (u=0; u<numFiles;u++) {
  2782. ERROR_IF(!strcmp (filenameTable[u], stdinmark),
  2783. 1, "zstd: --list does not support reading from standard input");
  2784. } }
  2785. if (numFiles == 0) {
  2786. if (!IS_CONSOLE(stdin)) {
  2787. DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
  2788. }
  2789. DISPLAYLEVEL(1, "No files given \n");
  2790. return 1;
  2791. }
  2792. if (displayLevel <= 2) {
  2793. DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
  2794. }
  2795. { int error = 0;
  2796. fileInfo_t total;
  2797. memset(&total, 0, sizeof(total));
  2798. total.usesCheck = 1;
  2799. /* --list each file, and check for any error */
  2800. { unsigned u;
  2801. for (u=0; u<numFiles;u++) {
  2802. error |= FIO_listFile(&total, filenameTable[u], displayLevel);
  2803. } }
  2804. if (numFiles > 1 && displayLevel <= 2) { /* display total */
  2805. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
  2806. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
  2807. double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
  2808. const char* const checkString = (total.usesCheck ? "XXH64" : "");
  2809. DISPLAYOUT("----------------------------------------------------------------- \n");
  2810. if (total.decompUnavailable) {
  2811. DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
  2812. total.numSkippableFrames + total.numActualFrames,
  2813. total.numSkippableFrames,
  2814. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2815. checkString, (unsigned)total.nbFiles);
  2816. } else {
  2817. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
  2818. total.numSkippableFrames + total.numActualFrames,
  2819. total.numSkippableFrames,
  2820. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2821. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2822. ratio, checkString, (unsigned)total.nbFiles);
  2823. } }
  2824. return error;
  2825. }
  2826. }
  2827. #endif /* #ifndef ZSTD_NODECOMPRESS */