2
0

Options.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. * Copyright (c) 2016-present, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. */
  9. #include "Options.h"
  10. #include "util.h"
  11. #include "utils/ScopeGuard.h"
  12. #include <algorithm>
  13. #include <cassert>
  14. #include <cstdio>
  15. #include <cstring>
  16. #include <iterator>
  17. #include <thread>
  18. #include <vector>
  19. namespace pzstd {
  20. namespace {
  // Chooses the thread count used when -p / --processes is not given.
  // A build-time PZSTD_NUM_THREADS definition wins; otherwise the value reported
  // by std::thread::hardware_concurrency() is returned unchanged.
  unsigned defaultNumThreads() {
  #if defined(PZSTD_NUM_THREADS)
    const unsigned threadCount = PZSTD_NUM_THREADS;
  #else
    const unsigned threadCount = std::thread::hardware_concurrency();
  #endif
    return threadCount;
  }
  // Parses the run of decimal digits at the start of *arg.
  //
  // On return *arg points at the first character that is not a decimal digit
  // (possibly the terminating NUL). If no digit is present, 0 is returned and
  // *arg is left untouched.
  //
  // Values too large for `unsigned` saturate at the maximum representable value
  // instead of silently wrapping around, so an absurdly long number can never
  // masquerade as a small, valid one. All digits are still consumed.
  unsigned parseUnsigned(const char **arg) {
    constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
    unsigned result = 0;
    while (**arg >= '0' && **arg <= '9') {
      const unsigned digit = static_cast<unsigned>(**arg - '0');
      // result * 10 + digit fits iff result <= (kMaxValue - digit) / 10.
      if (result > (kMaxValue - digit) / 10) {
        result = kMaxValue;
      } else {
        result = result * 10 + digit;
      }
      ++(*arg);
    }
    return result;
  }
  // Fetches the argument belonging to the short option character *options.
  //
  // If text follows the option character in the same token (e.g. "p4"), that
  // text is the argument and `i` is not modified. Otherwise `i` is advanced to
  // the next argv entry, which is returned. When argv is exhausted an error is
  // printed to stderr and nullptr is returned (with `i` left equal to argc).
  const char *getArgument(const char *options, const char **argv, int &i,
                          int argc) {
    const bool hasAttachedValue = options[1] != 0;
    if (hasAttachedValue) {
      return options + 1;
    }

    // Detached form: the value is expected in the following argv slot.
    ++i;
    if (i != argc) {
      return argv[i];
    }

    std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
                 *options);
    return nullptr;
  }
  // Suffix appended to compressed output names and stripped when decompressing.
  const std::string kZstdExtension{".zst"};
  // "-" is the file name used for standard input / standard output.
  constexpr char kStdIn[] = "-";
  constexpr char kStdOut[] = "-";
  // Level used when no -# option is given.
  constexpr unsigned kDefaultCompressionLevel{3};
  // Highest level accepted without --ultra.
  constexpr unsigned kMaxNonUltraCompressionLevel{19};
  // Output path used in test mode (-t), where the decompressed data is discarded.
  #if defined(_WIN32)
  const char nullOutput[] = "nul";
  #else
  const char nullOutput[] = "/dev/null";
  #endif
  // Reports on stderr that `option` names a feature pzstd does not implement.
  // Only prints; callers decide how to fail.
  void notSupported(const char *option) {
    const char *const unsupported = option;
    std::fprintf(stderr, "Operation not supported: %s\n", unsupported);
  }
  63. void usage() {
  64. std::fprintf(stderr, "Usage:\n");
  65. std::fprintf(stderr, " pzstd [args] [FILE(s)]\n");
  66. std::fprintf(stderr, "Parallel ZSTD options:\n");
  67. std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n");
  68. std::fprintf(stderr, "ZSTD options:\n");
  69. std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
  70. std::fprintf(stderr, " -d, --decompress : decompression\n");
  71. std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n");
  72. std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n");
  73. std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n");
  74. std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n");
  75. std::fprintf(stderr, " -h, --help : display help and exit\n");
  76. std::fprintf(stderr, " -V, --version : display version number and exit\n");
  77. std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
  78. std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
  79. std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n");
  80. #ifdef UTIL_HAS_CREATEFILELIST
  81. std::fprintf(stderr, " -r : operate recursively on directories\n");
  82. #endif
  83. std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
  84. std::fprintf(stderr, " -C, --check : integrity check (default)\n");
  85. std::fprintf(stderr, " --no-check : no integrity check\n");
  86. std::fprintf(stderr, " -t, --test : test compressed file integrity\n");
  87. std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n");
  88. }
  89. } // anonymous namespace
  90. Options::Options()
  91. : numThreads(defaultNumThreads()), maxWindowLog(23),
  92. compressionLevel(kDefaultCompressionLevel), decompress(false),
  93. overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
  94. checksum(true), verbosity(2) {}
  95. Options::Status Options::parse(int argc, const char **argv) {
  96. bool test = false;
  97. bool recursive = false;
  98. bool ultra = false;
  99. bool forceStdout = false;
  100. bool followLinks = false;
  101. // Local copy of input files, which are pointers into argv.
  102. std::vector<const char *> localInputFiles;
  103. for (int i = 1; i < argc; ++i) {
  104. const char *arg = argv[i];
  105. // Protect against empty arguments
  106. if (arg[0] == 0) {
  107. continue;
  108. }
  109. // Everything after "--" is an input file
  110. if (!std::strcmp(arg, "--")) {
  111. ++i;
  112. std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
  113. break;
  114. }
  115. // Long arguments that don't have a short option
  116. {
  117. bool isLongOption = true;
  118. if (!std::strcmp(arg, "--rm")) {
  119. keepSource = false;
  120. } else if (!std::strcmp(arg, "--ultra")) {
  121. ultra = true;
  122. maxWindowLog = 0;
  123. } else if (!std::strcmp(arg, "--no-check")) {
  124. checksum = false;
  125. } else if (!std::strcmp(arg, "--sparse")) {
  126. writeMode = WriteMode::Sparse;
  127. notSupported("Sparse mode");
  128. return Status::Failure;
  129. } else if (!std::strcmp(arg, "--no-sparse")) {
  130. writeMode = WriteMode::Regular;
  131. notSupported("Sparse mode");
  132. return Status::Failure;
  133. } else if (!std::strcmp(arg, "--dictID")) {
  134. notSupported(arg);
  135. return Status::Failure;
  136. } else if (!std::strcmp(arg, "--no-dictID")) {
  137. notSupported(arg);
  138. return Status::Failure;
  139. } else {
  140. isLongOption = false;
  141. }
  142. if (isLongOption) {
  143. continue;
  144. }
  145. }
  146. // Arguments with a short option simply set their short option.
  147. const char *options = nullptr;
  148. if (!std::strcmp(arg, "--processes")) {
  149. options = "p";
  150. } else if (!std::strcmp(arg, "--version")) {
  151. options = "V";
  152. } else if (!std::strcmp(arg, "--help")) {
  153. options = "h";
  154. } else if (!std::strcmp(arg, "--decompress")) {
  155. options = "d";
  156. } else if (!std::strcmp(arg, "--force")) {
  157. options = "f";
  158. } else if (!std::strcmp(arg, "--stdout")) {
  159. options = "c";
  160. } else if (!std::strcmp(arg, "--keep")) {
  161. options = "k";
  162. } else if (!std::strcmp(arg, "--verbose")) {
  163. options = "v";
  164. } else if (!std::strcmp(arg, "--quiet")) {
  165. options = "q";
  166. } else if (!std::strcmp(arg, "--check")) {
  167. options = "C";
  168. } else if (!std::strcmp(arg, "--test")) {
  169. options = "t";
  170. } else if (arg[0] == '-' && arg[1] != 0) {
  171. options = arg + 1;
  172. } else {
  173. localInputFiles.emplace_back(arg);
  174. continue;
  175. }
  176. assert(options != nullptr);
  177. bool finished = false;
  178. while (!finished && *options != 0) {
  179. // Parse the compression level
  180. if (*options >= '0' && *options <= '9') {
  181. compressionLevel = parseUnsigned(&options);
  182. continue;
  183. }
  184. switch (*options) {
  185. case 'h':
  186. case 'H':
  187. usage();
  188. return Status::Message;
  189. case 'V':
  190. std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
  191. return Status::Message;
  192. case 'p': {
  193. finished = true;
  194. const char *optionArgument = getArgument(options, argv, i, argc);
  195. if (optionArgument == nullptr) {
  196. return Status::Failure;
  197. }
  198. if (*optionArgument < '0' || *optionArgument > '9') {
  199. std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
  200. optionArgument);
  201. return Status::Failure;
  202. }
  203. numThreads = parseUnsigned(&optionArgument);
  204. if (*optionArgument != 0) {
  205. std::fprintf(stderr,
  206. "Option -p expects a number, but %u%s provided\n",
  207. numThreads, optionArgument);
  208. return Status::Failure;
  209. }
  210. break;
  211. }
  212. case 'o': {
  213. finished = true;
  214. const char *optionArgument = getArgument(options, argv, i, argc);
  215. if (optionArgument == nullptr) {
  216. return Status::Failure;
  217. }
  218. outputFile = optionArgument;
  219. break;
  220. }
  221. case 'C':
  222. checksum = true;
  223. break;
  224. case 'k':
  225. keepSource = true;
  226. break;
  227. case 'd':
  228. decompress = true;
  229. break;
  230. case 'f':
  231. overwrite = true;
  232. forceStdout = true;
  233. followLinks = true;
  234. break;
  235. case 't':
  236. test = true;
  237. decompress = true;
  238. break;
  239. #ifdef UTIL_HAS_CREATEFILELIST
  240. case 'r':
  241. recursive = true;
  242. break;
  243. #endif
  244. case 'c':
  245. outputFile = kStdOut;
  246. forceStdout = true;
  247. break;
  248. case 'v':
  249. ++verbosity;
  250. break;
  251. case 'q':
  252. --verbosity;
  253. // Ignore them for now
  254. break;
  255. // Unsupported options from Zstd
  256. case 'D':
  257. case 's':
  258. notSupported("Zstd dictionaries.");
  259. return Status::Failure;
  260. case 'b':
  261. case 'e':
  262. case 'i':
  263. case 'B':
  264. notSupported("Zstd benchmarking options.");
  265. return Status::Failure;
  266. default:
  267. std::fprintf(stderr, "Invalid argument: %s\n", arg);
  268. return Status::Failure;
  269. }
  270. if (!finished) {
  271. ++options;
  272. }
  273. } // while (*options != 0);
  274. } // for (int i = 1; i < argc; ++i);
  275. // Set options for test mode
  276. if (test) {
  277. outputFile = nullOutput;
  278. keepSource = true;
  279. }
  280. // Input file defaults to standard input if not provided.
  281. if (localInputFiles.empty()) {
  282. localInputFiles.emplace_back(kStdIn);
  283. }
  284. // Check validity of input files
  285. if (localInputFiles.size() > 1) {
  286. const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
  287. std::string{kStdIn});
  288. if (it != localInputFiles.end()) {
  289. std::fprintf(
  290. stderr,
  291. "Cannot specify standard input when handling multiple files\n");
  292. return Status::Failure;
  293. }
  294. }
  295. if (localInputFiles.size() > 1 || recursive) {
  296. if (!outputFile.empty() && outputFile != nullOutput) {
  297. std::fprintf(
  298. stderr,
  299. "Cannot specify an output file when handling multiple inputs\n");
  300. return Status::Failure;
  301. }
  302. }
  303. g_utilDisplayLevel = verbosity;
  304. // Remove local input files that are symbolic links
  305. if (!followLinks) {
  306. std::remove_if(localInputFiles.begin(), localInputFiles.end(),
  307. [&](const char *path) {
  308. bool isLink = UTIL_isLink(path);
  309. if (isLink && verbosity >= 2) {
  310. std::fprintf(
  311. stderr,
  312. "Warning : %s is symbolic link, ignoring\n",
  313. path);
  314. }
  315. return isLink;
  316. });
  317. }
  318. // Translate input files/directories into files to (de)compress
  319. if (recursive) {
  320. FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
  321. if (files == nullptr) {
  322. std::fprintf(stderr, "Error traversing directories\n");
  323. return Status::Failure;
  324. }
  325. auto guard =
  326. makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
  327. if (files->tableSize == 0) {
  328. std::fprintf(stderr, "No files found\n");
  329. return Status::Failure;
  330. }
  331. inputFiles.resize(files->tableSize);
  332. std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
  333. } else {
  334. inputFiles.resize(localInputFiles.size());
  335. std::copy(localInputFiles.begin(), localInputFiles.end(),
  336. inputFiles.begin());
  337. }
  338. localInputFiles.clear();
  339. assert(!inputFiles.empty());
  340. // If reading from standard input, default to standard output
  341. if (inputFiles[0] == kStdIn && outputFile.empty()) {
  342. assert(inputFiles.size() == 1);
  343. outputFile = "-";
  344. }
  345. if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
  346. assert(inputFiles.size() == 1);
  347. std::fprintf(stderr, "Cannot read input from interactive console\n");
  348. return Status::Failure;
  349. }
  350. if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
  351. std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
  352. "specified and decompressing\n");
  353. return Status::Failure;
  354. }
  355. // Check compression level
  356. {
  357. unsigned maxCLevel =
  358. ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
  359. if (compressionLevel > maxCLevel || compressionLevel == 0) {
  360. std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
  361. return Status::Failure;
  362. }
  363. }
  364. // Check that numThreads is set
  365. if (numThreads == 0) {
  366. std::fprintf(stderr, "Invalid arguments: # of threads not specified "
  367. "and unable to determine hardware concurrency.\n");
  368. return Status::Failure;
  369. }
  370. // Modify verbosity
  371. // If we are piping input and output, turn off interaction
  372. if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
  373. verbosity = 1;
  374. }
  375. // If we are in multi-file mode, turn off interaction
  376. if (inputFiles.size() > 1 && verbosity == 2) {
  377. verbosity = 1;
  378. }
  379. return Status::Success;
  380. }
  381. std::string Options::getOutputFile(const std::string &inputFile) const {
  382. if (!outputFile.empty()) {
  383. return outputFile;
  384. }
  385. // Attempt to add/remove zstd extension from the input file
  386. if (decompress) {
  387. int stemSize = inputFile.size() - kZstdExtension.size();
  388. if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
  389. return inputFile.substr(0, stemSize);
  390. } else {
  391. return "";
  392. }
  393. } else {
  394. return inputFile + kZstdExtension;
  395. }
  396. }
  397. }