io.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. // Copyright (c) 2024 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "io.h"
  15. #include <assert.h>
  16. #include <ctype.h>
  17. #include <stdlib.h>
  // Helpers for switching the standard streams between binary and text mode.
  // With SPIRV_WINDOWS defined they forward to _setmode(); otherwise the stdin
  // helpers and SET_STDOUT_MODE expand to nothing and the stdout helpers expand
  // to 0, so callers can always store the result and later hand it back to
  // SET_STDOUT_MODE().
  //
  // None of the macros carries a trailing semicolon: each one is written as a
  // plain expression so that the semicolon at the call site is the only one,
  // which keeps them safe inside unbraced if/else bodies and on the right-hand
  // side of an assignment.
  #if defined(SPIRV_WINDOWS)
  #include <fcntl.h>
  #include <io.h>
  #define SET_STDIN_TO_BINARY_MODE() _setmode(_fileno(stdin), O_BINARY)
  #define SET_STDIN_TO_TEXT_MODE() _setmode(_fileno(stdin), O_TEXT)
  #define SET_STDOUT_TO_BINARY_MODE() _setmode(_fileno(stdout), O_BINARY)
  #define SET_STDOUT_TO_TEXT_MODE() _setmode(_fileno(stdout), O_TEXT)
  #define SET_STDOUT_MODE(mode) _setmode(_fileno(stdout), (mode))
  #else
  #define SET_STDIN_TO_BINARY_MODE()
  #define SET_STDIN_TO_TEXT_MODE()
  #define SET_STDOUT_TO_BINARY_MODE() 0
  #define SET_STDOUT_TO_TEXT_MODE() 0
  #define SET_STDOUT_MODE(mode)
  #endif
  33. namespace {
  // Reads |file| to the end in fixed-size chunks and appends everything read to
  // |data|, treating the file as a sequence of elements of type |T|. A null
  // |file| is ignored and leaves |data| untouched.
  template <typename T>
  void ReadFile(FILE* file, std::vector<T>* data) {
    if (!file) {
      return;
    }
    // Roughly 4 KiB worth of elements per fread() call.
    constexpr size_t kChunkElements = 4096 / sizeof(T);
    T chunk[kChunkElements];
    for (;;) {
      const size_t count = fread(chunk, sizeof(T), kChunkElements, file);
      if (count == 0) {
        // Either end-of-file or an error; the caller inspects the stream.
        break;
      }
      data->insert(data->end(), chunk, chunk + count);
    }
  }
  // Reports whether |file| was opened and read without error.
  //
  // Returns false, after writing a diagnostic that names |filename| to standard
  // error, when |file| is null (the open failed) or when the stream's error
  // indicator is set. Returns true otherwise.
  bool WasFileCorrectlyRead(FILE* file, const char* filename) {
    if (file == nullptr) {
      fprintf(stderr, "error: file does not exist '%s'\n", filename);
      return false;
    }
    // Consult the error indicator directly rather than only after a failed
    // ftell(): ftell() may well succeed on a seekable stream whose last read
    // failed, which would let the error go unnoticed.
    if (ferror(file)) {
      fprintf(stderr, "error: error reading file '%s'\n", filename);
      return false;
    }
    return true;
  }
  // Checks that |read_size| bytes make up a whole number of elements of
  // |alignment| bytes each.
  //
  // Returns true when |read_size| is a multiple of |alignment|. Otherwise
  // writes a diagnostic naming |filename| to standard error and returns false.
  // |alignment| is expected to be larger than one byte (asserted).
  bool WasFileSizeAligned(const char* filename, size_t read_size,
                          size_t alignment) {
    assert(alignment != 1);
    if ((read_size % alignment) != 0) {
      // |alignment| is a size_t, so it must be printed with %zu (not %zd, which
      // is for the signed counterpart).
      fprintf(stderr,
              "error: file size should be a multiple of %zu; file '%s' corrupt\n",
              alignment, filename);
      return false;
    }
    return true;
  }
  // The textual layouts a hex dump may use. The examples all spell the same
  // 32-bit word, 0x07230203.
  enum class HexMode {
    // One token per word, with a hex prefix: 0x07230203, ...
    Words = 0,
    // One prefixed token per byte, most significant byte first:
    // 0x07, 0x23, 0x02, 0x03, ...
    BytesBigEndian = 1,
    // One prefixed token per byte, least significant byte first:
    // 0x03, 0x02, 0x23, 0x07, ...
    BytesLittleEndian = 2,
    // Bare two-digit bytes, most significant byte first: 07 23 02 03 ...
    StreamBigEndian = 3,
    // Bare two-digit bytes, least significant byte first: 03 02 23 07 ...
    StreamLittleEndian = 4,
  };
  // Returns true if |c| should be skipped between tokens: any character that
  // isspace() accepts, a comma separator, or the NUL that stands in for
  // end-of-input.
  bool IsSpace(char c) {
    // The <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0';
  }
  89. bool IsHexStream(const std::vector<char>& stream) {
  90. for (char c : stream) {
  91. if (IsSpace(c)) {
  92. continue;
  93. }
  94. // Every possible case of a SPIR-V hex stream starts with either '0' or 'x'
  95. // (see |HexMode| values). Make a decision upon inspecting the first
  96. // non-space character.
  97. return c == '0' || c == 'x' || c == 'X';
  98. }
  99. return false;
  100. }
  // Compares the first |len| characters of |token| and |expect| without regard
  // to letter case. Both buffers must hold at least |len| characters; NUL
  // characters get no special treatment. Returns true when all |len| positions
  // match (trivially true for |len| == 0).
  bool MatchIgnoreCase(const char* token, const char* expect, size_t len) {
    for (size_t i = 0; i < len; ++i) {
      // tolower() requires a value representable as unsigned char; a negative
      // plain char would be undefined behaviour.
      const int lhs = tolower(static_cast<unsigned char>(token[i]));
      const int rhs = tolower(static_cast<unsigned char>(expect[i]));
      if (lhs != rhs) {
        return false;
      }
    }
    return true;
  }
  109. // Helper class to tokenize a hex stream
  110. class HexTokenizer {
  111. public:
  112. HexTokenizer(const char* filename, const std::vector<char>& stream,
  113. std::vector<uint32_t>* data)
  114. : filename_(filename), stream_(stream), data_(data) {
  115. DetermineMode();
  116. }
  117. bool Parse() {
  118. while (current_ < stream_.size() && !encountered_error_) {
  119. data_->push_back(GetNextWord());
  120. // Make sure trailing space does not lead to parse error by skipping it
  121. // and exiting the loop.
  122. SkipSpace();
  123. }
  124. return !encountered_error_;
  125. }
  126. private:
  127. void ParseError(const char* reason) {
  128. if (!encountered_error_) {
  129. fprintf(stderr,
  130. "error: hex stream parse error at character %zu: %s in '%s'\n",
  131. current_, reason, filename_);
  132. encountered_error_ = true;
  133. }
  134. }
  135. // Skip whitespace until the next non-whitespace non-comma character.
  136. void SkipSpace() {
  137. while (current_ < stream_.size()) {
  138. char c = stream_[current_];
  139. if (!IsSpace(c)) {
  140. return;
  141. }
  142. ++current_;
  143. }
  144. }
  145. // Skip the 0x or x at the beginning of a hex value.
  146. void Skip0x() {
  147. // The first character must be 0 or x.
  148. const char first = Next();
  149. if (first != '0' && first != 'x' && first != 'X') {
  150. ParseError("expected 0x or x");
  151. } else if (first == '0') {
  152. const char second = Next();
  153. if (second != 'x' && second != 'X') {
  154. ParseError("expected 0x");
  155. }
  156. }
  157. }
  158. // Consume the next character.
  159. char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; }
  160. // Determine how to read the hex stream based on the first token.
  161. void DetermineMode() {
  162. SkipSpace();
  163. // Read 11 bytes, that is the size of the biggest token (10) + one more.
  164. char first_token[11];
  165. for (uint32_t i = 0; i < 11; ++i) {
  166. first_token[i] = Next();
  167. }
  168. // Table of how to match the first token with a mode.
  169. struct {
  170. const char* expect;
  171. bool must_have_delimiter;
  172. HexMode mode;
  173. } parse_info[] = {
  174. {"0x07230203", true, HexMode::Words},
  175. {"0x7230203", true, HexMode::Words},
  176. {"x07230203", true, HexMode::Words},
  177. {"x7230203", true, HexMode::Words},
  178. {"0x07", true, HexMode::BytesBigEndian},
  179. {"0x7", true, HexMode::BytesBigEndian},
  180. {"x07", true, HexMode::BytesBigEndian},
  181. {"x7", true, HexMode::BytesBigEndian},
  182. {"0x03", true, HexMode::BytesLittleEndian},
  183. {"0x3", true, HexMode::BytesLittleEndian},
  184. {"x03", true, HexMode::BytesLittleEndian},
  185. {"x3", true, HexMode::BytesLittleEndian},
  186. {"07", false, HexMode::StreamBigEndian},
  187. {"03", false, HexMode::StreamLittleEndian},
  188. };
  189. // Check to see if any of the possible first tokens are matched. If not,
  190. // this is not a recognized hex stream.
  191. encountered_error_ = true;
  192. for (const auto& info : parse_info) {
  193. const size_t expect_len = strlen(info.expect);
  194. const bool matches_expect =
  195. MatchIgnoreCase(first_token, info.expect, expect_len);
  196. const bool satisfies_delimeter =
  197. !info.must_have_delimiter || IsSpace(first_token[expect_len]);
  198. if (matches_expect && satisfies_delimeter) {
  199. mode_ = info.mode;
  200. encountered_error_ = false;
  201. break;
  202. }
  203. }
  204. if (encountered_error_) {
  205. fprintf(stderr,
  206. "error: hex format detected, but pattern '%.11s' is not "
  207. "recognized '%s'\n",
  208. first_token, filename_);
  209. }
  210. // Reset the position to restart parsing with the determined mode.
  211. current_ = 0;
  212. }
  213. // Consume up to |max_len| characters and put them in |token_chars|. A
  214. // delimiter is expected. The resulting string is NUL-terminated.
  215. void NextN(char token_chars[9], size_t max_len) {
  216. assert(max_len < 9);
  217. for (size_t i = 0; i <= max_len; ++i) {
  218. char c = Next();
  219. if (IsSpace(c)) {
  220. token_chars[i] = '\0';
  221. return;
  222. }
  223. token_chars[i] = c;
  224. if (!isxdigit(c)) {
  225. ParseError("encountered non-hex character");
  226. }
  227. }
  228. // If space is not reached before the maximum number of characters where
  229. // consumed, that's an error.
  230. ParseError("expected delimiter (space or comma)");
  231. token_chars[max_len] = '\0';
  232. }
  233. // Consume one hex digit.
  234. char NextHexDigit() {
  235. char c = Next();
  236. if (!isxdigit(c)) {
  237. ParseError("encountered non-hex character");
  238. }
  239. return c;
  240. }
  241. // Extract a token out of the stream. It could be either a word or a byte,
  242. // based on |mode_|.
  243. uint32_t GetNextToken() {
  244. SkipSpace();
  245. // The longest token can be 8 chars (for |HexMode::Words|), add one for
  246. // '\0'.
  247. char token_chars[9];
  248. switch (mode_) {
  249. case HexMode::Words:
  250. case HexMode::BytesBigEndian:
  251. case HexMode::BytesLittleEndian:
  252. // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*)
  253. // digits.
  254. Skip0x();
  255. NextN(token_chars, mode_ == HexMode::Words ? 8 : 2);
  256. break;
  257. case HexMode::StreamBigEndian:
  258. case HexMode::StreamLittleEndian:
  259. // Always expected to see two consecutive hex digits.
  260. token_chars[0] = NextHexDigit();
  261. token_chars[1] = NextHexDigit();
  262. token_chars[2] = '\0';
  263. break;
  264. }
  265. if (encountered_error_) {
  266. return 0;
  267. }
  268. // Parse the hex value that was just read.
  269. return static_cast<uint32_t>(strtol(token_chars, nullptr, 16));
  270. }
  271. // Construct a word out of tokens
  272. uint32_t GetNextWord() {
  273. if (mode_ == HexMode::Words) {
  274. return GetNextToken();
  275. }
  276. uint32_t tokens[4] = {
  277. GetNextToken(),
  278. GetNextToken(),
  279. GetNextToken(),
  280. GetNextToken(),
  281. };
  282. switch (mode_) {
  283. case HexMode::BytesBigEndian:
  284. case HexMode::StreamBigEndian:
  285. return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3];
  286. case HexMode::BytesLittleEndian:
  287. case HexMode::StreamLittleEndian:
  288. return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0];
  289. default:
  290. assert(false);
  291. return 0;
  292. }
  293. }
  294. const char* filename_;
  295. const std::vector<char>& stream_;
  296. std::vector<uint32_t>* data_;
  297. HexMode mode_ = HexMode::Words;
  298. size_t current_ = 0;
  299. bool encountered_error_ = false;
  300. };
  301. } // namespace
  302. bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
  303. assert(data->empty());
  304. const bool use_file = filename && strcmp("-", filename);
  305. FILE* fp = nullptr;
  306. if (use_file) {
  307. fp = fopen(filename, "rb");
  308. } else {
  309. SET_STDIN_TO_BINARY_MODE();
  310. fp = stdin;
  311. }
  312. // Read into a char vector first. If this is a hex stream, it needs to be
  313. // processed as such.
  314. std::vector<char> data_raw;
  315. ReadFile(fp, &data_raw);
  316. bool succeeded = WasFileCorrectlyRead(fp, filename);
  317. if (use_file && fp) fclose(fp);
  318. if (!succeeded) {
  319. return false;
  320. }
  321. if (IsHexStream(data_raw)) {
  322. // If a hex stream, parse it and fill |data|.
  323. HexTokenizer tokenizer(filename, data_raw, data);
  324. succeeded = tokenizer.Parse();
  325. } else {
  326. // If not a hex stream, convert it to uint32_t via memcpy.
  327. succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t));
  328. if (succeeded) {
  329. data->resize(data_raw.size() / sizeof(uint32_t), 0);
  330. memcpy(data->data(), data_raw.data(), data_raw.size());
  331. }
  332. }
  333. return succeeded;
  334. }
  335. bool ConvertHexToBinary(const std::vector<char>& stream,
  336. std::vector<uint32_t>* data) {
  337. HexTokenizer tokenizer("<input string>", stream, data);
  338. return tokenizer.Parse();
  339. }
  340. bool ReadTextFile(const char* filename, std::vector<char>* data) {
  341. assert(data->empty());
  342. const bool use_file = filename && strcmp("-", filename);
  343. FILE* fp = nullptr;
  344. if (use_file) {
  345. fp = fopen(filename, "r");
  346. } else {
  347. SET_STDIN_TO_TEXT_MODE();
  348. fp = stdin;
  349. }
  350. ReadFile(fp, data);
  351. bool succeeded = WasFileCorrectlyRead(fp, filename);
  352. if (use_file && fp) fclose(fp);
  353. return succeeded;
  354. }
  355. namespace {
  356. // A class to create and manage a file for outputting data.
  357. class OutputFile {
  358. public:
  359. // Opens |filename| in the given mode. If |filename| is nullptr, the empty
  360. // string or "-", stdout will be set to the given mode.
  361. OutputFile(const char* filename, const char* mode) : old_mode_(0) {
  362. const bool use_stdout =
  363. !filename || (filename[0] == '-' && filename[1] == '\0');
  364. if (use_stdout) {
  365. if (strchr(mode, 'b')) {
  366. old_mode_ = SET_STDOUT_TO_BINARY_MODE();
  367. } else {
  368. old_mode_ = SET_STDOUT_TO_TEXT_MODE();
  369. }
  370. fp_ = stdout;
  371. } else {
  372. fp_ = fopen(filename, mode);
  373. }
  374. }
  375. ~OutputFile() {
  376. if (fp_ == stdout) {
  377. fflush(stdout);
  378. SET_STDOUT_MODE(old_mode_);
  379. } else if (fp_ != nullptr) {
  380. fclose(fp_);
  381. }
  382. }
  383. // Returns a file handle to the file.
  384. FILE* GetFileHandle() const { return fp_; }
  385. private:
  386. FILE* fp_;
  387. int old_mode_;
  388. };
  389. } // namespace
  390. template <typename T>
  391. bool WriteFile(const char* filename, const char* mode, const T* data,
  392. size_t count) {
  393. OutputFile file(filename, mode);
  394. FILE* fp = file.GetFileHandle();
  395. if (fp == nullptr) {
  396. fprintf(stderr, "error: could not open file '%s'\n", filename);
  397. return false;
  398. }
  399. size_t written = fwrite(data, sizeof(T), count, fp);
  400. if (count != written) {
  401. fprintf(stderr, "error: could not write to file '%s'\n", filename);
  402. return false;
  403. }
  404. return true;
  405. }
  406. template bool WriteFile<uint32_t>(const char* filename, const char* mode,
  407. const uint32_t* data, size_t count);
  408. template bool WriteFile<char>(const char* filename, const char* mode,
  409. const char* data, size_t count);