2
0

binary_to_compressed_c.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. // dear imgui
  2. // (binary_to_compressed_c.cpp)
  3. // Helper tool to turn a file into a C array, if you want to embed font data in your source code.
  4. // The data is first compressed with stb_compress() to reduce source code size.
  5. // Then stored in a C array:
  6. // - Base85: ~5 bytes of source code for 4 bytes of input data. 5 bytes stored in binary (suggested by @mmalex).
  7. // - As int: ~11 bytes of source code for 4 bytes of input data. 4 bytes stored in binary. Endianness dependent, need swapping on big-endian CPU.
  8. // - As char: ~12 bytes of source code for 4 bytes of input data. 4 bytes stored in binary. Not endianness dependent.
  9. // Load compressed TTF fonts with ImGui::GetIO().Fonts->AddFontFromMemoryCompressedTTF()
  10. // Build with, e.g:
  11. // # cl.exe binary_to_compressed_c.cpp
  12. // # g++ binary_to_compressed_c.cpp
  13. // # clang++ binary_to_compressed_c.cpp
  14. // You can also find a precompiled Windows binary in the binary/demo package available from https://github.com/ocornut/imgui
  15. // Usage:
  16. // binary_to_compressed_c.exe [-u8|-u32|-base85] [-nocompress] [-nostatic] <inputfile> <symbolname>
  17. // Usage example:
  18. // # binary_to_compressed_c.exe myfont.ttf MyFont > myfont.cpp
  19. // # binary_to_compressed_c.exe -base85 myfont.ttf MyFont > myfont.cpp
  20. // Note:
  21. // Base85 encoding will be obsoleted by a future version of Dear ImGui!
  22. #define _CRT_SECURE_NO_WARNINGS
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <stdlib.h>
  26. #include <assert.h>
  27. // stb_compress* from stb.h - declaration
  28. typedef unsigned int stb_uint;
  29. typedef unsigned char stb_uchar;
  30. stb_uint stb_compress(stb_uchar* out, stb_uchar* in, stb_uint len);
  31. enum SourceEncoding
  32. {
  33. SourceEncoding_U8, // New default since 2024/11
  34. SourceEncoding_U32,
  35. SourceEncoding_Base85,
  36. };
  37. static bool binary_to_compressed_c(const char* filename, const char* symbol, SourceEncoding source_encoding, bool use_compression, bool use_static);
  38. int main(int argc, char** argv)
  39. {
  40. if (argc < 3)
  41. {
  42. printf("Syntax: %s [-u8|-u32|-base85] [-nocompress] [-nostatic] <inputfile> <symbolname>\n", argv[0]);
  43. printf("Source encoding types:\n");
  44. printf(" -u8 = ~12 bytes of source per 4 bytes of data. 4 bytes in binary.\n");
  45. printf(" -u32 = ~11 bytes of source per 4 bytes of data. 4 bytes in binary. Need endianness swapping on big-endian.\n");
  46. printf(" -base85 = ~5 bytes of source per 4 bytes of data. 5 bytes in binary. Need decoder.\n");
  47. return 0;
  48. }
  49. int argn = 1;
  50. bool use_compression = true;
  51. bool use_static = true;
  52. SourceEncoding source_encoding = SourceEncoding_U8; // New default
  53. while (argn < (argc - 2) && argv[argn][0] == '-')
  54. {
  55. if (strcmp(argv[argn], "-u8") == 0) { source_encoding = SourceEncoding_U8; argn++; }
  56. else if (strcmp(argv[argn], "-u32") == 0) { source_encoding = SourceEncoding_U32; argn++; }
  57. else if (strcmp(argv[argn], "-base85") == 0) { source_encoding = SourceEncoding_Base85; argn++; }
  58. else if (strcmp(argv[argn], "-nocompress") == 0) { use_compression = false; argn++; }
  59. else if (strcmp(argv[argn], "-nostatic") == 0) { use_static = false; argn++; }
  60. else
  61. {
  62. fprintf(stderr, "Unknown argument: '%s'\n", argv[argn]);
  63. return 1;
  64. }
  65. }
  66. bool ret = binary_to_compressed_c(argv[argn], argv[argn + 1], source_encoding, use_compression, use_static);
  67. if (!ret)
  68. fprintf(stderr, "Error opening or reading file: '%s'\n", argv[argn]);
  69. return ret ? 0 : 1;
  70. }
  71. char Encode85Byte(unsigned int x)
  72. {
  73. x = (x % 85) + 35;
  74. return (char)((x >= '\\') ? x + 1 : x);
  75. }
  76. bool binary_to_compressed_c(const char* filename, const char* symbol, SourceEncoding source_encoding, bool use_compression, bool use_static)
  77. {
  78. // Read file
  79. FILE* f = fopen(filename, "rb");
  80. if (!f) return false;
  81. int data_sz;
  82. if (fseek(f, 0, SEEK_END) || (data_sz = (int)ftell(f)) == -1 || fseek(f, 0, SEEK_SET)) { fclose(f); return false; }
  83. char* data = new char[data_sz + 4];
  84. if (fread(data, 1, data_sz, f) != (size_t)data_sz) { fclose(f); delete[] data; return false; }
  85. memset((void*)(((char*)data) + data_sz), 0, 4);
  86. fclose(f);
  87. // Compress
  88. int maxlen = data_sz + 512 + (data_sz >> 2) + sizeof(int); // total guess
  89. char* compressed = use_compression ? new char[maxlen] : data;
  90. int compressed_sz = use_compression ? stb_compress((stb_uchar*)compressed, (stb_uchar*)data, data_sz) : data_sz;
  91. if (use_compression)
  92. memset(compressed + compressed_sz, 0, maxlen - compressed_sz);
  93. // Output as Base85 encoded
  94. FILE* out = stdout;
  95. fprintf(out, "// File: '%s' (%d bytes)\n", filename, (int)data_sz);
  96. const char* static_str = use_static ? "static " : "";
  97. const char* compressed_str = use_compression ? "compressed_" : "";
  98. if (source_encoding == SourceEncoding_Base85)
  99. {
  100. fprintf(out, "// Exported using binary_to_compressed_c.exe -base85 \"%s\" %s\n", filename, symbol);
  101. fprintf(out, "%sconst char %s_%sdata_base85[%d+1] =\n \"", static_str, symbol, compressed_str, (int)((compressed_sz + 3) / 4)*5);
  102. char prev_c = 0;
  103. for (int src_i = 0; src_i < compressed_sz; src_i += 4)
  104. {
  105. // This is made a little more complicated by the fact that ??X sequences are interpreted as trigraphs by old C/C++ compilers. So we need to escape pairs of ??.
  106. unsigned int d = *(unsigned int*)(compressed + src_i);
  107. for (unsigned int n5 = 0; n5 < 5; n5++, d /= 85)
  108. {
  109. char c = Encode85Byte(d);
  110. fprintf(out, (c == '?' && prev_c == '?') ? "\\%c" : "%c", c);
  111. prev_c = c;
  112. }
  113. if ((src_i % 112) == 112 - 4)
  114. fprintf(out, "\"\n \"");
  115. }
  116. fprintf(out, "\";\n\n");
  117. }
  118. else if (source_encoding == SourceEncoding_U8)
  119. {
  120. // As individual bytes, not subject to endianness issues.
  121. fprintf(out, "// Exported using binary_to_compressed_c.exe -u8 \"%s\" %s\n", filename, symbol);
  122. fprintf(out, "%sconst unsigned int %s_%ssize = %d;\n", static_str, symbol, compressed_str, (int)compressed_sz);
  123. fprintf(out, "%sconst unsigned char %s_%sdata[%d] =\n{", static_str, symbol, compressed_str, (int)compressed_sz);
  124. int column = 0;
  125. for (int i = 0; i < compressed_sz; i++)
  126. {
  127. unsigned char d = *(unsigned char*)(compressed + i);
  128. if (column == 0)
  129. fprintf(out, "\n ");
  130. column += fprintf(out, "%d,", d);
  131. if (column >= 180)
  132. column = 0;
  133. }
  134. fprintf(out, "\n};\n\n");
  135. }
  136. else if (source_encoding == SourceEncoding_U32)
  137. {
  138. // As integers
  139. fprintf(out, "// Exported using binary_to_compressed_c.exe -u32 \"%s\" %s\n", filename, symbol);
  140. fprintf(out, "%sconst unsigned int %s_%ssize = %d;\n", static_str, symbol, compressed_str, (int)compressed_sz);
  141. fprintf(out, "%sconst unsigned int %s_%sdata[%d/4] =\n{", static_str, symbol, compressed_str, (int)((compressed_sz + 3) / 4)*4);
  142. int column = 0;
  143. for (int i = 0; i < compressed_sz; i += 4)
  144. {
  145. unsigned int d = *(unsigned int*)(compressed + i);
  146. if ((column++ % 14) == 0)
  147. fprintf(out, "\n 0x%08x, ", d);
  148. else
  149. fprintf(out, "0x%08x, ", d);
  150. }
  151. fprintf(out, "\n};\n\n");
  152. }
  153. // Cleanup
  154. delete[] data;
  155. if (use_compression)
  156. delete[] compressed;
  157. return true;
  158. }
  159. // stb_compress* from stb.h - definition
  160. //////////////////// compressor ///////////////////////
  161. static stb_uint stb_adler32(stb_uint adler32, stb_uchar *buffer, stb_uint buflen)
  162. {
  163. const unsigned long ADLER_MOD = 65521;
  164. unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16;
  165. unsigned long blocklen, i;
  166. blocklen = buflen % 5552;
  167. while (buflen) {
  168. for (i=0; i + 7 < blocklen; i += 8) {
  169. s1 += buffer[0], s2 += s1;
  170. s1 += buffer[1], s2 += s1;
  171. s1 += buffer[2], s2 += s1;
  172. s1 += buffer[3], s2 += s1;
  173. s1 += buffer[4], s2 += s1;
  174. s1 += buffer[5], s2 += s1;
  175. s1 += buffer[6], s2 += s1;
  176. s1 += buffer[7], s2 += s1;
  177. buffer += 8;
  178. }
  179. for (; i < blocklen; ++i)
  180. s1 += *buffer++, s2 += s1;
  181. s1 %= ADLER_MOD, s2 %= ADLER_MOD;
  182. buflen -= blocklen;
  183. blocklen = 5552;
  184. }
  185. return (s2 << 16) + s1;
  186. }
  187. static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen)
  188. {
  189. stb_uint i;
  190. for (i=0; i < maxlen; ++i)
  191. if (m1[i] != m2[i]) return i;
  192. return i;
  193. }
  194. // simple implementation that just takes the source data in a big block
  195. static stb_uchar *stb__out;
  196. static FILE *stb__outfile;
  197. static stb_uint stb__outbytes;
  198. static void stb__write(unsigned char v)
  199. {
  200. fputc(v, stb__outfile);
  201. ++stb__outbytes;
  202. }
  203. //#define stb_out(v) (stb__out ? *stb__out++ = (stb_uchar) (v) : stb__write((stb_uchar) (v)))
  204. #define stb_out(v) do { if (stb__out) *stb__out++ = (stb_uchar) (v); else stb__write((stb_uchar) (v)); } while (0)
  205. static void stb_out2(stb_uint v) { stb_out(v >> 8); stb_out(v); }
  206. static void stb_out3(stb_uint v) { stb_out(v >> 16); stb_out(v >> 8); stb_out(v); }
  207. static void stb_out4(stb_uint v) { stb_out(v >> 24); stb_out(v >> 16); stb_out(v >> 8 ); stb_out(v); }
  208. static void outliterals(stb_uchar *in, int numlit)
  209. {
  210. while (numlit > 65536) {
  211. outliterals(in,65536);
  212. in += 65536;
  213. numlit -= 65536;
  214. }
  215. if (numlit == 0) ;
  216. else if (numlit <= 32) stb_out (0x000020 + numlit-1);
  217. else if (numlit <= 2048) stb_out2(0x000800 + numlit-1);
  218. else /* numlit <= 65536) */ stb_out3(0x070000 + numlit-1);
  219. if (stb__out) {
  220. memcpy(stb__out,in,numlit);
  221. stb__out += numlit;
  222. } else
  223. fwrite(in, 1, numlit, stb__outfile);
  224. }
  225. static int stb__window = 0x40000; // 256K
  226. static int stb_not_crap(int best, int dist)
  227. {
  228. return ((best > 2 && dist <= 0x00100)
  229. || (best > 5 && dist <= 0x04000)
  230. || (best > 7 && dist <= 0x80000));
  231. }
  232. static stb_uint stb__hashsize = 32768;
  233. // note that you can play with the hashing functions all you
  234. // want without needing to change the decompressor
  235. #define stb__hc(q,h,c) (((h) << 7) + ((h) >> 25) + q[c])
  236. #define stb__hc2(q,h,c,d) (((h) << 14) + ((h) >> 18) + (q[c] << 7) + q[d])
  237. #define stb__hc3(q,c,d,e) ((q[c] << 14) + (q[d] << 7) + q[e])
  238. static unsigned int stb__running_adler;
  239. static int stb_compress_chunk(stb_uchar *history,
  240. stb_uchar *start,
  241. stb_uchar *end,
  242. int length,
  243. int *pending_literals,
  244. stb_uchar **chash,
  245. stb_uint mask)
  246. {
  247. (void)history;
  248. int window = stb__window;
  249. stb_uint match_max;
  250. stb_uchar *lit_start = start - *pending_literals;
  251. stb_uchar *q = start;
  252. #define STB__SCRAMBLE(h) (((h) + ((h) >> 16)) & mask)
  253. // stop short of the end so we don't scan off the end doing
  254. // the hashing; this means we won't compress the last few bytes
  255. // unless they were part of something longer
  256. while (q < start+length && q+12 < end) {
  257. int m;
  258. stb_uint h1,h2,h3,h4, h;
  259. stb_uchar *t;
  260. int best = 2, dist=0;
  261. if (q+65536 > end)
  262. match_max = (stb_uint)(end-q);
  263. else
  264. match_max = 65536;
  265. #define stb__nc(b,d) ((d) <= window && ((b) > 9 || stb_not_crap((int)(b),(int)(d))))
  266. #define STB__TRY(t,p) /* avoid retrying a match we already tried */ \
  267. if (p ? dist != (int)(q-t) : 1) \
  268. if ((m = stb_matchlen(t, q, match_max)) > best) \
  269. if (stb__nc(m,q-(t))) \
  270. best = m, dist = (int)(q - (t))
  271. // rather than search for all matches, only try 4 candidate locations,
  272. // chosen based on 4 different hash functions of different lengths.
  273. // this strategy is inspired by LZO; hashing is unrolled here using the
  274. // 'hc' macro
  275. h = stb__hc3(q,0, 1, 2); h1 = STB__SCRAMBLE(h);
  276. t = chash[h1]; if (t) STB__TRY(t,0);
  277. h = stb__hc2(q,h, 3, 4); h2 = STB__SCRAMBLE(h);
  278. h = stb__hc2(q,h, 5, 6); t = chash[h2]; if (t) STB__TRY(t,1);
  279. h = stb__hc2(q,h, 7, 8); h3 = STB__SCRAMBLE(h);
  280. h = stb__hc2(q,h, 9,10); t = chash[h3]; if (t) STB__TRY(t,1);
  281. h = stb__hc2(q,h,11,12); h4 = STB__SCRAMBLE(h);
  282. t = chash[h4]; if (t) STB__TRY(t,1);
  283. // because we use a shared hash table, can only update it
  284. // _after_ we've probed all of them
  285. chash[h1] = chash[h2] = chash[h3] = chash[h4] = q;
  286. if (best > 2)
  287. assert(dist > 0);
  288. // see if our best match qualifies
  289. if (best < 3) { // fast path literals
  290. ++q;
  291. } else if (best > 2 && best <= 0x80 && dist <= 0x100) {
  292. outliterals(lit_start, (int)(q-lit_start)); lit_start = (q += best);
  293. stb_out(0x80 + best-1);
  294. stb_out(dist-1);
  295. } else if (best > 5 && best <= 0x100 && dist <= 0x4000) {
  296. outliterals(lit_start, (int)(q-lit_start)); lit_start = (q += best);
  297. stb_out2(0x4000 + dist-1);
  298. stb_out(best-1);
  299. } else if (best > 7 && best <= 0x100 && dist <= 0x80000) {
  300. outliterals(lit_start, (int)(q-lit_start)); lit_start = (q += best);
  301. stb_out3(0x180000 + dist-1);
  302. stb_out(best-1);
  303. } else if (best > 8 && best <= 0x10000 && dist <= 0x80000) {
  304. outliterals(lit_start, (int)(q-lit_start)); lit_start = (q += best);
  305. stb_out3(0x100000 + dist-1);
  306. stb_out2(best-1);
  307. } else if (best > 9 && dist <= 0x1000000) {
  308. if (best > 65536) best = 65536;
  309. outliterals(lit_start, (int)(q-lit_start)); lit_start = (q += best);
  310. if (best <= 0x100) {
  311. stb_out(0x06);
  312. stb_out3(dist-1);
  313. stb_out(best-1);
  314. } else {
  315. stb_out(0x04);
  316. stb_out3(dist-1);
  317. stb_out2(best-1);
  318. }
  319. } else { // fallback literals if no match was a balanced tradeoff
  320. ++q;
  321. }
  322. }
  323. // if we didn't get all the way, add the rest to literals
  324. if (q-start < length)
  325. q = start+length;
  326. // the literals are everything from lit_start to q
  327. *pending_literals = (int)(q - lit_start);
  328. stb__running_adler = stb_adler32(stb__running_adler, start, (stb_uint)(q - start));
  329. return (int)(q - start);
  330. }
  331. static int stb_compress_inner(stb_uchar *input, stb_uint length)
  332. {
  333. int literals = 0;
  334. stb_uint len,i;
  335. stb_uchar **chash;
  336. chash = (stb_uchar**) malloc(stb__hashsize * sizeof(stb_uchar*));
  337. if (chash == nullptr) return 0; // failure
  338. for (i=0; i < stb__hashsize; ++i)
  339. chash[i] = nullptr;
  340. // stream signature
  341. stb_out(0x57); stb_out(0xbc);
  342. stb_out2(0);
  343. stb_out4(0); // 64-bit length requires 32-bit leading 0
  344. stb_out4(length);
  345. stb_out4(stb__window);
  346. stb__running_adler = 1;
  347. len = stb_compress_chunk(input, input, input+length, length, &literals, chash, stb__hashsize-1);
  348. assert(len == length);
  349. outliterals(input+length - literals, literals);
  350. free(chash);
  351. stb_out2(0x05fa); // end opcode
  352. stb_out4(stb__running_adler);
  353. return 1; // success
  354. }
  355. stb_uint stb_compress(stb_uchar *out, stb_uchar *input, stb_uint length)
  356. {
  357. stb__out = out;
  358. stb__outfile = nullptr;
  359. stb_compress_inner(input, length);
  360. return (stb_uint)(stb__out - out);
  361. }