par_filecache.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. // FILECACHE :: https://github.com/prideout/par
  2. // Simple file-based LRU cache for blobs with content-addressable names.
  3. //
  4. // Each cached item is stored on disk as "{PREFIX}{NAME}", where {PREFIX}
  5. // is passed in when initializing the cache. You'll probably want to specify
  6. // a folder path for your prefix, including the trailing slash.
  7. //
  8. // Each item is divided into a payload (arbitrary size) and an optional header
  9. // (fixed size). The structure of the payload and header are completely up to
  10. // you. The list of items is stored in a text file at "{PREFIX}table", which
  11. // contains a list of names, timestamps, and byte counts. This table is loaded
  12. // only once, but is saved every time the client fetches a blob from the cache,
  13. // so that the most-recently-accessed timestamps are always up to date, even
  14. // when your application doesn't close gracefully.
  15. //
  16. // Distributed under the MIT License, see bottom of file.
  17. // -----------------------------------------------------------------------------
  18. // BEGIN PUBLIC API
  19. // -----------------------------------------------------------------------------
  20. #ifndef PAR_FILECACHE_H
  21. #define PAR_FILECACHE_H
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif
  25. #include <stdint.h>
  26. #include <stdbool.h>
  27. // Initialize the filecache using the given prefix (usually a folder path with
  28. // a trailing slash) and the given maximum byte count. If items already exist
  29. // in the cache when this is called, they are not evicted. Cached items are
  30. // meant to persist from run to run.
  31. void par_filecache_init(char const* prefix, int maxsize);
  32. // Save a blob to the cache using the given unique name. If adding the blob
  33. // would cause the cache to exceed maxsize, the least-recently-used item is
  34. // evicted at this time.
  35. void par_filecache_save(char const* name, uint8_t const* payload,
  36. int payloadsize, uint8_t const* header, int headersize);
  37. // Check if the given blob is in the cache; if not, return 0. If so, return 1
  38. // and allocate new memory for payload. The caller should free the payload.
  39. // The header is preallocated so the caller needs to know its size beforehand.
  40. bool par_filecache_load(char const* name, uint8_t** payload, int* payloadsize,
  41. uint8_t* header, int headersize);
  42. // Remove all items from the cache.
  43. void par_filecache_evict_all();
  44. // Set this to zero if you wish to avoid LZ4 compression. I recommend using
  45. // it though, because it's very fast and it's a two-file library.
  46. #ifndef ENABLE_LZ4
  47. #define ENABLE_LZ4 0
  48. #endif
  49. #ifndef PAR_FILECACHE_VERBOSE
  50. #define PAR_FILECACHE_VERBOSE 0
  51. #endif
  52. #ifndef PAR_PI
  53. #define PAR_PI (3.14159265359)
  54. #define PAR_MIN(a, b) (a > b ? b : a)
  55. #define PAR_MAX(a, b) (a > b ? a : b)
  56. #define PAR_CLAMP(v, lo, hi) PAR_MAX(lo, PAR_MIN(hi, v))
  57. #define PAR_SWAP(T, A, B) { T tmp = B; B = A; A = tmp; }
  58. #define PAR_SQR(a) ((a) * (a))
  59. #endif
  60. #ifndef PAR_MALLOC
  61. #define PAR_MALLOC(T, N) ((T*) malloc(N * sizeof(T)))
  62. #define PAR_CALLOC(T, N) ((T*) calloc(N * sizeof(T), 1))
  63. #define PAR_REALLOC(T, BUF, N) ((T*) realloc(BUF, sizeof(T) * (N)))
  64. #define PAR_FREE(BUF) free(BUF)
  65. #endif
  66. #ifdef __cplusplus
  67. }
  68. #endif
  69. // -----------------------------------------------------------------------------
  70. // END PUBLIC API
  71. // -----------------------------------------------------------------------------
  72. #ifdef PAR_FILECACHE_IMPLEMENTATION
  73. #include <limits.h>
  74. #include <string.h>
  75. #include <assert.h>
  76. #include <stdio.h>
  77. #include <fcntl.h>
  78. #include <unistd.h>
  79. #include <stdlib.h>
  80. #include <stdint.h>
  81. #include <libgen.h>
  82. #include <time.h>
  83. #include <sys/stat.h>
  84. #ifndef PATH_MAX
  85. #define PATH_MAX 4096
  86. #endif
  87. #if ENABLE_LZ4
  88. #include "lz4.h"
  89. #endif
  90. static char * _par_strdup(char const* s)
  91. {
  92. if (s) {
  93. size_t l = strlen(s);
  94. char *s2 = (char*) malloc(l + 1);
  95. if (s2) {
  96. strcpy(s2, s);
  97. }
  98. return s2;
  99. }
  100. return 0;
  101. }
  102. #define PAR_MAX_ENTRIES 64
  103. typedef struct {
  104. time_t last_used_timestamp;
  105. uint64_t hashed_name;
  106. char const* name;
  107. int nbytes;
  108. } filecache_entry_t;
  109. typedef struct {
  110. filecache_entry_t entries[PAR_MAX_ENTRIES];
  111. int nentries;
  112. int totalbytes;
  113. } filecache_table_t;
  114. static void _update_table(char const* item_name, int item_size);
  115. static void _append_table(char const* item_name, int item_size);
  116. static void _read_or_create_tablefile();
  117. static void _save_tablefile();
  118. static void _evict_lru();
  119. static uint64_t _hash(char const* name);
  120. static char _fileprefix[PATH_MAX] = "./_cache.";
  121. static char _tablepath[PATH_MAX] = "./_cache.table";
  122. static int _maxtotalbytes = 1024 * 1024 * 16;
  123. static filecache_table_t* _table = 0;
  124. void par_filecache_init(char const* prefix, int maxsize)
  125. {
  126. size_t len = strlen(prefix);
  127. assert(len + 1 < PATH_MAX && "Cache prefix is too long");
  128. strncpy(_fileprefix, prefix, len + 1);
  129. strcpy(_tablepath, _fileprefix);
  130. strcat(_tablepath, "table");
  131. _maxtotalbytes = maxsize;
  132. }
  133. #if IOS_EXAMPLE
  134. NSString* getPrefix()
  135. {
  136. NSString* cachesFolder = [NSSearchPathForDirectoriesInDomains(
  137. NSCachesDirectory, NSUserDomainMask, YES) firstObject];
  138. NSError* error = nil;
  139. if (![[NSFileManager defaultManager] createDirectoryAtPath : cachesFolder
  140. withIntermediateDirectories : YES
  141. attributes : nil
  142. error : &error]) {
  143. NSLog(@"MGMPlatformGetCachesFolder error: %@", error);
  144. return nil;
  145. }
  146. return [cachesFolder stringByAppendingString : @"/_cache."];
  147. }
  148. #endif
  149. static bool par_filecache__read(void* dest, int nbytes, FILE* file)
  150. {
  151. int consumed = (int) fread(dest, nbytes, 1, file);
  152. return consumed == 1;
  153. }
  154. bool par_filecache_load(char const* name, uint8_t** payload, int* payloadsize,
  155. uint8_t* header, int headersize)
  156. {
  157. char qualified[PATH_MAX];
  158. size_t len = strlen(name);
  159. if (len == 0) {
  160. return false;
  161. }
  162. assert(len + strlen(_fileprefix) < PATH_MAX);
  163. strcpy(qualified, _fileprefix);
  164. strcat(qualified, name);
  165. if (access(qualified, F_OK) == -1) {
  166. return false;
  167. }
  168. FILE* cachefile = fopen(qualified, "rb");
  169. assert(cachefile && "Unable to open cache file for reading");
  170. fseek(cachefile, 0, SEEK_END);
  171. long fsize = ftell(cachefile);
  172. fseek(cachefile, 0, SEEK_SET);
  173. if (headersize > 0 && !par_filecache__read(header, headersize, cachefile)) {
  174. return false;
  175. }
  176. int32_t dnbytes;
  177. #if ENABLE_LZ4
  178. long cnbytes = fsize - headersize - sizeof(dnbytes);
  179. if (!par_filecache__read(&dnbytes, sizeof(dnbytes), cachefile)) {
  180. return false;
  181. }
  182. #else
  183. long cnbytes = fsize - headersize;
  184. dnbytes = (int32_t) cnbytes;
  185. #endif
  186. char* cbuff = (char*) malloc(cnbytes);
  187. if (!par_filecache__read(cbuff, (int) cnbytes, cachefile)) {
  188. return false;
  189. }
  190. #if ENABLE_LZ4
  191. char* dbuff = (char*) malloc(dnbytes);
  192. LZ4_decompress_safe(cbuff, dbuff, (int) cnbytes, dnbytes);
  193. free(cbuff);
  194. #else
  195. char* dbuff = cbuff;
  196. #endif
  197. fclose(cachefile);
  198. *payload = (uint8_t*) dbuff;
  199. *payloadsize = dnbytes;
  200. _update_table(name, (int) cnbytes);
  201. return true;
  202. }
  203. void par_filecache_save(char const* name, uint8_t const* payload,
  204. int payloadsize, uint8_t const* header, int headersize)
  205. {
  206. char qualified[PATH_MAX];
  207. size_t len = strlen(name);
  208. if (len == 0) {
  209. return;
  210. }
  211. assert(len + strlen(_fileprefix) < PATH_MAX);
  212. strcpy(qualified, _fileprefix);
  213. strcat(qualified, name);
  214. FILE* cachefile = fopen(qualified, "wb");
  215. assert(cachefile && "Unable to open cache file for writing");
  216. if (headersize > 0) {
  217. fwrite(header, 1, headersize, cachefile);
  218. }
  219. int csize = 0;
  220. if (payloadsize > 0) {
  221. #if ENABLE_LZ4
  222. int32_t nbytes = payloadsize;
  223. fwrite(&nbytes, 1, sizeof(nbytes), cachefile);
  224. int maxsize = LZ4_compressBound(nbytes);
  225. char* dst = (char*) malloc(maxsize);
  226. char const* src = (char const*) payload;
  227. assert(nbytes < LZ4_MAX_INPUT_SIZE);
  228. csize = LZ4_compress_default(src, dst, nbytes, maxsize);
  229. fwrite(dst, 1, csize, cachefile);
  230. free(dst);
  231. #else
  232. csize = payloadsize;
  233. int actual = (int) fwrite(payload, 1, csize, cachefile);
  234. if (actual < csize) {
  235. fclose(cachefile);
  236. remove(qualified);
  237. printf("Unable to save %s to cache (%d bytes)\n", name, csize);
  238. return;
  239. }
  240. #endif
  241. }
  242. fclose(cachefile);
  243. _update_table(name, csize + headersize);
  244. }
  245. void par_filecache_evict_all()
  246. {
  247. #if PAR_FILECACHE_VERBOSE
  248. printf("Evicting all.\n");
  249. #endif
  250. char qualified[PATH_MAX];
  251. if (!_table) {
  252. _read_or_create_tablefile();
  253. }
  254. filecache_entry_t* entry = _table->entries;
  255. for (int i = 0; i < _table->nentries; i++, entry++) {
  256. strcpy(qualified, _fileprefix);
  257. strcat(qualified, entry->name);
  258. #if PAR_FILECACHE_VERBOSE
  259. printf("Evicting %s\n", qualified);
  260. #endif
  261. remove(qualified);
  262. }
  263. _table->nentries = 0;
  264. _table->totalbytes = 0;
  265. remove(_tablepath);
  266. }
  267. // Adds the given item to the table and evicts the LRU items if the total cache
  268. // size exceeds the specified maxsize.
  269. static void _append_table(char const* item_name, int item_size)
  270. {
  271. time_t now = time(0);
  272. if (!_table) {
  273. _read_or_create_tablefile();
  274. }
  275. uint64_t hashed_name = _hash(item_name);
  276. int total = _table->totalbytes + item_size;
  277. while (_table->nentries >= PAR_MAX_ENTRIES || total > _maxtotalbytes) {
  278. assert(_table->nentries > 0 && "Cache size is too small.");
  279. _evict_lru();
  280. total = _table->totalbytes + item_size;
  281. }
  282. _table->totalbytes = total;
  283. filecache_entry_t* entry = &_table->entries[_table->nentries++];
  284. entry->last_used_timestamp = now;
  285. entry->hashed_name = hashed_name;
  286. entry->name = _par_strdup(item_name);
  287. entry->nbytes = item_size;
  288. _save_tablefile();
  289. }
  290. // Updates the timestamp associated with the given item.
  291. static void _update_table(char const* item_name, int item_size)
  292. {
  293. time_t now = time(0);
  294. if (!_table) {
  295. _read_or_create_tablefile();
  296. }
  297. uint64_t hashed_name = _hash(item_name);
  298. filecache_entry_t* entry = _table->entries;
  299. int i;
  300. for (i = 0; i < _table->nentries; i++, entry++) {
  301. if (entry->hashed_name == hashed_name) {
  302. break;
  303. }
  304. }
  305. if (i >= _table->nentries) {
  306. _append_table(item_name, item_size);
  307. return;
  308. }
  309. entry->last_used_timestamp = now;
  310. _save_tablefile();
  311. }
  312. static void _read_or_create_tablefile()
  313. {
  314. _table = (filecache_table_t*) calloc(sizeof(filecache_table_t), 1);
  315. FILE* fhandle = fopen(_tablepath, "r");
  316. if (!fhandle) {
  317. fhandle = fopen(_tablepath, "w");
  318. if (!fhandle) {
  319. mkdir(dirname(_tablepath), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
  320. fhandle = fopen(_tablepath, "w");
  321. }
  322. assert(fhandle && "Unable to create filecache info file.");
  323. } else {
  324. filecache_entry_t entry;
  325. char name[PATH_MAX];
  326. while (1) {
  327. int nargs = fscanf(fhandle, "%ld %d %s", &entry.last_used_timestamp,
  328. &entry.nbytes, name);
  329. if (nargs != 3) {
  330. break;
  331. }
  332. entry.name = _par_strdup(name);
  333. entry.hashed_name = _hash(entry.name);
  334. _table->entries[_table->nentries++] = entry;
  335. _table->totalbytes += entry.nbytes;
  336. }
  337. }
  338. fclose(fhandle);
  339. }
  340. static void _save_tablefile()
  341. {
  342. FILE* fhandle = fopen(_tablepath, "w");
  343. assert(fhandle && "Unable to create filecache info file.");
  344. filecache_entry_t* entry = _table->entries;
  345. for (int i = 0; i < _table->nentries; i++, entry++) {
  346. fprintf(fhandle, "%ld %d %s\n", entry->last_used_timestamp,
  347. entry->nbytes, entry->name);
  348. }
  349. fclose(fhandle);
  350. }
  351. static void _evict_lru()
  352. {
  353. const uint64_t never_evict = _hash("version");
  354. int oldest_index = -1;
  355. time_t oldest_time = LONG_MAX;
  356. filecache_entry_t* entry = _table->entries;
  357. for (int i = 0; i < _table->nentries; i++, entry++) {
  358. if (entry->hashed_name == never_evict) {
  359. continue;
  360. }
  361. if (entry->last_used_timestamp < oldest_time) {
  362. oldest_time = entry->last_used_timestamp;
  363. oldest_index = i;
  364. }
  365. }
  366. if (oldest_index > -1) {
  367. entry = _table->entries + oldest_index;
  368. char qualified[PATH_MAX];
  369. size_t len = strlen(entry->name);
  370. assert(len + strlen(_fileprefix) < PATH_MAX);
  371. strcpy(qualified, _fileprefix);
  372. strcat(qualified, entry->name);
  373. #if PAR_FILECACHE_VERBOSE
  374. printf("Evicting %s\n", entry->name);
  375. #endif
  376. remove(qualified);
  377. _table->totalbytes -= entry->nbytes;
  378. if (_table->nentries-- > 1) {
  379. *entry = _table->entries[_table->nentries];
  380. }
  381. }
  382. }
  383. // https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
  384. static uint64_t _hash(char const* name)
  385. {
  386. const uint64_t OFFSET = 14695981039346656037ull;
  387. const uint64_t PRIME = 1099511628211ull;
  388. const unsigned char* str = (const unsigned char*) name;
  389. uint64_t hval = OFFSET;
  390. while (*str) {
  391. hval *= PRIME;
  392. hval ^= (uint64_t) *(str++);
  393. }
  394. return hval;
  395. }
  396. #undef PAR_MAX_ENTRIES
  397. #endif // PAR_FILECACHE_IMPLEMENTATION
  398. #endif // PAR_FILECACHE_H
  399. // par_filecache is distributed under the MIT license:
  400. //
  401. // Copyright (c) 2019 Philip Rideout
  402. //
  403. // Permission is hereby granted, free of charge, to any person obtaining a copy
  404. // of this software and associated documentation files (the "Software"), to deal
  405. // in the Software without restriction, including without limitation the rights
  406. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  407. // copies of the Software, and to permit persons to whom the Software is
  408. // furnished to do so, subject to the following conditions:
  409. //
  410. // The above copyright notice and this permission notice shall be included in
  411. // all copies or substantial portions of the Software.
  412. //
  413. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  414. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  415. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  416. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  417. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  418. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  419. // SOFTWARE.