par_filecache.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. // FILECACHE :: https://github.com/prideout/par
  2. // Simple file-based LRU cache for blobs with content-addressable names.
  3. //
  4. // Each cached item is stored on disk as "{PREFIX}{NAME}", where {PREFIX}
  5. // is passed in when initializing the cache. You'll probably want to specify
  6. // a folder path for your prefix, including the trailing slash.
  7. //
  8. // Each item is divided into a payload (arbitrary size) and an optional header
  9. // (fixed size). The structure of the payload and header are completely up to
  10. // you. The list of items is stored in a text file at "{PREFIX}table", which
  11. // contains a list of names, timestamps, and byte counts. This table is loaded
  12. // only once, but is saved every time the client fetches a blob from the cache,
  13. // so that the most-recently-accessed timestamps are always up to date, even
  14. // when your application doesn't close gracefully.
  15. //
  16. // The MIT License
  17. // Copyright (c) 2015 Philip Rideout
  18. // -----------------------------------------------------------------------------
  19. // BEGIN PUBLIC API
  20. // -----------------------------------------------------------------------------
  21. typedef unsigned char par_byte;
  22. // Initialize the filecache using the given prefix (usually a folder path with
  23. // a trailing slash) and the given maximum byte count. If items already exist
  24. // in the cache when this is called, they are not evicted. Cached items are
  25. // meant to persist from run to run.
  26. void par_filecache_init(char const* prefix, int maxsize);
  27. // Save a blob to the cache using the given unique name. If adding the blob
  28. // would cause the cache to exceed maxsize, the least-recently-used item is
  29. // evicted at this time.
  30. void par_filecache_save(char const* name, par_byte* payload, int payloadsize,
  31. par_byte* header, int headersize);
  32. // Check if the given blob is in the cache; if not, return 0. If so, return 1
  33. // and allocate new memory for payload. The caller should free the payload.
  34. // The header is preallocated so the caller needs to know its size beforehand.
  35. int par_filecache_load(char const* name, par_byte** payload, int* payloadsize,
  36. par_byte* header, int headersize);
  37. // Remove all items from the cache.
  38. void par_filecache_evict_all();
  39. // Set this to zero if you wish to avoid LZ4 compression. I recommend using
  40. // it though, because it's very fast and it's a two-file library.
  41. #ifndef ENABLE_LZ4
  42. #define ENABLE_LZ4 0
  43. #endif
  44. #ifndef PAR_HELPERS
  45. #define PAR_HELPERS 1
  46. #define PAR_PI (3.14159265359)
  47. #define PAR_MIN(a, b) (a > b ? b : a)
  48. #define PAR_MAX(a, b) (a > b ? a : b)
  49. #define PAR_CLAMP(v, lo, hi) PAR_MAX(lo, PAR_MIN(hi, v))
  50. #define PAR_MALLOC(T, N) ((T*) malloc(N * sizeof(T)))
  51. #define PAR_CALLOC(T, N) ((T*) calloc(N * sizeof(T), 1))
  52. #define PAR_REALLOC(T, BUF, N) ((T*) realloc(BUF, sizeof(T) * N))
  53. #define PAR_FREE(BUF) free(BUF)
  54. #define PAR_SWAP(T, A, B) { T tmp = B; B = A; A = tmp; }
  55. #define PAR_SQR(a) (a * a)
  56. #endif
  57. // -----------------------------------------------------------------------------
  58. // END PUBLIC API
  59. // -----------------------------------------------------------------------------
  60. #ifdef PAR_FILECACHE_IMPLEMENTATION
  61. #include <limits.h>
  62. #include <string.h>
  63. #include <assert.h>
  64. #include <stdio.h>
  65. #include <fcntl.h>
  66. #include <unistd.h>
  67. #include <stdlib.h>
  68. #include <stdint.h>
  69. #include <libgen.h>
  70. #include <time.h>
  71. #include <sys/stat.h>
  72. #ifndef PATH_MAX
  73. #define PATH_MAX 4096
  74. #endif
  75. #if ENABLE_LZ4
  76. #include "lz4.h"
  77. #endif
  78. static char * _par_strdup(char const* s)
  79. {
  80. if (s) {
  81. size_t l = strlen(s);
  82. char *s2 = (char*) malloc(l + 1);
  83. if (s2) {
  84. strcpy(s2, s);
  85. }
  86. return s2;
  87. }
  88. return 0;
  89. }
  90. #define PAR_MAX_ENTRIES 64
  91. typedef struct {
  92. time_t last_used_timestamp;
  93. uint64_t hashed_name;
  94. char const* name;
  95. int nbytes;
  96. } filecache_entry_t;
  97. typedef struct {
  98. filecache_entry_t entries[PAR_MAX_ENTRIES];
  99. int nentries;
  100. int totalbytes;
  101. } filecache_table_t;
  102. static void _update_table(char const* item_name, int item_size);
  103. static void _append_table(char const* item_name, int item_size);
  104. static void _read_or_create_tablefile();
  105. static void _save_tablefile();
  106. static void _evict_lru();
  107. static uint64_t _hash(char const* name);
  108. static char _fileprefix[PATH_MAX] = "./_cache.";
  109. static char _tablepath[PATH_MAX] = "./_cache.table";
  110. static int _maxtotalbytes = 1024 * 1024 * 16;
  111. static filecache_table_t* _table = 0;
  112. void par_filecache_init(char const* prefix, int maxsize)
  113. {
  114. size_t len = strlen(prefix);
  115. assert(len + 1 < PATH_MAX && "Cache prefix is too long");
  116. strncpy(_fileprefix, prefix, len + 1);
  117. strcpy(_tablepath, _fileprefix);
  118. strcat(_tablepath, "table");
  119. _maxtotalbytes = maxsize;
  120. }
  121. #if IOS_EXAMPLE
  122. NSString* getPrefix()
  123. {
  124. NSString* cachesFolder = [NSSearchPathForDirectoriesInDomains(
  125. NSCachesDirectory, NSUserDomainMask, YES) firstObject];
  126. NSError* error = nil;
  127. if (![[NSFileManager defaultManager] createDirectoryAtPath : cachesFolder
  128. withIntermediateDirectories : YES
  129. attributes : nil
  130. error : &error]) {
  131. NSLog(@ "MGMPlatformGetCachesFolder error: %@", error);
  132. return nil;
  133. }
  134. return [cachesFolder stringByAppendingString : @ "/_cache."];
  135. }
  136. #endif
  137. static void par_filecache__read(void* dest, int nbytes, FILE* file)
  138. {
  139. int consumed = (int) fread(dest, 1, nbytes, file);
  140. assert(consumed == nbytes);
  141. }
  142. int par_filecache_load(char const* name, par_byte** payload, int* payloadsize,
  143. par_byte* header, int headersize)
  144. {
  145. char qualified[PATH_MAX];
  146. size_t len = strlen(name);
  147. if (len == 0) {
  148. return 0;
  149. }
  150. assert(len + strlen(_fileprefix) < PATH_MAX);
  151. strcpy(qualified, _fileprefix);
  152. strcat(qualified, name);
  153. if (access(qualified, F_OK) == -1) {
  154. return 0;
  155. }
  156. FILE* cachefile = fopen(qualified, "rb");
  157. assert(cachefile && "Unable to open cache file for reading");
  158. fseek(cachefile, 0, SEEK_END);
  159. long fsize = ftell(cachefile);
  160. fseek(cachefile, 0, SEEK_SET);
  161. if (headersize > 0) {
  162. par_filecache__read(header, headersize, cachefile);
  163. }
  164. int32_t dnbytes;
  165. #if ENABLE_LZ4
  166. long cnbytes = fsize - headersize - sizeof(dnbytes);
  167. par_filecache__read(&dnbytes, sizeof(dnbytes), cachefile);
  168. #else
  169. long cnbytes = fsize - headersize;
  170. dnbytes = (int32_t) cnbytes;
  171. #endif
  172. char* cbuff = (char*) malloc(cnbytes);
  173. par_filecache__read(cbuff, cnbytes, cachefile);
  174. #if ENABLE_LZ4
  175. char* dbuff = (char*) malloc(dnbytes);
  176. LZ4_decompress_safe(cbuff, dbuff, (int) cnbytes, dnbytes);
  177. free(cbuff);
  178. #else
  179. char* dbuff = cbuff;
  180. #endif
  181. fclose(cachefile);
  182. *payload = (par_byte*) dbuff;
  183. *payloadsize = dnbytes;
  184. _update_table(name, (int) cnbytes);
  185. return 1;
  186. }
  187. void par_filecache_save(char const* name, par_byte* payload, int payloadsize,
  188. par_byte* header, int headersize)
  189. {
  190. char qualified[PATH_MAX];
  191. size_t len = strlen(name);
  192. if (len == 0) {
  193. return;
  194. }
  195. assert(len + strlen(_fileprefix) < PATH_MAX);
  196. strcpy(qualified, _fileprefix);
  197. strcat(qualified, name);
  198. FILE* cachefile = fopen(qualified, "wb");
  199. assert(cachefile && "Unable to open cache file for writing");
  200. if (headersize > 0) {
  201. fwrite(header, 1, headersize, cachefile);
  202. }
  203. int csize = 0;
  204. if (payloadsize > 0) {
  205. #if ENABLE_LZ4
  206. int32_t nbytes = payloadsize;
  207. fwrite(&nbytes, 1, sizeof(nbytes), cachefile);
  208. int maxsize = LZ4_compressBound(nbytes);
  209. char* dst = (char*) malloc(maxsize);
  210. char const* src = (char const*) payload;
  211. assert(nbytes < LZ4_MAX_INPUT_SIZE);
  212. csize = LZ4_compress_default(src, dst, nbytes, maxsize);
  213. fwrite(dst, 1, csize, cachefile);
  214. free(dst);
  215. #else
  216. csize = payloadsize;
  217. int actual = (int) fwrite(payload, 1, csize, cachefile);
  218. if (actual < csize) {
  219. fclose(cachefile);
  220. remove(qualified);
  221. printf("Unable to save %s to cache (%d bytes)\n", name, csize);
  222. return;
  223. }
  224. #endif
  225. }
  226. fclose(cachefile);
  227. _update_table(name, csize + headersize);
  228. }
  229. void par_filecache_evict_all()
  230. {
  231. printf("Evicting all.\n");
  232. char qualified[PATH_MAX];
  233. if (!_table) {
  234. _read_or_create_tablefile();
  235. }
  236. filecache_entry_t* entry = _table->entries;
  237. for (int i = 0; i < _table->nentries; i++, entry++) {
  238. strcpy(qualified, _fileprefix);
  239. strcat(qualified, entry->name);
  240. printf("Evicting %s\n", qualified);
  241. remove(qualified);
  242. }
  243. _table->nentries = 0;
  244. _table->totalbytes = 0;
  245. remove(_tablepath);
  246. }
  247. // Adds the given item to the table and evicts the LRU items if the total cache
  248. // size exceeds the specified maxsize.
  249. static void _append_table(char const* item_name, int item_size)
  250. {
  251. time_t now = time(0);
  252. if (!_table) {
  253. _read_or_create_tablefile();
  254. }
  255. uint64_t hashed_name = _hash(item_name);
  256. int total = _table->totalbytes + item_size;
  257. while (_table->nentries >= PAR_MAX_ENTRIES || total > _maxtotalbytes) {
  258. assert(_table->nentries > 0 && "Cache size is too small.");
  259. _evict_lru();
  260. total = _table->totalbytes + item_size;
  261. }
  262. _table->totalbytes = total;
  263. filecache_entry_t* entry = &_table->entries[_table->nentries++];
  264. entry->last_used_timestamp = now;
  265. entry->hashed_name = hashed_name;
  266. entry->name = _par_strdup(item_name);
  267. entry->nbytes = item_size;
  268. _save_tablefile();
  269. }
  270. // Updates the timestamp associated with the given item.
  271. static void _update_table(char const* item_name, int item_size)
  272. {
  273. time_t now = time(0);
  274. if (!_table) {
  275. _read_or_create_tablefile();
  276. }
  277. uint64_t hashed_name = _hash(item_name);
  278. filecache_entry_t* entry = _table->entries;
  279. int i;
  280. for (i = 0; i < _table->nentries; i++, entry++) {
  281. if (entry->hashed_name == hashed_name) {
  282. break;
  283. }
  284. }
  285. if (i >= _table->nentries) {
  286. _append_table(item_name, item_size);
  287. return;
  288. }
  289. entry->last_used_timestamp = now;
  290. _save_tablefile();
  291. }
  292. static void _read_or_create_tablefile()
  293. {
  294. _table = (filecache_table_t*) calloc(sizeof(filecache_table_t), 1);
  295. FILE* fhandle = fopen(_tablepath, "r");
  296. if (!fhandle) {
  297. fhandle = fopen(_tablepath, "w");
  298. if (!fhandle) {
  299. mkdir(dirname(_tablepath), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
  300. fhandle = fopen(_tablepath, "w");
  301. }
  302. assert(fhandle && "Unable to create filecache info file.");
  303. } else {
  304. filecache_entry_t entry;
  305. char name[PATH_MAX];
  306. while (1) {
  307. int nargs = fscanf(fhandle, "%ld %d %s", &entry.last_used_timestamp,
  308. &entry.nbytes, name);
  309. if (nargs != 3) {
  310. break;
  311. }
  312. entry.name = _par_strdup(name);
  313. entry.hashed_name = _hash(entry.name);
  314. _table->entries[_table->nentries++] = entry;
  315. _table->totalbytes += entry.nbytes;
  316. }
  317. }
  318. fclose(fhandle);
  319. }
  320. static void _save_tablefile()
  321. {
  322. FILE* fhandle = fopen(_tablepath, "w");
  323. assert(fhandle && "Unable to create filecache info file.");
  324. filecache_entry_t* entry = _table->entries;
  325. for (int i = 0; i < _table->nentries; i++, entry++) {
  326. fprintf(fhandle, "%ld %d %s\n", entry->last_used_timestamp,
  327. entry->nbytes, entry->name);
  328. }
  329. fclose(fhandle);
  330. }
  331. static void _evict_lru()
  332. {
  333. const uint64_t never_evict = _hash("version");
  334. int oldest_index = -1;
  335. time_t oldest_time = LONG_MAX;
  336. filecache_entry_t* entry = _table->entries;
  337. for (int i = 0; i < _table->nentries; i++, entry++) {
  338. if (entry->hashed_name == never_evict) {
  339. continue;
  340. }
  341. if (entry->last_used_timestamp < oldest_time) {
  342. oldest_time = entry->last_used_timestamp;
  343. oldest_index = i;
  344. }
  345. }
  346. if (oldest_index > -1) {
  347. entry = _table->entries + oldest_index;
  348. char qualified[PATH_MAX];
  349. size_t len = strlen(entry->name);
  350. assert(len + strlen(_fileprefix) < PATH_MAX);
  351. strcpy(qualified, _fileprefix);
  352. strcat(qualified, entry->name);
  353. printf("Evicting %s\n", entry->name);
  354. remove(qualified);
  355. _table->totalbytes -= entry->nbytes;
  356. if (_table->nentries-- > 1) {
  357. *entry = _table->entries[_table->nentries];
  358. }
  359. }
  360. }
  361. // https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
  362. static uint64_t _hash(char const* name)
  363. {
  364. const uint64_t OFFSET = 14695981039346656037ull;
  365. const uint64_t PRIME = 1099511628211ull;
  366. const unsigned char* str = (const unsigned char*) name;
  367. uint64_t hval = OFFSET;
  368. while (*str) {
  369. hval *= PRIME;
  370. hval ^= (uint64_t) *(str++);
  371. }
  372. return hval;
  373. }
  374. #undef PAR_MAX_ENTRIES
  375. #endif