diskcache.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /*
  2. * libwebsockets - small server side websockets and web server implementation
  3. *
  4. * Copyright (C) 2010 - 2019 Andy Green <[email protected]>
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to
  8. * deal in the Software without restriction, including without limitation the
  9. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10. * sell copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. * IN THE SOFTWARE.
  23. */
  24. #if !defined(_GNU_SOURCE)
  25. #define _GNU_SOURCE
  26. #endif
  27. #include <pthread.h>
  28. #include <libwebsockets.h>
  29. #include "private-lib-core.h"
  30. #include <string.h>
  31. #include <stdio.h>
  32. #include <unistd.h>
  33. #include <fcntl.h>
  34. #include <dirent.h>
  35. #include <time.h>
  36. #include <errno.h>
  37. #include <stdarg.h>
  38. #include <sys/stat.h>
  39. #include <sys/time.h>
  40. #include <sys/types.h>
  41. #if defined(__APPLE__)
  42. #include <sys/dirent.h>
  43. /* Travis OSX does not have DT_REG... */
  44. #if !defined(DT_REG)
  45. #define DT_REG 8
  46. #endif
  47. #endif
  48. struct file_entry {
  49. lws_list_ptr sorted;
  50. lws_list_ptr prev;
  51. char name[64];
  52. time_t modified;
  53. size_t size;
  54. };
  55. struct lws_diskcache_scan {
  56. struct file_entry *batch;
  57. const char *cache_dir_base;
  58. lws_list_ptr head;
  59. time_t last_scan_completed;
  60. uint64_t agg_size;
  61. uint64_t cache_size_limit;
  62. uint64_t avg_size;
  63. uint64_t cache_tries;
  64. uint64_t cache_hits;
  65. int cache_subdir;
  66. int batch_in_use;
  67. int agg_file_count;
  68. int secs_waiting;
  69. };
  70. #define KIB (1024)
  71. #define MIB (KIB * KIB)
  72. #define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)
  73. static const char *hex = "0123456789abcdef";
  74. #define BATCH_COUNT 128
  75. static int
  76. fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
  77. {
  78. struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);
  79. return p2->modified - p1->modified;
  80. }
  81. struct lws_diskcache_scan *
  82. lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
  83. {
  84. struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");
  85. if (!lds)
  86. return NULL;
  87. memset(lds, 0, sizeof(*lds));
  88. lds->cache_dir_base = cache_dir_base;
  89. lds->cache_size_limit = cache_size_limit;
  90. return lds;
  91. }
  92. void
  93. lws_diskcache_destroy(struct lws_diskcache_scan **lds)
  94. {
  95. if ((*lds)->batch)
  96. lws_free((*lds)->batch);
  97. lws_free(*lds);
  98. *lds = NULL;
  99. }
  100. int
  101. lws_diskcache_prepare(const char *cache_base_dir, int mode, int uid)
  102. {
  103. char dir[256];
  104. int n, m;
  105. (void)mkdir(cache_base_dir, mode);
  106. if (chown(cache_base_dir, uid, -1))
  107. lwsl_err("%s: %s: unable to chown %d\n", __func__,
  108. cache_base_dir, uid);
  109. for (n = 0; n < 16; n++) {
  110. lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
  111. (void)mkdir(dir, mode);
  112. if (chown(dir, uid, -1))
  113. lwsl_err("%s: %s: unable to chown %d\n", __func__,
  114. dir, uid);
  115. for (m = 0; m < 16; m++) {
  116. lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
  117. cache_base_dir, hex[n], hex[m]);
  118. (void)mkdir(dir, mode);
  119. if (chown(dir, uid, -1))
  120. lwsl_err("%s: %s: unable to chown %d\n",
  121. __func__, dir, uid);
  122. }
  123. }
  124. return 0;
  125. }
  126. /* copies and then truncates the incoming name, and renames the file at the
  127. * untruncated path to have the new truncated name */
  128. int
  129. lws_diskcache_finalize_name(char *cache)
  130. {
  131. char ren[256], *p;
  132. strncpy(ren, cache, sizeof(ren) - 1);
  133. ren[sizeof(ren) - 1] = '\0';
  134. p = strchr(cache, '~');
  135. if (p) {
  136. *p = '\0';
  137. if (rename(ren, cache)) {
  138. lwsl_err("%s: problem renaming %s to %s\n", __func__,
  139. ren, cache);
  140. return 1;
  141. }
  142. return 0;
  143. }
  144. return 1;
  145. }
  146. int
  147. lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
  148. const char *hash_hex, int *_fd, char *cache, int cache_len,
  149. size_t *extant_cache_len)
  150. {
  151. struct stat s;
  152. int n;
  153. /* caching is disabled? */
  154. if (!lds->cache_dir_base)
  155. return LWS_DISKCACHE_QUERY_NO_CACHE;
  156. if (!is_bot)
  157. lds->cache_tries++;
  158. n = lws_snprintf(cache, cache_len, "%s/%c/%c/%s", lds->cache_dir_base,
  159. hash_hex[0], hash_hex[1], hash_hex);
  160. lwsl_info("%s: job cache %s\n", __func__, cache);
  161. *_fd = open(cache, O_RDONLY);
  162. if (*_fd >= 0) {
  163. int fd;
  164. if (!is_bot)
  165. lds->cache_hits++;
  166. if (fstat(*_fd, &s)) {
  167. close(*_fd);
  168. return LWS_DISKCACHE_QUERY_NO_CACHE;
  169. }
  170. *extant_cache_len = (size_t)s.st_size;
  171. /* "touch" the hit cache file so it's last for LRU now */
  172. fd = open(cache, O_RDWR);
  173. if (fd >= 0)
  174. close(fd);
  175. return LWS_DISKCACHE_QUERY_EXISTS;
  176. }
  177. /* bots are too random to pollute the cache with their antics */
  178. if (is_bot)
  179. return LWS_DISKCACHE_QUERY_NO_CACHE;
  180. /* let's create it first with a unique temp name */
  181. lws_snprintf(cache + n, cache_len - n, "~%d-%p", (int)getpid(),
  182. extant_cache_len);
  183. *_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
  184. if (*_fd < 0) {
  185. /* well... ok... we will proceed without cache then... */
  186. lwsl_notice("%s: Problem creating cache %s: errno %d\n",
  187. __func__, cache, errno);
  188. return LWS_DISKCACHE_QUERY_NO_CACHE;
  189. }
  190. return LWS_DISKCACHE_QUERY_CREATING;
  191. }
  192. int
  193. lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
  194. {
  195. return lds->secs_waiting;
  196. }
  197. /*
  198. * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
  199. * the dirs under the cache dir. Since we don't need or want a full list of
  200. * files in there in memory at once, we restrict the linked-list size to
  201. * BATCH_COUNT entries, and once it is full, simply ignore any further files
  202. * that are newer than the newest one on that list. Files older than the
  203. * newest guy already on the list evict the newest guy already on the list
  204. * and are sorted into the correct order. In this way no matter the number
  205. * of files to be processed the memory requirement is fixed at BATCH_COUNT
  206. * struct file_entry-s.
  207. *
  208. * The oldest subset of BATCH_COUNT files are sorted into the cd->batch
  209. * allocation in more recent -> least recent order.
  210. *
  211. * We want to track the total size of all files we saw as well, so we know if
  212. * we need to actually do anything yet to restrict how much space it's taking
  213. * up.
  214. *
  215. * And we want to do those things statefully and incrementally instead of one
  216. * big atomic operation, since the user may want a huge cache, so we look in
  217. * one cache dir at a time and track state in the repodir struct.
  218. *
  219. * When we have seen everything, we add the doubly-linked prev pointers and then
  220. * if we are over the limit, start deleting up to BATCH_COUNT files working back
  221. * from the end.
  222. */
  223. int
  224. lws_diskcache_trim(struct lws_diskcache_scan *lds)
  225. {
  226. size_t cache_size_limit = lds->cache_size_limit;
  227. char dirpath[132], filepath[132 + 32];
  228. lws_list_ptr lp, op = NULL;
  229. int files_trimmed = 0;
  230. struct file_entry *p;
  231. int fd, n, ret = -1;
  232. size_t trimmed = 0;
  233. struct dirent *de;
  234. struct stat s;
  235. DIR *dir;
  236. if (!lds->cache_subdir) {
  237. if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
  238. return 0;
  239. lds->batch = lws_malloc(sizeof(struct file_entry) *
  240. BATCH_COUNT, "cache_trim");
  241. if (!lds->batch) {
  242. lwsl_err("%s: OOM\n", __func__);
  243. return 1;
  244. }
  245. lds->agg_size = 0;
  246. lds->head = NULL;
  247. lds->batch_in_use = 0;
  248. lds->agg_file_count = 0;
  249. }
  250. lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
  251. lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
  252. hex[lds->cache_subdir & 15]);
  253. dir = opendir(dirpath);
  254. if (!dir) {
  255. lwsl_err("Unable to walk repo dir '%s'\n",
  256. lds->cache_dir_base);
  257. return -1;
  258. }
  259. do {
  260. de = readdir(dir);
  261. if (!de)
  262. break;
  263. if (de->d_type != DT_REG)
  264. continue;
  265. lds->agg_file_count++;
  266. lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
  267. de->d_name);
  268. fd = open(filepath, O_RDONLY);
  269. if (fd < 0) {
  270. lwsl_err("%s: cannot open %s\n", __func__, filepath);
  271. continue;
  272. }
  273. n = fstat(fd, &s);
  274. close(fd);
  275. if (n) {
  276. lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
  277. continue;
  278. }
  279. lds->agg_size += s.st_size;
  280. if (lds->batch_in_use == BATCH_COUNT) {
  281. /*
  282. * once we filled up the batch with candidates, we don't
  283. * need to consider any files newer than the newest guy
  284. * on the list...
  285. */
  286. if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
  287. continue;
  288. /*
  289. * ... and if we find an older file later, we know it
  290. * will be replacing the newest guy on the list, so use
  291. * that directly...
  292. */
  293. p = lds->head;
  294. lds->head = p->sorted;
  295. } else
  296. /* we are still accepting anything to fill the batch */
  297. p = &lds->batch[lds->batch_in_use++];
  298. p->sorted = NULL;
  299. strncpy(p->name, de->d_name, sizeof(p->name) - 1);
  300. p->name[sizeof(p->name) - 1] = '\0';
  301. p->modified = s.st_mtime;
  302. p->size = s.st_size;
  303. lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
  304. } while (de);
  305. ret = 0;
  306. lds->cache_subdir++;
  307. if (lds->cache_subdir != 0x100)
  308. goto done;
  309. /* we completed the whole scan... */
  310. /* if really no guidence, then 256MiB */
  311. if (!cache_size_limit)
  312. cache_size_limit = 256 * 1024 * 1024;
  313. if (lds->agg_size > cache_size_limit) {
  314. /* apply prev pointers to make the list doubly-linked */
  315. lp = lds->head;
  316. while (lp) {
  317. p = lp_to_fe(lp, sorted);
  318. p->prev = op;
  319. op = &p->prev;
  320. lp = p->sorted;
  321. }
  322. /*
  323. * reverse the list (start from tail, now traverse using
  324. * .prev)... it's oldest-first now...
  325. */
  326. lp = op;
  327. while (lp && lds->agg_size > cache_size_limit) {
  328. p = lp_to_fe(lp, prev);
  329. lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
  330. lds->cache_dir_base, p->name[0],
  331. p->name[1], p->name);
  332. if (!unlink(filepath)) {
  333. lds->agg_size -= p->size;
  334. trimmed += p->size;
  335. files_trimmed++;
  336. } else
  337. lwsl_notice("%s: Failed to unlink %s\n",
  338. __func__, filepath);
  339. lp = p->prev;
  340. }
  341. if (files_trimmed)
  342. lwsl_notice("%s: %s: trimmed %d files totalling "
  343. "%lldKib, leaving %lldMiB\n", __func__,
  344. lds->cache_dir_base, files_trimmed,
  345. ((unsigned long long)trimmed) / KIB,
  346. ((unsigned long long)lds->agg_size) / MIB);
  347. }
  348. if (lds->agg_size && lds->agg_file_count)
  349. lds->avg_size = lds->agg_size / lds->agg_file_count;
  350. /*
  351. * estimate how long we can go before scanning again... default we need
  352. * to start again immediately
  353. */
  354. lds->last_scan_completed = time(NULL);
  355. lds->secs_waiting = 1;
  356. if (lds->agg_size < cache_size_limit) {
  357. uint64_t avg = 4096, capacity, projected;
  358. /* let's use 80% of the real average for margin */
  359. if (lds->agg_size && lds->agg_file_count)
  360. avg = ((lds->agg_size * 8) / lds->agg_file_count) / 10;
  361. /*
  362. * if we collected BATCH_COUNT files of the average size,
  363. * how much can we clean up in 256s?
  364. */
  365. capacity = avg * BATCH_COUNT;
  366. /*
  367. * if the cache grew by 10%, would we hit the limit even then?
  368. */
  369. projected = (lds->agg_size * 11) / 10;
  370. if (projected < cache_size_limit)
  371. /* no... */
  372. lds->secs_waiting = (256 / 2) * ((cache_size_limit -
  373. projected) / capacity);
  374. /*
  375. * large waits imply we may not have enough info yet, so
  376. * check once an hour at least.
  377. */
  378. if (lds->secs_waiting > 3600)
  379. lds->secs_waiting = 3600;
  380. } else
  381. lds->secs_waiting = 0;
  382. lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
  383. lds->cache_dir_base,
  384. (unsigned long long)lds->agg_size / KIB,
  385. (unsigned long long)cache_size_limit / KIB,
  386. lds->secs_waiting);
  387. lws_free(lds->batch);
  388. lds->batch = NULL;
  389. lds->cache_subdir = 0;
  390. done:
  391. closedir(dir);
  392. return ret;
  393. }