bencode.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703
  1. #include "bencode.h"
  2. #include <stdio.h>
  3. #include <sys/uio.h>
  4. #include <unistd.h>
  5. #include <stdlib.h>
  6. #include <assert.h>
  7. #include <string.h>
  8. /* set to 0 for alloc debugging, e.g. through valgrind */
  9. #define BENCODE_MIN_BUFFER_PIECE_LEN 512
  10. #define BENCODE_HASH_BUCKETS 31 /* prime numbers work best */
  11. struct __bencode_buffer_piece {
  12. char *tail;
  13. unsigned int left;
  14. struct __bencode_buffer_piece *next;
  15. char buf[0];
  16. };
  17. struct __bencode_free_list {
  18. void *ptr;
  19. free_func_t func;
  20. struct __bencode_free_list *next;
  21. };
  22. struct __bencode_hash {
  23. struct bencode_item *buckets[BENCODE_HASH_BUCKETS];
  24. };
  25. static bencode_item_t __bencode_end_marker = {
  26. .type = BENCODE_END_MARKER,
  27. .iov[0].iov_base = "e",
  28. .iov[0].iov_len = 1,
  29. .iov_cnt = 1,
  30. .str_len = 1,
  31. };
  32. static bencode_item_t *__bencode_decode(bencode_buffer_t *buf, const char *s, const char *end);
  33. static void __bencode_item_init(bencode_item_t *item) {
  34. item->last_child = item->parent = item->child = item->sibling = NULL;
  35. }
  36. static void __bencode_container_init(bencode_item_t *cont) {
  37. cont->iov[0].iov_len = 1;
  38. cont->iov[1].iov_base = "e";
  39. cont->iov[1].iov_len = 1;
  40. cont->iov_cnt = 2;
  41. cont->str_len = 2;
  42. }
  43. static void __bencode_dictionary_init(bencode_item_t *dict) {
  44. dict->type = BENCODE_DICTIONARY;
  45. dict->iov[0].iov_base = "d";
  46. dict->value = 0;
  47. __bencode_container_init(dict);
  48. }
  49. static void __bencode_list_init(bencode_item_t *list) {
  50. list->type = BENCODE_LIST;
  51. list->iov[0].iov_base = "l";
  52. __bencode_container_init(list);
  53. }
  54. static struct __bencode_buffer_piece *__bencode_piece_new(unsigned int size) {
  55. struct __bencode_buffer_piece *ret;
  56. if (size < BENCODE_MIN_BUFFER_PIECE_LEN)
  57. size = BENCODE_MIN_BUFFER_PIECE_LEN;
  58. ret = BENCODE_MALLOC(sizeof(*ret) + size);
  59. if (!ret)
  60. return NULL;
  61. ret->tail = ret->buf;
  62. ret->left = size;
  63. ret->next = NULL;
  64. return ret;
  65. }
  66. int bencode_buffer_init(bencode_buffer_t *buf) {
  67. buf->pieces = __bencode_piece_new(0);
  68. if (!buf->pieces)
  69. return -1;
  70. buf->free_list = NULL;
  71. buf->error = 0;
  72. return 0;
  73. }
  74. static void *__bencode_alloc(bencode_buffer_t *buf, unsigned int size) {
  75. struct __bencode_buffer_piece *piece;
  76. void *ret;
  77. if (!buf)
  78. return NULL;
  79. if (buf->error)
  80. return NULL;
  81. piece = buf->pieces;
  82. if (size <= piece->left)
  83. goto alloc;
  84. piece = __bencode_piece_new(size);
  85. if (!piece) {
  86. buf->error = 1;
  87. return NULL;
  88. }
  89. piece->next = buf->pieces;
  90. buf->pieces = piece;
  91. assert(size <= piece->left);
  92. alloc:
  93. piece->left -= size;
  94. ret = piece->tail;
  95. piece->tail += size;
  96. return ret;
  97. }
  98. void bencode_buffer_free(bencode_buffer_t *buf) {
  99. struct __bencode_free_list *fl;
  100. struct __bencode_buffer_piece *piece, *next;
  101. for (fl = buf->free_list; fl; fl = fl->next)
  102. fl->func(fl->ptr);
  103. for (piece = buf->pieces; piece; piece = next) {
  104. next = piece->next;
  105. BENCODE_FREE(piece);
  106. }
  107. }
  108. static bencode_item_t *__bencode_item_alloc(bencode_buffer_t *buf, unsigned int payload) {
  109. bencode_item_t *ret;
  110. ret = __bencode_alloc(buf, sizeof(struct bencode_item) + payload);
  111. if (!ret)
  112. return NULL;
  113. ret->buffer = buf;
  114. __bencode_item_init(ret);
  115. return ret;
  116. }
  117. bencode_item_t *bencode_dictionary(bencode_buffer_t *buf) {
  118. bencode_item_t *ret;
  119. ret = __bencode_item_alloc(buf, 0);
  120. if (!ret)
  121. return NULL;
  122. __bencode_dictionary_init(ret);
  123. return ret;
  124. }
  125. bencode_item_t *bencode_list(bencode_buffer_t *buf) {
  126. bencode_item_t *ret;
  127. ret = __bencode_item_alloc(buf, 0);
  128. if (!ret)
  129. return NULL;
  130. __bencode_list_init(ret);
  131. return ret;
  132. }
  133. static void __bencode_container_add(bencode_item_t *parent, bencode_item_t *child) {
  134. if (!parent)
  135. return;
  136. if (!child)
  137. return;
  138. assert(child->parent == NULL);
  139. assert(child->sibling == NULL);
  140. child->parent = parent;
  141. if (parent->last_child)
  142. parent->last_child->sibling = child;
  143. parent->last_child = child;
  144. if (!parent->child)
  145. parent->child = child;
  146. while (parent) {
  147. parent->iov_cnt += child->iov_cnt;
  148. parent->str_len += child->str_len;
  149. parent = parent->parent;
  150. }
  151. }
  152. static bencode_item_t *__bencode_string_alloc(bencode_buffer_t *buf, const void *base,
  153. int str_len, int iov_len, int iov_cnt, bencode_type_t type)
  154. {
  155. bencode_item_t *ret;
  156. int len_len;
  157. assert((str_len <= 99999) && (str_len >= 0));
  158. ret = __bencode_item_alloc(buf, 7);
  159. if (!ret)
  160. return NULL;
  161. len_len = sprintf(ret->__buf, "%d:", str_len);
  162. ret->type = type;
  163. ret->iov[0].iov_base = ret->__buf;
  164. ret->iov[0].iov_len = len_len;
  165. ret->iov[1].iov_base = (void *) base;
  166. ret->iov[1].iov_len = iov_len;
  167. ret->iov_cnt = iov_cnt + 1;
  168. ret->str_len = len_len + str_len;
  169. return ret;
  170. }
  171. bencode_item_t *bencode_string_len_dup(bencode_buffer_t *buf, const char *s, int len) {
  172. char *sd = __bencode_alloc(buf, len);
  173. if (!sd)
  174. return NULL;
  175. memcpy(sd, s, len);
  176. return bencode_string_len(buf, sd, len);
  177. }
  178. bencode_item_t *bencode_string_len(bencode_buffer_t *buf, const char *s, int len) {
  179. return __bencode_string_alloc(buf, s, len, len, 1, BENCODE_STRING);
  180. }
  181. bencode_item_t *bencode_string_iovec(bencode_buffer_t *buf, const struct iovec *iov, int iov_cnt, int str_len) {
  182. int i;
  183. if (iov_cnt < 0)
  184. return NULL;
  185. if (str_len < 0) {
  186. str_len = 0;
  187. for (i = 0; i < iov_cnt; i++)
  188. str_len += iov[i].iov_len;
  189. }
  190. return __bencode_string_alloc(buf, iov, str_len, iov_cnt, iov_cnt, BENCODE_IOVEC);
  191. }
  192. bencode_item_t *bencode_integer(bencode_buffer_t *buf, long long int i) {
  193. bencode_item_t *ret;
  194. int alen, rlen;
  195. alen = 8;
  196. while (1) {
  197. ret = __bencode_item_alloc(buf, alen + 3);
  198. if (!ret)
  199. return NULL;
  200. rlen = snprintf(ret->__buf, alen, "i%llde", i);
  201. if (rlen < alen)
  202. break;
  203. alen <<= 1;
  204. }
  205. ret->type = BENCODE_INTEGER;
  206. ret->iov[0].iov_base = ret->__buf;
  207. ret->iov[0].iov_len = rlen;
  208. ret->iov[1].iov_base = NULL;
  209. ret->iov[1].iov_len = 0;
  210. ret->iov_cnt = 1;
  211. ret->str_len = rlen;
  212. return ret;
  213. }
  214. bencode_item_t *bencode_dictionary_add_len(bencode_item_t *dict, const char *key, int keylen, bencode_item_t *val) {
  215. bencode_item_t *str;
  216. if (!dict || !val)
  217. return NULL;
  218. assert(dict->type == BENCODE_DICTIONARY);
  219. str = bencode_string_len(dict->buffer, key, keylen);
  220. if (!str)
  221. return NULL;
  222. __bencode_container_add(dict, str);
  223. __bencode_container_add(dict, val);
  224. return val;
  225. }
  226. bencode_item_t *bencode_list_add(bencode_item_t *list, bencode_item_t *item) {
  227. if (!list || !item)
  228. return NULL;
  229. assert(list->type == BENCODE_LIST);
  230. __bencode_container_add(list, item);
  231. return item;
  232. }
  233. static int __bencode_iovec_cpy(struct iovec *out, const struct iovec *in, int num) {
  234. memcpy(out, in, num * sizeof(*out));
  235. return num;
  236. }
  237. static int __bencode_str_cpy(char *out, const struct iovec *in, int num) {
  238. char *orig = out;
  239. while (--num >= 0) {
  240. memcpy(out, in->iov_base, in->iov_len);
  241. out += in->iov_len;
  242. in++;
  243. }
  244. return out - orig;
  245. }
  246. static int __bencode_iovec_dump(struct iovec *out, bencode_item_t *item) {
  247. bencode_item_t *child;
  248. struct iovec *orig = out;
  249. assert(item->iov[0].iov_base != NULL);
  250. out += __bencode_iovec_cpy(out, &item->iov[0], 1);
  251. child = item->child;
  252. while (child) {
  253. out += __bencode_iovec_dump(out, child);
  254. child = child->sibling;
  255. }
  256. if (item->type == BENCODE_IOVEC)
  257. out += __bencode_iovec_cpy(out, item->iov[1].iov_base, item->iov[1].iov_len);
  258. else if (item->iov[1].iov_base)
  259. out += __bencode_iovec_cpy(out, &item->iov[1], 1);
  260. assert((out - orig) == item->iov_cnt);
  261. return item->iov_cnt;
  262. }
  263. static int __bencode_str_dump(char *out, bencode_item_t *item) {
  264. char *orig = out;
  265. bencode_item_t *child;
  266. assert(item->iov[0].iov_base != NULL);
  267. out += __bencode_str_cpy(out, &item->iov[0], 1);
  268. child = item->child;
  269. while (child) {
  270. out += __bencode_str_dump(out, child);
  271. child = child->sibling;
  272. }
  273. if (item->type == BENCODE_IOVEC)
  274. out += __bencode_str_cpy(out, item->iov[1].iov_base, item->iov[1].iov_len);
  275. else if (item->iov[1].iov_base)
  276. out += __bencode_str_cpy(out, &item->iov[1], 1);
  277. assert((out - orig) == item->str_len);
  278. *out = '\0';
  279. return item->str_len;
  280. }
  281. struct iovec *bencode_iovec(bencode_item_t *root, int *cnt, unsigned int head, unsigned int tail) {
  282. struct iovec *ret;
  283. if (!root)
  284. return NULL;
  285. assert(cnt != NULL);
  286. assert(root->iov_cnt > 0);
  287. ret = __bencode_alloc(root->buffer, sizeof(*ret) * (root->iov_cnt + head + tail));
  288. if (!ret)
  289. return NULL;
  290. *cnt = __bencode_iovec_dump(ret + head, root);
  291. return ret;
  292. }
  293. char *bencode_collapse(bencode_item_t *root, int *len) {
  294. char *ret;
  295. int l;
  296. if (!root)
  297. return NULL;
  298. assert(root->str_len > 0);
  299. ret = __bencode_alloc(root->buffer, root->str_len + 1);
  300. if (!ret)
  301. return NULL;
  302. l = __bencode_str_dump(ret, root);
  303. if (len)
  304. *len = l;
  305. return ret;
  306. }
  307. char *bencode_collapse_dup(bencode_item_t *root, int *len) {
  308. char *ret;
  309. int l;
  310. if (!root)
  311. return NULL;
  312. assert(root->str_len > 0);
  313. ret = BENCODE_MALLOC(root->str_len + 1);
  314. if (!ret)
  315. return NULL;
  316. l = __bencode_str_dump(ret, root);
  317. if (len)
  318. *len = l;
  319. return ret;
  320. }
  321. static unsigned int __bencode_hash_str_len(const unsigned char *s, int len) {
  322. unsigned long *ul;
  323. unsigned int *ui;
  324. unsigned short *us;
  325. if (len >= sizeof(*ul)) {
  326. ul = (void *) s;
  327. return *ul % BENCODE_HASH_BUCKETS;
  328. }
  329. if (len >= sizeof(*ui)) {
  330. ui = (void *) s;
  331. return *ui % BENCODE_HASH_BUCKETS;
  332. }
  333. if (len >= sizeof(*us)) {
  334. us = (void *) s;
  335. return *us % BENCODE_HASH_BUCKETS;
  336. }
  337. if (len >= sizeof(*s))
  338. return *s % BENCODE_HASH_BUCKETS;
  339. return 0;
  340. }
  341. static unsigned int __bencode_hash_str(bencode_item_t *str) {
  342. assert(str->type == BENCODE_STRING);
  343. return __bencode_hash_str_len(str->iov[1].iov_base, str->iov[1].iov_len);
  344. }
  345. static void __bencode_hash_insert(bencode_item_t *key, struct __bencode_hash *hash) {
  346. unsigned int bucket, i;
  347. i = bucket = __bencode_hash_str(key);
  348. while (1) {
  349. if (!hash->buckets[i]) {
  350. hash->buckets[i] = key;
  351. break;
  352. }
  353. i++;
  354. if (i >= BENCODE_HASH_BUCKETS)
  355. i = 0;
  356. if (i == bucket)
  357. break;
  358. }
  359. }
  360. static bencode_item_t *__bencode_decode_dictionary(bencode_buffer_t *buf, const char *s, const char *end) {
  361. bencode_item_t *ret, *key, *value;
  362. struct __bencode_hash *hash;
  363. if (*s != 'd')
  364. return NULL;
  365. s++;
  366. ret = __bencode_item_alloc(buf, sizeof(*hash));
  367. if (!ret)
  368. return NULL;
  369. __bencode_dictionary_init(ret);
  370. ret->value = 1;
  371. hash = (void *) ret->__buf;
  372. memset(hash, 0, sizeof(*hash));
  373. while (s < end) {
  374. key = __bencode_decode(buf, s, end);
  375. if (!key)
  376. return NULL;
  377. s += key->str_len;
  378. if (key->type == BENCODE_END_MARKER)
  379. break;
  380. if (key->type != BENCODE_STRING)
  381. return NULL;
  382. __bencode_container_add(ret, key);
  383. if (s >= end)
  384. return NULL;
  385. value = __bencode_decode(buf, s, end);
  386. if (!value)
  387. return NULL;
  388. s += value->str_len;
  389. if (value->type == BENCODE_END_MARKER)
  390. return NULL;
  391. __bencode_container_add(ret, value);
  392. __bencode_hash_insert(key, hash);
  393. }
  394. return ret;
  395. }
  396. static bencode_item_t *__bencode_decode_list(bencode_buffer_t *buf, const char *s, const char *end) {
  397. bencode_item_t *ret, *item;
  398. if (*s != 'l')
  399. return NULL;
  400. s++;
  401. ret = __bencode_item_alloc(buf, 0);
  402. if (!ret)
  403. return NULL;
  404. __bencode_list_init(ret);
  405. while (s < end) {
  406. item = __bencode_decode(buf, s, end);
  407. if (!item)
  408. return NULL;
  409. s += item->str_len;
  410. if (item->type == BENCODE_END_MARKER)
  411. break;
  412. __bencode_container_add(ret, item);
  413. }
  414. return ret;
  415. }
  416. static bencode_item_t *__bencode_decode_integer(bencode_buffer_t *buf, const char *s, const char *end) {
  417. long long int i;
  418. const char *orig = s;
  419. char *convend;
  420. bencode_item_t *ret;
  421. if (*s != 'i')
  422. return NULL;
  423. s++;
  424. if (s >= end)
  425. return NULL;
  426. if (*s == '0') {
  427. i = 0;
  428. s++;
  429. goto done;
  430. }
  431. i = strtoll(s, &convend, 10);
  432. if (convend == s)
  433. return NULL;
  434. s += (convend - s);
  435. done:
  436. if (s >= end)
  437. return NULL;
  438. if (*s != 'e')
  439. return NULL;
  440. s++;
  441. ret = __bencode_item_alloc(buf, 0);
  442. if (!ret)
  443. return NULL;
  444. ret->type = BENCODE_INTEGER;
  445. ret->iov[0].iov_base = (void *) orig;
  446. ret->iov[0].iov_len = s - orig;
  447. ret->iov[1].iov_base = NULL;
  448. ret->iov[1].iov_len = 0;
  449. ret->iov_cnt = 1;
  450. ret->str_len = s - orig;
  451. ret->value = i;
  452. return ret;
  453. }
  454. static bencode_item_t *__bencode_decode_string(bencode_buffer_t *buf, const char *s, const char *end) {
  455. unsigned long int sl;
  456. char *convend;
  457. const char *orig = s;
  458. bencode_item_t *ret;
  459. if (*s == '0') {
  460. sl = 0;
  461. s++;
  462. goto colon;
  463. }
  464. sl = strtoul(s, &convend, 10);
  465. if (convend == s)
  466. return NULL;
  467. s += (convend - s);
  468. colon:
  469. if (s >= end)
  470. return NULL;
  471. if (*s != ':')
  472. return NULL;
  473. s++;
  474. if (s + sl > end)
  475. return NULL;
  476. ret = __bencode_item_alloc(buf, 0);
  477. if (!ret)
  478. return NULL;
  479. ret->type = BENCODE_STRING;
  480. ret->iov[0].iov_base = (void *) orig;
  481. ret->iov[0].iov_len = s - orig;
  482. ret->iov[1].iov_base = (void *) s;
  483. ret->iov[1].iov_len = sl;
  484. ret->iov_cnt = 2;
  485. ret->str_len = s - orig + sl;
  486. return ret;
  487. }
  488. static bencode_item_t *__bencode_decode(bencode_buffer_t *buf, const char *s, const char *end) {
  489. if (s >= end)
  490. return NULL;
  491. switch (*s) {
  492. case 'd':
  493. return __bencode_decode_dictionary(buf, s, end);
  494. case 'l':
  495. return __bencode_decode_list(buf, s, end);
  496. case 'i':
  497. return __bencode_decode_integer(buf, s, end);
  498. case 'e':
  499. return &__bencode_end_marker;
  500. case '0':
  501. case '1':
  502. case '2':
  503. case '3':
  504. case '4':
  505. case '5':
  506. case '6':
  507. case '7':
  508. case '8':
  509. case '9':
  510. return __bencode_decode_string(buf, s, end);
  511. default:
  512. return NULL;
  513. }
  514. }
  515. bencode_item_t *bencode_decode(bencode_buffer_t *buf, const char *s, int len) {
  516. assert(s != NULL);
  517. return __bencode_decode(buf, s, s + len);
  518. }
  519. static int __bencode_dictionary_key_match(bencode_item_t *key, const char *keystr, int keylen) {
  520. assert(key->type == BENCODE_STRING);
  521. if (keylen != key->iov[1].iov_len)
  522. return 0;
  523. if (memcmp(keystr, key->iov[1].iov_base, keylen))
  524. return 0;
  525. return 1;
  526. }
  527. bencode_item_t *bencode_dictionary_get_len(bencode_item_t *dict, const char *keystr, int keylen) {
  528. bencode_item_t *key;
  529. unsigned int bucket, i;
  530. struct __bencode_hash *hash;
  531. if (!dict)
  532. return NULL;
  533. if (dict->type != BENCODE_DICTIONARY)
  534. return NULL;
  535. /* try hash lookup first if possible */
  536. if (dict->value == 1) {
  537. hash = (void *) dict->__buf;
  538. i = bucket = __bencode_hash_str_len((const unsigned char *) keystr, keylen);
  539. while (1) {
  540. key = hash->buckets[i];
  541. if (!key)
  542. return NULL; /* would be there, but isn't */
  543. assert(key->sibling != NULL);
  544. if (__bencode_dictionary_key_match(key, keystr, keylen))
  545. return key->sibling;
  546. i++;
  547. if (i >= BENCODE_HASH_BUCKETS)
  548. i = 0;
  549. if (i == bucket)
  550. break; /* fall back to regular lookup */
  551. }
  552. }
  553. for (key = dict->child; key; key = key->sibling->sibling) {
  554. assert(key->sibling != NULL);
  555. if (__bencode_dictionary_key_match(key, keystr, keylen))
  556. return key->sibling;
  557. }
  558. return NULL;
  559. }
  560. void bencode_buffer_destroy_add(bencode_buffer_t *buf, free_func_t func, void *p) {
  561. struct __bencode_free_list *li;
  562. if (!p)
  563. return;
  564. li = __bencode_alloc(buf, sizeof(*li));
  565. if (!li)
  566. return;
  567. li->ptr = p;
  568. li->func = func;
  569. li->next = buf->free_list;
  570. buf->free_list = li;
  571. }