huffman.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. /*
  2. * Huffandpuff minimal Huffman coder
  3. *
  4. * (c)2013 Adam Ierymenko <[email protected]>
  5. * This code is in the public domain and is distributed with NO WARRANTY.
  6. */
  7. #include "huffman.h"
  8. struct _huffman_node
  9. {
  10. struct _huffman_node *lr[2];
  11. struct _huffman_node *qprev,*qnext;
  12. double prob;
  13. unsigned long c;
  14. };
  15. struct _huffman_encode_table
  16. {
  17. unsigned long code;
  18. unsigned long bits;
  19. };
  20. static void _huffman_write_tree_and_make_encode_table(unsigned char *out,unsigned long *outbitctr,unsigned long outlen,struct _huffman_encode_table *et,unsigned long code,unsigned int bits,struct _huffman_node *t)
  21. {
  22. struct _huffman_encode_table *eti;
  23. unsigned int i;
  24. unsigned long byte_index;
  25. byte_index = (*outbitctr)++ >> 3;
  26. byte_index *= (byte_index < outlen);
  27. if (t->lr[0]) {
  28. out[byte_index] <<= 1;
  29. _huffman_write_tree_and_make_encode_table(out,outbitctr,outlen,et,code,bits + 1,t->lr[0]);
  30. _huffman_write_tree_and_make_encode_table(out,outbitctr,outlen,et,code | (1 << bits),bits + 1,t->lr[1]);
  31. } else {
  32. out[byte_index] = (out[byte_index] << 1) | 1;
  33. for(i=0;i<9;++i) {
  34. byte_index = (*outbitctr)++ >> 3;
  35. if (byte_index >= outlen) return;
  36. out[byte_index] = (out[byte_index] << 1) | ((unsigned char)((t->c >> i) & 1));
  37. }
  38. eti = &(et[t->c]);
  39. eti->code = code;
  40. eti->bits = bits;
  41. }
  42. }
  43. static struct _huffman_node *_huffman_read_tree(const unsigned char *in,unsigned long *inbitctr,unsigned long inlen,unsigned char **heapptr,unsigned char *heapend)
  44. {
  45. struct _huffman_node *n;
  46. unsigned int i;
  47. unsigned long byte_index;
  48. n = (struct _huffman_node *)(*heapptr);
  49. *heapptr += sizeof(struct _huffman_node);
  50. if (*heapptr > heapend) return (struct _huffman_node *)0;
  51. byte_index = *inbitctr >> 3;
  52. byte_index *= (byte_index < inlen);
  53. if (((in[byte_index] >> (~((*inbitctr)++) & 7)) & 1)) {
  54. n->lr[0] = (struct _huffman_node *)0;
  55. n->lr[1] = (struct _huffman_node *)0;
  56. n->c = 0;
  57. for(i=0;i<9;++i) {
  58. byte_index = *inbitctr >> 3;
  59. if (byte_index >= inlen) return (struct _huffman_node *)0;
  60. n->c |= (((unsigned int)(in[byte_index] >> (~((*inbitctr)++) & 7))) & 1) << i;
  61. }
  62. } else {
  63. n->lr[0] = _huffman_read_tree(in,inbitctr,inlen,heapptr,heapend);
  64. n->lr[1] = _huffman_read_tree(in,inbitctr,inlen,heapptr,heapend);
  65. if (!((n->lr[0])&&(n->lr[1])))
  66. return (struct _huffman_node *)0;
  67. }
  68. return n;
  69. }
  70. unsigned long huffman_compress(const unsigned char *in,unsigned long inlen,unsigned char *out,unsigned long outlen,void *huffheap)
  71. {
  72. struct _huffman_encode_table *et,*eti;
  73. struct _huffman_node *t,*n;
  74. struct _huffman_node *pair[2];
  75. unsigned char *heapptr = (unsigned char *)huffheap;
  76. unsigned long i,code,byte_index,outbitctr;
  77. unsigned int bits,b;
  78. double *counts,lowest_prob,total_symbols;
  79. counts = (double *)heapptr;
  80. heapptr += (sizeof(double) * 257);
  81. for(i=0;i<256;++i)
  82. counts[i] = 0.0;
  83. counts[256] = 1.0; /* one stop code at end */
  84. for(i=0;i<inlen;++i)
  85. counts[(unsigned long)in[i]] += 1.0;
  86. t = (struct _huffman_node *)0;
  87. total_symbols = (double)(inlen + 1);
  88. for(i=0;i<=256;++i) {
  89. if (counts[i] > 0.0) {
  90. n = (struct _huffman_node *)heapptr;
  91. heapptr += sizeof(struct _huffman_node);
  92. if (t)
  93. t->qprev = n;
  94. n->qprev = (struct _huffman_node *)0;
  95. n->qnext = t;
  96. n->lr[0] = (struct _huffman_node *)0;
  97. n->lr[1] = (struct _huffman_node *)0;
  98. n->prob = counts[i] / total_symbols;
  99. n->c = (unsigned int)i;
  100. t = n;
  101. }
  102. }
  103. while (t->qnext) {
  104. for(i=0;i<2;++i) {
  105. lowest_prob = 1.0;
  106. pair[i] = (struct _huffman_node *)0;
  107. n = t;
  108. while (n) {
  109. if (n->prob <= lowest_prob) {
  110. lowest_prob = n->prob;
  111. pair[i] = n;
  112. }
  113. n = n->qnext;
  114. }
  115. if (pair[i]->qprev)
  116. pair[i]->qprev->qnext = pair[i]->qnext;
  117. else t = pair[i]->qnext;
  118. if (pair[i]->qnext)
  119. pair[i]->qnext->qprev = pair[i]->qprev;
  120. }
  121. n = (struct _huffman_node *)heapptr;
  122. heapptr += sizeof(struct _huffman_node);
  123. n->lr[0] = pair[0];
  124. n->lr[1] = pair[1];
  125. n->prob = pair[0]->prob + pair[1]->prob;
  126. if (t)
  127. t->qprev = n;
  128. n->qprev = (struct _huffman_node *)0;
  129. n->qnext = t;
  130. t = n;
  131. }
  132. et = (struct _huffman_encode_table *)heapptr;
  133. heapptr += (sizeof(struct _huffman_encode_table) * 257);
  134. outbitctr = 0;
  135. _huffman_write_tree_and_make_encode_table(out,&outbitctr,outlen,et,0,0,t);
  136. for(i=0;i<inlen;++i) {
  137. eti = &(et[(unsigned long)in[i]]);
  138. code = eti->code;
  139. bits = eti->bits;
  140. for(b=0;b<bits;++b) {
  141. byte_index = outbitctr++ >> 3;
  142. if (byte_index >= outlen) return 0;
  143. out[byte_index] = (out[byte_index] << 1) | (unsigned char)(code & 1);
  144. code >>= 1;
  145. }
  146. }
  147. code = et[256].code;
  148. bits = et[256].bits;
  149. for(b=0;b<bits;++b) {
  150. byte_index = outbitctr++ >> 3;
  151. if (byte_index >= outlen) return 0;
  152. out[byte_index] = (out[byte_index] << 1) | (unsigned char)(code & 1);
  153. code >>= 1;
  154. }
  155. if (outbitctr > (outlen << 3))
  156. return 0;
  157. else if ((outbitctr & 7)) {
  158. out[i = (outbitctr >> 3)] <<= 8 - (outbitctr & 7);
  159. return (i + 1);
  160. } else return (outbitctr >> 3);
  161. }
  162. unsigned long huffman_decompress(const unsigned char *in,unsigned long inlen,unsigned char *out,unsigned long outlen,void *huffheap)
  163. {
  164. struct _huffman_node *t,*n;
  165. unsigned char *heapptr = (unsigned char *)huffheap;
  166. unsigned long inbitctr,outptr,byte_index = 0;
  167. inbitctr = 0;
  168. t = _huffman_read_tree(in,&inbitctr,inlen,&heapptr,heapptr + HUFFHEAP_SIZE);
  169. if (!t) return 0;
  170. outptr = 0;
  171. for(;;) {
  172. n = t;
  173. while (n->lr[0]) {
  174. byte_index = inbitctr >> 3;
  175. if (byte_index >= inlen) return 0;
  176. n = n->lr[((unsigned long)(in[byte_index] >> (~(inbitctr++) & 7))) & 1];
  177. }
  178. if (n->c == 256) return outptr;
  179. if (outptr == outlen) return 0;
  180. out[outptr++] = (unsigned char)n->c;
  181. }
  182. }
  183. #ifdef HUFFANDPUFF_TEST
  184. #include <stdio.h>
  185. #include <stdlib.h>
  186. #include <string.h>
  187. #include <time.h>
  188. #define HUFFANDPUFF_TEST_MAXLEN 1048576
  189. #define HUFFANDPUFF_TEST_ITER 1024
  190. static unsigned char testin[HUFFANDPUFF_TEST_MAXLEN];
  191. static unsigned char testout[HUFFANDPUFF_TEST_MAXLEN * 2];
  192. static unsigned char testver[HUFFANDPUFF_TEST_MAXLEN];
  193. static unsigned char huffbuf[HUFFHEAP_SIZE];
  194. int main(int argc,char **argv)
  195. {
  196. unsigned long i,k,l,cl,dcl;
  197. int v;
  198. unsigned char mask;
  199. srand(time(0));
  200. for(k=0;k<HUFFANDPUFF_TEST_ITER;++k) {
  201. l = (rand() % HUFFANDPUFF_TEST_MAXLEN) + 1;
  202. mask = (rand() & 0xff);
  203. for(i=0;i<l;++i)
  204. testin[i] = (unsigned char)(rand() & 0xff) & mask;
  205. cl = huffman_compress(testin,l,testout,sizeof(testout),huffbuf);
  206. if (cl) {
  207. memset(testver,0,sizeof(testver));
  208. dcl = huffman_decompress(testout,cl,testver,sizeof(testver),huffbuf);
  209. v = ((dcl)&&(!memcmp(testver,testin,l)));
  210. printf("[%d] in: %d, out: %d, verified: %s\n",(int)k,(int)l,(int)cl,(v) ? "OK" : "FAIL");
  211. } else printf("[%d] in: %d, out: FAIL\n",(int)k,(int)l);
  212. }
  213. printf("\nFuzzing decompress function...\n");
  214. for(;;) {
  215. l = (rand() % HUFFANDPUFF_TEST_MAXLEN) + 1;
  216. mask = (rand() & 0xff);
  217. for(i=0;i<l;++i)
  218. testin[i] = (unsigned char)(rand() & 0xff) & mask;
  219. huffman_decompress(testin,l,testver,sizeof(testver),huffbuf);
  220. printf("."); fflush(stdout);
  221. }
  222. return 0;
  223. }
  224. #endif