// crn_huffman_codes.cpp
// File: crn_huffman_codes.cpp
// See Copyright Notice and license at the end of inc/crnlib.h
#include "crn_core.h"
#include "crn_huffman_codes.h"

namespace crnlib
{
  7. struct sym_freq
  8. {
  9. uint m_freq;
  10. uint16 m_left;
  11. uint16 m_right;
  12. inline bool operator< (const sym_freq& other) const
  13. {
  14. return m_freq > other.m_freq;
  15. }
  16. };
  17. static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1)
  18. {
  19. const uint cMaxPasses = 2;
  20. uint hist[256 * cMaxPasses];
  21. memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses);
  22. sym_freq* p = syms0;
  23. sym_freq* q = syms0 + (num_syms >> 1) * 2;
  24. for ( ; p != q; p += 2)
  25. {
  26. const uint freq0 = p[0].m_freq;
  27. const uint freq1 = p[1].m_freq;
  28. hist[ freq0 & 0xFF]++;
  29. hist[256 + ((freq0 >> 8) & 0xFF)]++;
  30. hist[ freq1 & 0xFF]++;
  31. hist[256 + ((freq1 >> 8) & 0xFF)]++;
  32. }
  33. if (num_syms & 1)
  34. {
  35. const uint freq = p->m_freq;
  36. hist[ freq & 0xFF]++;
  37. hist[256 + ((freq >> 8) & 0xFF)]++;
  38. }
  39. sym_freq* pCur_syms = syms0;
  40. sym_freq* pNew_syms = syms1;
  41. for (uint pass = 0; pass < cMaxPasses; pass++)
  42. {
  43. const uint* pHist = &hist[pass << 8];
  44. uint offsets[256];
  45. uint cur_ofs = 0;
  46. for (uint i = 0; i < 256; i += 2)
  47. {
  48. offsets[i] = cur_ofs;
  49. cur_ofs += pHist[i];
  50. offsets[i+1] = cur_ofs;
  51. cur_ofs += pHist[i+1];
  52. }
  53. const uint pass_shift = pass << 3;
  54. sym_freq* p = pCur_syms;
  55. sym_freq* q = pCur_syms + (num_syms >> 1) * 2;
  56. for ( ; p != q; p += 2)
  57. {
  58. uint c0 = p[0].m_freq;
  59. uint c1 = p[1].m_freq;
  60. if (pass)
  61. {
  62. c0 >>= 8;
  63. c1 >>= 8;
  64. }
  65. c0 &= 0xFF;
  66. c1 &= 0xFF;
  67. if (c0 == c1)
  68. {
  69. uint dst_offset0 = offsets[c0];
  70. offsets[c0] = dst_offset0 + 2;
  71. pNew_syms[dst_offset0] = p[0];
  72. pNew_syms[dst_offset0 + 1] = p[1];
  73. }
  74. else
  75. {
  76. uint dst_offset0 = offsets[c0]++;
  77. uint dst_offset1 = offsets[c1]++;
  78. pNew_syms[dst_offset0] = p[0];
  79. pNew_syms[dst_offset1] = p[1];
  80. }
  81. }
  82. if (num_syms & 1)
  83. {
  84. uint c = ((p->m_freq) >> pass_shift) & 0xFF;
  85. uint dst_offset = offsets[c];
  86. offsets[c] = dst_offset + 1;
  87. pNew_syms[dst_offset] = *p;
  88. }
  89. sym_freq* t = pCur_syms;
  90. pCur_syms = pNew_syms;
  91. pNew_syms = t;
  92. }
  93. #ifdef CRNLIB_ASSERTS_ENABLED
  94. uint prev_freq = 0;
  95. for (uint i = 0; i < num_syms; i++)
  96. {
  97. CRNLIB_ASSERT(!(pCur_syms[i].m_freq < prev_freq));
  98. prev_freq = pCur_syms[i].m_freq;
  99. }
  100. #endif
  101. return pCur_syms;
  102. }
  103. struct huffman_work_tables
  104. {
  105. enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms };
  106. sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes];
  107. sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes];
  108. uint16 queue[cMaxInternalNodes];
  109. };
  110. void* create_generate_huffman_codes_tables()
  111. {
  112. return crnlib_new<huffman_work_tables>();
  113. }
  114. void free_generate_huffman_codes_tables(void* p)
  115. {
  116. crnlib_delete(static_cast<huffman_work_tables*>(p));
  117. }
#if USE_CALCULATE_MINIMUM_REDUNDANCY
  119. /* calculate_minimum_redundancy() written by
  120. Alistair Moffat, [email protected],
  121. Jyrki Katajainen, [email protected]
  122. November 1996.
  123. */
  124. static void calculate_minimum_redundancy(int A[], int n) {
  125. int root; /* next root node to be used */
  126. int leaf; /* next leaf to be used */
  127. int next; /* next value to be assigned */
  128. int avbl; /* number of available nodes */
  129. int used; /* number of internal nodes */
  130. int dpth; /* current depth of leaves */
  131. /* check for pathological cases */
  132. if (n==0) { return; }
  133. if (n==1) { A[0] = 0; return; }
  134. /* first pass, left to right, setting parent pointers */
  135. A[0] += A[1]; root = 0; leaf = 2;
  136. for (next=1; next < n-1; next++) {
  137. /* select first item for a pairing */
  138. if (leaf>=n || A[root]<A[leaf]) {
  139. A[next] = A[root]; A[root++] = next;
  140. } else
  141. A[next] = A[leaf++];
  142. /* add on the second item */
  143. if (leaf>=n || (root<next && A[root]<A[leaf])) {
  144. A[next] += A[root]; A[root++] = next;
  145. } else
  146. A[next] += A[leaf++];
  147. }
  148. /* second pass, right to left, setting internal depths */
  149. A[n-2] = 0;
  150. for (next=n-3; next>=0; next--)
  151. A[next] = A[A[next]]+1;
  152. /* third pass, right to left, setting leaf depths */
  153. avbl = 1; used = dpth = 0; root = n-2; next = n-1;
  154. while (avbl>0) {
  155. while (root>=0 && A[root]==dpth) {
  156. used++; root--;
  157. }
  158. while (avbl>used) {
  159. A[next--] = dpth; avbl--;
  160. }
  161. avbl = 2*used; dpth++; used = 0;
  162. }
  163. }
#endif
  165. bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret)
  166. {
  167. if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms))
  168. return false;
  169. huffman_work_tables& state = *static_cast<huffman_work_tables*>(pContext);;
  170. uint max_freq = 0;
  171. uint total_freq = 0;
  172. uint num_used_syms = 0;
  173. for (uint i = 0; i < num_syms; i++)
  174. {
  175. uint freq = pFreq[i];
  176. if (!freq)
  177. pCodesizes[i] = 0;
  178. else
  179. {
  180. total_freq += freq;
  181. max_freq = math::maximum(max_freq, freq);
  182. sym_freq& sf = state.syms0[num_used_syms];
  183. sf.m_left = (uint16)i;
  184. sf.m_right = cUINT16_MAX;
  185. sf.m_freq = freq;
  186. num_used_syms++;
  187. }
  188. }
  189. total_freq_ret = total_freq;
  190. if (num_used_syms == 1)
  191. {
  192. pCodesizes[state.syms0[0].m_left] = 1;
  193. return true;
  194. }
  195. sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1);
  196. #if USE_CALCULATE_MINIMUM_REDUNDANCY
  197. int x[cHuffmanMaxSupportedSyms];
  198. for (uint i = 0; i < num_used_syms; i++)
  199. x[i] = state.syms0[i].m_freq;
  200. calculate_minimum_redundancy(x, num_used_syms);
  201. uint max_len = 0;
  202. for (uint i = 0; i < num_used_syms; i++)
  203. {
  204. uint len = x[i];
  205. max_len = math::maximum(len, max_len);
  206. pCodesizes[state.syms0[i].m_left] = static_cast<uint8>(len);
  207. }
  208. return true;
  209. #else
  210. // Dummy node
  211. sym_freq& sf = state.syms0[num_used_syms];
  212. sf.m_left = cUINT16_MAX;
  213. sf.m_right = cUINT16_MAX;
  214. sf.m_freq = UINT_MAX;
  215. uint next_internal_node = num_used_syms + 1;
  216. uint queue_front = 0;
  217. uint queue_end = 0;
  218. uint next_lowest_sym = 0;
  219. uint num_nodes_remaining = num_used_syms;
  220. do
  221. {
  222. uint left_freq = syms[next_lowest_sym].m_freq;
  223. uint left_child = next_lowest_sym;
  224. if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq))
  225. {
  226. left_child = state.queue[queue_front];
  227. left_freq = syms[left_child].m_freq;
  228. queue_front++;
  229. }
  230. else
  231. next_lowest_sym++;
  232. uint right_freq = syms[next_lowest_sym].m_freq;
  233. uint right_child = next_lowest_sym;
  234. if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq))
  235. {
  236. right_child = state.queue[queue_front];
  237. right_freq = syms[right_child].m_freq;
  238. queue_front++;
  239. }
  240. else
  241. next_lowest_sym++;
  242. const uint internal_node_index = next_internal_node;
  243. next_internal_node++;
  244. CRNLIB_ASSERT(next_internal_node < CRNLIB_ARRAYSIZE(state.syms0));
  245. syms[internal_node_index].m_freq = left_freq + right_freq;
  246. syms[internal_node_index].m_left = static_cast<uint16>(left_child);
  247. syms[internal_node_index].m_right = static_cast<uint16>(right_child);
  248. CRNLIB_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes);
  249. state.queue[queue_end] = static_cast<uint16>(internal_node_index);
  250. queue_end++;
  251. num_nodes_remaining--;
  252. } while (num_nodes_remaining > 1);
  253. CRNLIB_ASSERT(next_lowest_sym == num_used_syms);
  254. CRNLIB_ASSERT((queue_end - queue_front) == 1);
  255. uint cur_node_index = state.queue[queue_front];
  256. uint32* pStack = (syms == state.syms0) ? (uint32*)state.syms1 : (uint32*)state.syms0;
  257. uint32* pStack_top = pStack;
  258. uint max_level = 0;
  259. for ( ; ; )
  260. {
  261. uint level = cur_node_index >> 16;
  262. uint node_index = cur_node_index & 0xFFFF;
  263. uint left_child = syms[node_index].m_left;
  264. uint right_child = syms[node_index].m_right;
  265. uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000;
  266. if (left_child < num_used_syms)
  267. {
  268. max_level = math::maximum(max_level, level);
  269. pCodesizes[syms[left_child].m_left] = static_cast<uint8>(level + 1);
  270. if (right_child < num_used_syms)
  271. {
  272. pCodesizes[syms[right_child].m_left] = static_cast<uint8>(level + 1);
  273. if (pStack == pStack_top) break;
  274. cur_node_index = *--pStack;
  275. }
  276. else
  277. {
  278. cur_node_index = next_level | right_child;
  279. }
  280. }
  281. else
  282. {
  283. if (right_child < num_used_syms)
  284. {
  285. max_level = math::maximum(max_level, level);
  286. pCodesizes[syms[right_child].m_left] = static_cast<uint8>(level + 1);
  287. cur_node_index = next_level | left_child;
  288. }
  289. else
  290. {
  291. *pStack++ = next_level | left_child;
  292. cur_node_index = next_level | right_child;
  293. }
  294. }
  295. }
  296. max_code_size = max_level + 1;
  297. #endif
  298. return true;
  299. }
} // namespace crnlib