lzham_huffman_codes.cpp

// File: huffman_codes.cpp
// See Copyright Notice and license at the end of include/lzham.h
#include "lzham_core.h"
#include "lzham_huffman_codes.h"

namespace lzham
{
   struct sym_freq
   {
      uint m_freq;
      uint16 m_left;
      uint16 m_right;

      inline bool operator< (const sym_freq& other) const
      {
         return m_freq > other.m_freq;
      }
   };
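
   // radix_sort_syms() sorts the sym_freq records by m_freq into nondecreasing order using a
   // byte-wise LSD radix sort. The frequencies come from 16-bit counters, so at most two
   // 8-bit passes are needed; syms0 and syms1 act as ping-pong buffers, and the function
   // returns a pointer to whichever buffer ends up holding the sorted result.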
   static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1)
   {
      const uint cMaxPasses = 2;
      uint hist[256 * cMaxPasses];

      memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses);

      {
         // Build the bucket histograms for both passes in a single sweep, unrolled to
         // process two symbols per iteration.
         sym_freq* p = syms0;
         sym_freq* q = syms0 + (num_syms >> 1) * 2;

         for ( ; p != q; p += 2)
         {
            const uint freq0 = p[0].m_freq;
            const uint freq1 = p[1].m_freq;

            hist[freq0 & 0xFF]++;
            hist[256 + ((freq0 >> 8) & 0xFF)]++;
            hist[freq1 & 0xFF]++;
            hist[256 + ((freq1 >> 8) & 0xFF)]++;
         }

         if (num_syms & 1)
         {
            const uint freq = p->m_freq;
            hist[freq & 0xFF]++;
            hist[256 + ((freq >> 8) & 0xFF)]++;
         }
      }

      sym_freq* pCur_syms = syms0;
      sym_freq* pNew_syms = syms1;

      // If every frequency fits in a single byte (all high bytes are zero), the second pass
      // would be a no-op, so only one pass is performed.
      const uint total_passes = (hist[256] == num_syms) ? 1 : cMaxPasses;

      for (uint pass = 0; pass < total_passes; pass++)
      {
         const uint* pHist = &hist[pass << 8];

         // Turn this pass's histogram into starting offsets (an exclusive prefix sum).
         uint offsets[256];

         uint cur_ofs = 0;
         for (uint i = 0; i < 256; i += 2)
         {
            offsets[i] = cur_ofs;
            cur_ofs += pHist[i];

            offsets[i + 1] = cur_ofs;
            cur_ofs += pHist[i + 1];
         }

         const uint pass_shift = pass << 3;

         sym_freq* p = pCur_syms;
         sym_freq* q = pCur_syms + (num_syms >> 1) * 2;

         for ( ; p != q; p += 2)
         {
            uint c0 = p[0].m_freq;
            uint c1 = p[1].m_freq;

            if (pass)
            {
               c0 >>= 8;
               c1 >>= 8;
            }

            c0 &= 0xFF;
            c1 &= 0xFF;

            if (c0 == c1)
            {
               // Both symbols land in the same bucket, so they can be stored with a single
               // offset update.
               uint dst_offset0 = offsets[c0];
               offsets[c0] = dst_offset0 + 2;

               pNew_syms[dst_offset0] = p[0];
               pNew_syms[dst_offset0 + 1] = p[1];
            }
            else
            {
               uint dst_offset0 = offsets[c0]++;
               uint dst_offset1 = offsets[c1]++;

               pNew_syms[dst_offset0] = p[0];
               pNew_syms[dst_offset1] = p[1];
            }
         }

         if (num_syms & 1)
         {
            uint c = ((p->m_freq) >> pass_shift) & 0xFF;

            uint dst_offset = offsets[c];
            offsets[c] = dst_offset + 1;

            pNew_syms[dst_offset] = *p;
         }

         // Swap the source and destination buffers for the next pass.
         sym_freq* t = pCur_syms;
         pCur_syms = pNew_syms;
         pNew_syms = t;
      }

#if LZHAM_ASSERTS_ENABLED
      uint prev_freq = 0;
      for (uint i = 0; i < num_syms; i++)
      {
         LZHAM_ASSERT(!(pCur_syms[i].m_freq < prev_freq));
         prev_freq = pCur_syms[i].m_freq;
      }
#endif

      return pCur_syms;
   }
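
   // Scratch state for generate_huffman_codes(). syms0 and syms1 double as the ping-pong
   // buffers for radix_sort_syms() and are sized for every possible leaf plus internal node;
   // queue[] is only reserved when USE_CALCULATE_MINIMUM_REDUNDANCY is disabled and is not
   // used by the code below.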
   struct huffman_work_tables
   {
      enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms };

      sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes];
      sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes];

#if !USE_CALCULATE_MINIMUM_REDUNDANCY
      uint16 queue[cMaxInternalNodes];
#endif
   };

   uint get_generate_huffman_codes_table_size()
   {
      return sizeof(huffman_work_tables);
   }
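
   // calculate_minimum_redundancy() computes minimum-redundancy (Huffman) code lengths in
   // place: on entry A[0..n-1] must hold the symbol frequencies sorted into nondecreasing
   // order, and on return A[i] holds the code length, in bits, assigned to the i-th symbol.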
   // calculate_minimum_redundancy() written by Alistair Moffat, [email protected],
   // Jyrki Katajainen, [email protected], November 1996.
   static void calculate_minimum_redundancy(int A[], int n)
   {
      int root;   /* next root node to be used */
      int leaf;   /* next leaf to be used */
      int next;   /* next value to be assigned */
      int avbl;   /* number of available nodes */
      int used;   /* number of internal nodes */
      int dpth;   /* current depth of leaves */

      /* check for pathological cases */
      if (n == 0) { return; }
      if (n == 1) { A[0] = 0; return; }

      /* first pass, left to right, setting parent pointers */
      A[0] += A[1]; root = 0; leaf = 2;
      for (next = 1; next < n - 1; next++) {
         /* select first item for a pairing */
         if (leaf >= n || A[root] < A[leaf]) {
            A[next] = A[root]; A[root++] = next;
         } else
            A[next] = A[leaf++];

         /* add on the second item */
         if (leaf >= n || (root < next && A[root] < A[leaf])) {
            A[next] += A[root]; A[root++] = next;
         } else
            A[next] += A[leaf++];
      }

      /* second pass, right to left, setting internal depths */
      A[n - 2] = 0;
      for (next = n - 3; next >= 0; next--)
         A[next] = A[A[next]] + 1;

      /* third pass, right to left, setting leaf depths */
      avbl = 1; used = dpth = 0; root = n - 2; next = n - 1;
      while (avbl > 0) {
         while (root >= 0 && A[root] == dpth) {
            used++; root--;
         }
         while (avbl > used) {
            A[next--] = dpth; avbl--;
         }
         avbl = 2 * used; dpth++; used = 0;
      }
   }
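
   // generate_huffman_codes() fills pCodesizes[0..num_syms-1] with the code length, in bits,
   // of each symbol (0 for symbols whose frequency is zero). total_freq_ret receives the sum
   // of all frequencies and max_code_size receives the longest generated code length (the
   // degenerate single-used-symbol case returns early without touching max_code_size).
   // pContext must point to a buffer of at least get_generate_huffman_codes_table_size()
   // bytes; the function returns false if num_syms is 0 or exceeds cHuffmanMaxSupportedSyms.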
   bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret)
   {
      if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms))
         return false;

      huffman_work_tables& state = *static_cast<huffman_work_tables*>(pContext);

      uint max_freq = 0;
      uint total_freq = 0;

      // Gather the used (nonzero frequency) symbols into state.syms0 and zero the code sizes
      // of the unused ones.
      uint num_used_syms = 0;
      for (uint i = 0; i < num_syms; i++)
      {
         uint freq = pFreq[i];

         if (!freq)
            pCodesizes[i] = 0;
         else
         {
            total_freq += freq;
            max_freq = math::maximum(max_freq, freq);

            sym_freq& sf = state.syms0[num_used_syms];
            sf.m_left = (uint16)i;
            sf.m_right = UINT16_MAX;
            sf.m_freq = freq;
            num_used_syms++;
         }
      }

      total_freq_ret = total_freq;

      if (num_used_syms == 1)
      {
         pCodesizes[state.syms0[0].m_left] = 1;
         return true;
      }

      // Sort the used symbols by frequency, then compute their code lengths in place.
      sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1);

      int x[cHuffmanMaxSupportedSyms];
      for (uint i = 0; i < num_used_syms; i++)
         x[i] = syms[i].m_freq;

      calculate_minimum_redundancy(x, num_used_syms);

      // Scatter the computed lengths back to the caller's table, tracking the longest code.
      uint max_len = 0;
      for (uint i = 0; i < num_used_syms; i++)
      {
         uint len = x[i];
         max_len = math::maximum(len, max_len);
         pCodesizes[syms[i].m_left] = static_cast<uint8>(len);
      }
      max_code_size = max_len;

      return true;
   }

} // namespace lzham
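
// Illustrative usage sketch (the 256-symbol alphabet and the caller-provided sym_freqs[]
// table of uint16 counts are assumptions made for this example):
//
//    void* work = malloc(lzham::get_generate_huffman_codes_table_size());
//
//    lzham::uint8 code_sizes[256];
//    lzham::uint max_code_size = 0, total_freq = 0;
//    bool ok = lzham::generate_huffman_codes(work, 256, sym_freqs, code_sizes,
//                                            max_code_size, total_freq);
//
//    // On success, code_sizes[i] is the code length (in bits) of symbol i, or 0 if
//    // sym_freqs[i] was zero; the caller may still need to limit max_code_size before
//    // assigning actual Huffman codes.
//
//    free(work);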