lzham_prefix_coding.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. // File: lzham_prefix_coding.cpp
  2. // See Copyright Notice and license at the end of include/lzham.h
  3. #include "lzham_core.h"
  4. #include "lzham_prefix_coding.h"
  5. #ifdef LZHAM_BUILD_DEBUG
  6. //#define TEST_DECODER_TABLES
  7. #endif
  8. namespace lzham
  9. {
  10. namespace prefix_coding
  11. {
  12. bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size)
  13. {
  14. const uint cMaxEverCodeSize = 34;
  15. if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize))
  16. return false;
  17. uint num_codes[cMaxEverCodeSize + 1];
  18. utils::zero_object(num_codes);
  19. bool should_limit = false;
  20. for (uint i = 0; i < num_syms; i++)
  21. {
  22. uint c = pCodesizes[i];
  23. LZHAM_ASSERT(c <= cMaxEverCodeSize);
  24. num_codes[c]++;
  25. if (c > max_code_size)
  26. should_limit = true;
  27. }
  28. if (!should_limit)
  29. return true;
  30. uint ofs = 0;
  31. uint next_sorted_ofs[cMaxEverCodeSize + 1];
  32. for (uint i = 1; i <= cMaxEverCodeSize; i++)
  33. {
  34. next_sorted_ofs[i] = ofs;
  35. ofs += num_codes[i];
  36. }
  37. if ((ofs < 2) || (ofs > cMaxSupportedSyms))
  38. return true;
  39. if (ofs > (1U << max_code_size))
  40. return false;
  41. for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++)
  42. num_codes[max_code_size] += num_codes[i];
  43. // Technique of adjusting tree to enforce maximum code size from LHArc.
  44. // (If you remember what LHArc was, you've been doing this for a LONG time.)
  45. uint total = 0;
  46. for (uint i = max_code_size; i; --i)
  47. total += (num_codes[i] << (max_code_size - i));
  48. if (total == (1U << max_code_size))
  49. return true;
  50. do
  51. {
  52. num_codes[max_code_size]--;
  53. uint i;
  54. for (i = max_code_size - 1; i; --i)
  55. {
  56. if (!num_codes[i])
  57. continue;
  58. num_codes[i]--;
  59. num_codes[i + 1] += 2;
  60. break;
  61. }
  62. if (!i)
  63. return false;
  64. total--;
  65. } while (total != (1U << max_code_size));
  66. uint8 new_codesizes[cMaxSupportedSyms];
  67. uint8* p = new_codesizes;
  68. for (uint i = 1; i <= max_code_size; i++)
  69. {
  70. uint n = num_codes[i];
  71. if (n)
  72. {
  73. memset(p, i, n);
  74. p += n;
  75. }
  76. }
  77. for (uint i = 0; i < num_syms; i++)
  78. {
  79. const uint c = pCodesizes[i];
  80. if (c)
  81. {
  82. uint next_ofs = next_sorted_ofs[c];
  83. next_sorted_ofs[c] = next_ofs + 1;
  84. pCodesizes[i] = static_cast<uint8>(new_codesizes[next_ofs]);
  85. }
  86. }
  87. return true;
  88. }
  89. bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes)
  90. {
  91. uint num_codes[cMaxExpectedCodeSize + 1];
  92. utils::zero_object(num_codes);
  93. for (uint i = 0; i < num_syms; i++)
  94. {
  95. uint c = pCodesizes[i];
  96. LZHAM_ASSERT(c <= cMaxExpectedCodeSize);
  97. num_codes[c]++;
  98. }
  99. uint code = 0;
  100. uint next_code[cMaxExpectedCodeSize + 1];
  101. next_code[0] = 0;
  102. for (uint i = 1; i <= cMaxExpectedCodeSize; i++)
  103. {
  104. next_code[i] = code;
  105. code = (code + num_codes[i]) << 1;
  106. }
  107. if (code != (1 << (cMaxExpectedCodeSize + 1)))
  108. {
  109. uint t = 0;
  110. for (uint i = 1; i <= cMaxExpectedCodeSize; i++)
  111. {
  112. t += num_codes[i];
  113. if (t > 1)
  114. return false;
  115. }
  116. }
  117. for (uint i = 0; i < num_syms; i++)
  118. {
  119. uint c = pCodesizes[i];
  120. LZHAM_ASSERT(!c || (next_code[c] <= UINT16_MAX));
  121. pCodes[i] = static_cast<uint16>(next_code[c]++);
  122. LZHAM_ASSERT(!c || (math::total_bits(pCodes[i]) <= pCodesizes[i]));
  123. }
  124. return true;
  125. }
  126. bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits)
  127. {
  128. uint min_codes[cMaxExpectedCodeSize];
  129. if ((!num_syms) || (table_bits > cMaxTableBits))
  130. return false;
  131. pTables->m_num_syms = num_syms;
  132. uint num_codes[cMaxExpectedCodeSize + 1];
  133. utils::zero_object(num_codes);
  134. for (uint i = 0; i < num_syms; i++)
  135. {
  136. uint c = pCodesizes[i];
  137. num_codes[c]++;
  138. }
  139. uint sorted_positions[cMaxExpectedCodeSize + 1];
  140. uint next_code = 0;
  141. uint total_used_syms = 0;
  142. uint max_code_size = 0;
  143. uint min_code_size = UINT_MAX;
  144. for (uint i = 1; i <= cMaxExpectedCodeSize; i++)
  145. {
  146. const uint n = num_codes[i];
  147. if (!n)
  148. pTables->m_max_codes[i - 1] = 0;//UINT_MAX;
  149. else
  150. {
  151. min_code_size = math::minimum(min_code_size, i);
  152. max_code_size = math::maximum(max_code_size, i);
  153. min_codes[i - 1] = next_code;
  154. pTables->m_max_codes[i - 1] = next_code + n - 1;
  155. pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1));
  156. pTables->m_val_ptrs[i - 1] = total_used_syms;
  157. sorted_positions[i] = total_used_syms;
  158. next_code += n;
  159. total_used_syms += n;
  160. }
  161. next_code <<= 1;
  162. }
  163. pTables->m_total_used_syms = total_used_syms;
  164. if (total_used_syms > pTables->m_cur_sorted_symbol_order_size)
  165. {
  166. pTables->m_cur_sorted_symbol_order_size = total_used_syms;
  167. if (!math::is_power_of_2(total_used_syms))
  168. pTables->m_cur_sorted_symbol_order_size = math::minimum<uint>(num_syms, math::next_pow2(total_used_syms));
  169. if (pTables->m_sorted_symbol_order)
  170. {
  171. lzham_delete_array(pTables->m_sorted_symbol_order);
  172. pTables->m_sorted_symbol_order = NULL;
  173. }
  174. pTables->m_sorted_symbol_order = lzham_new_array<uint16>(pTables->m_cur_sorted_symbol_order_size);
  175. if (!pTables->m_sorted_symbol_order)
  176. return false;
  177. }
  178. pTables->m_min_code_size = static_cast<uint8>(min_code_size);
  179. pTables->m_max_code_size = static_cast<uint8>(max_code_size);
  180. for (uint i = 0; i < num_syms; i++)
  181. {
  182. uint c = pCodesizes[i];
  183. if (c)
  184. {
  185. LZHAM_ASSERT(num_codes[c]);
  186. uint sorted_pos = sorted_positions[c]++;
  187. LZHAM_ASSERT(sorted_pos < total_used_syms);
  188. pTables->m_sorted_symbol_order[sorted_pos] = static_cast<uint16>(i);
  189. }
  190. }
  191. if (table_bits <= pTables->m_min_code_size)
  192. table_bits = 0;
  193. pTables->m_table_bits = table_bits;
  194. if (table_bits)
  195. {
  196. uint table_size = 1 << table_bits;
  197. if (table_size > pTables->m_cur_lookup_size)
  198. {
  199. pTables->m_cur_lookup_size = table_size;
  200. if (pTables->m_lookup)
  201. {
  202. lzham_delete_array(pTables->m_lookup);
  203. pTables->m_lookup = NULL;
  204. }
  205. pTables->m_lookup = lzham_new_array<uint32>(table_size);
  206. if (!pTables->m_lookup)
  207. return false;
  208. }
  209. memset(pTables->m_lookup, 0xFF, static_cast<uint>(sizeof(pTables->m_lookup[0])) * (1UL << table_bits));
  210. for (uint codesize = 1; codesize <= table_bits; codesize++)
  211. {
  212. if (!num_codes[codesize])
  213. continue;
  214. const uint fillsize = table_bits - codesize;
  215. const uint fillnum = 1 << fillsize;
  216. const uint min_code = min_codes[codesize - 1];
  217. const uint max_code = pTables->get_unshifted_max_code(codesize);
  218. const uint val_ptr = pTables->m_val_ptrs[codesize - 1];
  219. for (uint code = min_code; code <= max_code; code++)
  220. {
  221. const uint sym_index = pTables->m_sorted_symbol_order[ val_ptr + code - min_code ];
  222. LZHAM_ASSERT( pCodesizes[sym_index] == codesize );
  223. for (uint j = 0; j < fillnum; j++)
  224. {
  225. const uint t = j + (code << fillsize);
  226. LZHAM_ASSERT(t < (1U << table_bits));
  227. LZHAM_ASSERT(pTables->m_lookup[t] == UINT32_MAX);
  228. pTables->m_lookup[t] = sym_index | (codesize << 16U);
  229. }
  230. }
  231. }
  232. }
  233. for (uint i = 0; i < cMaxExpectedCodeSize; i++)
  234. pTables->m_val_ptrs[i] -= min_codes[i];
  235. pTables->m_table_max_code = 0;
  236. pTables->m_decode_start_code_size = pTables->m_min_code_size;
  237. if (table_bits)
  238. {
  239. uint i;
  240. for (i = table_bits; i >= 1; i--)
  241. {
  242. if (num_codes[i])
  243. {
  244. pTables->m_table_max_code = pTables->m_max_codes[i - 1];
  245. break;
  246. }
  247. }
  248. if (i >= 1)
  249. {
  250. pTables->m_decode_start_code_size = table_bits + 1;
  251. for (i = table_bits + 1; i <= max_code_size; i++)
  252. {
  253. if (num_codes[i])
  254. {
  255. pTables->m_decode_start_code_size = i;
  256. break;
  257. }
  258. }
  259. }
  260. }
  261. // sentinels
  262. pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX;
  263. pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF;
  264. pTables->m_table_shift = 32 - pTables->m_table_bits;
  265. return true;
  266. }
  267. } // namespace prefix_codig
  268. } // namespace lzham