lzham_lzcomp_internal.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. // File: lzham_lzcomp_internal.h
  2. // See Copyright Notice and license at the end of include/lzham.h
  3. #pragma once
  4. #include "lzham_match_accel.h"
  5. #include "lzham_symbol_codec.h"
  6. #include "lzham_lzbase.h"
  7. namespace lzham
  8. {
  9. typedef lzham::vector<uint8> byte_vec;
  10. const uint cMaxParseGraphNodes = 3072;
  11. const uint cMaxParseThreads = 8;
  12. enum compression_level
  13. {
  14. cCompressionLevelFastest,
  15. cCompressionLevelFaster,
  16. cCompressionLevelDefault,
  17. cCompressionLevelBetter,
  18. cCompressionLevelUber,
  19. cCompressionLevelCount
  20. };
  21. struct comp_settings
  22. {
  23. uint m_fast_bytes;
  24. bool m_fast_adaptive_huffman_updating;
  25. uint m_match_accel_max_matches_per_probe;
  26. uint m_match_accel_max_probes;
  27. };
  28. class lzcompressor : public CLZBase
  29. {
  30. public:
  31. lzcompressor();
  32. struct init_params
  33. {
  34. enum
  35. {
  36. cMinDictSizeLog2 = CLZBase::cMinDictSizeLog2,
  37. cMaxDictSizeLog2 = CLZBase::cMaxDictSizeLog2,
  38. cDefaultBlockSize = 1024U*512U
  39. };
  40. init_params() :
  41. m_pTask_pool(NULL),
  42. m_max_helper_threads(0),
  43. m_compression_level(cCompressionLevelDefault),
  44. m_dict_size_log2(22),
  45. m_block_size(cDefaultBlockSize),
  46. m_lzham_compress_flags(0),
  47. m_pSeed_bytes(0),
  48. m_num_seed_bytes(0),
  49. m_table_max_update_interval(0),
  50. m_table_update_interval_slow_rate(0)
  51. {
  52. }
  53. task_pool* m_pTask_pool;
  54. uint m_max_helper_threads;
  55. compression_level m_compression_level;
  56. uint m_dict_size_log2;
  57. uint m_block_size;
  58. uint m_lzham_compress_flags;
  59. const void *m_pSeed_bytes;
  60. uint m_num_seed_bytes;
  61. uint m_table_max_update_interval;
  62. uint m_table_update_interval_slow_rate;
  63. };
  64. bool init(const init_params& params);
  65. void clear();
  66. // sync, or sync+dictionary flush
  67. bool flush(lzham_flush_t flush_type);
  68. bool reset();
  69. bool put_bytes(const void* pBuf, uint buf_len);
  70. const byte_vec& get_compressed_data() const { return m_comp_buf; }
  71. byte_vec& get_compressed_data() { return m_comp_buf; }
  72. uint32 get_src_adler32() const { return m_src_adler32; }
  73. private:
  74. class state;
  75. enum
  76. {
  77. cLitComplexity = 1,
  78. cRep0Complexity = 2,
  79. cRep3Complexity = 5,
  80. cLongMatchComplexity = 6,
  81. cLongMatchComplexityLenThresh = 9,
  82. cShortMatchComplexity = 7
  83. };
  84. struct lzdecision
  85. {
  86. int m_pos; // dict position where decision was evaluated
  87. int m_len; // 0 if literal, 1+ if match
  88. int m_dist; // <0 if match rep, else >=1 is match dist
  89. inline lzdecision() { }
  90. inline lzdecision(int pos, int len, int dist) : m_pos(pos), m_len(len), m_dist(dist) { }
  91. inline void init(int pos, int len, int dist) { m_pos = pos; m_len = len; m_dist = dist; }
  92. inline bool is_lit() const { return !m_len; }
  93. inline bool is_match() const { return m_len > 0; } // may be a rep or full match
  94. inline bool is_full_match() const { return (m_len > 0) && (m_dist >= 1); }
  95. inline uint get_len() const { return math::maximum<uint>(m_len, 1); }
  96. inline bool is_rep() const { return m_dist < 0; }
  97. inline bool is_rep0() const { return m_dist == -1; }
  98. uint get_match_dist(const state& s) const;
  99. inline uint get_complexity() const
  100. {
  101. if (is_lit())
  102. return cLitComplexity;
  103. else if (is_rep())
  104. {
  105. LZHAM_ASSUME(cRep0Complexity == 2);
  106. return 1 + -m_dist; // 2, 3, 4, or 5
  107. }
  108. else if (get_len() >= cLongMatchComplexityLenThresh)
  109. return cLongMatchComplexity;
  110. else
  111. return cShortMatchComplexity;
  112. }
  113. inline uint get_min_codable_len() const
  114. {
  115. if (is_lit() || is_rep0())
  116. return 1;
  117. else
  118. return CLZBase::cMinMatchLen;
  119. }
  120. };
  121. struct lzpriced_decision : lzdecision
  122. {
  123. lzpriced_decision() { }
  124. inline lzpriced_decision(int pos, int len, int dist) : lzdecision(pos, len, dist) { }
  125. inline lzpriced_decision(int pos, int len, int dist, bit_cost_t cost) : lzdecision(pos, len, dist), m_cost(cost) { }
  126. inline void init(int pos, int len, int dist, bit_cost_t cost) { lzdecision::init(pos, len, dist); m_cost = cost; }
  127. inline bit_cost_t get_cost() const { return m_cost; }
  128. bit_cost_t m_cost;
  129. };
  130. struct state_base
  131. {
  132. uint m_cur_ofs;
  133. uint m_cur_state;
  134. uint m_match_hist[CLZBase::cMatchHistSize];
  135. inline bool operator== (const state_base &rhs) const
  136. {
  137. if (m_cur_state != rhs.m_cur_state)
  138. return false;
  139. for (uint i = 0; i < CLZBase::cMatchHistSize; i++)
  140. if (m_match_hist[i] != rhs.m_match_hist[i])
  141. return false;
  142. return true;
  143. }
  144. void partial_advance(const lzdecision& lzdec);
  145. inline void save_partial_state(state_base& dst)
  146. {
  147. dst.m_cur_ofs = m_cur_ofs;
  148. dst.m_cur_state = m_cur_state;
  149. memcpy(dst.m_match_hist, m_match_hist, sizeof(m_match_hist));
  150. }
  151. inline void restore_partial_state(const state_base& src)
  152. {
  153. m_cur_ofs = src.m_cur_ofs;
  154. m_cur_state = src.m_cur_state;
  155. memcpy(m_match_hist, src.m_match_hist, sizeof(m_match_hist));
  156. }
  157. };
  158. class state : public state_base
  159. {
  160. public:
  161. state();
  162. void clear();
  163. bool init(CLZBase& lzbase, uint table_max_update_interval, uint table_update_interval_slow_rate);
  164. void reset();
  165. bit_cost_t get_cost(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) const;
  166. bit_cost_t get_len2_match_cost(CLZBase& lzbase, uint dict_pos, uint len2_match_dist, uint is_match_model_index);
  167. bit_cost_t get_lit_cost(CLZBase& lzbase, const search_accelerator& dict, uint dict_pos, uint lit_pred0, uint is_match_model_index) const;
  168. // Returns actual cost.
  169. void get_rep_match_costs(uint dict_pos, bit_cost_t *pBitcosts, uint match_hist_index, int min_len, int max_len, uint is_match_model_index) const;
  170. void get_full_match_costs(CLZBase& lzbase, uint dict_pos, bit_cost_t *pBitcosts, uint match_dist, int min_len, int max_len, uint is_match_model_index) const;
  171. bit_cost_t update_stats(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec);
  172. bool advance(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec);
  173. bool encode(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec);
  174. void print(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec);
  175. bool encode_eob(symbol_codec& codec, const search_accelerator& dict, uint dict_pos);
  176. bool encode_reset_state_partial(symbol_codec& codec, const search_accelerator& dict, uint dict_pos);
  177. void update_match_hist(uint match_dist);
  178. int find_match_dist(uint match_hist) const;
  179. void reset_state_partial();
  180. void start_of_block(const search_accelerator& dict, uint cur_ofs, uint block_index);
  181. void reset_update_rate();
  182. uint get_pred_char(const search_accelerator& dict, int pos, int backward_ofs) const;
  183. inline bool will_reference_last_match(const lzdecision& lzdec) const
  184. {
  185. return (!lzdec.is_match()) && (m_cur_state >= CLZBase::cNumLitStates);
  186. }
  187. uint m_block_start_dict_ofs;
  188. adaptive_bit_model m_is_match_model[CLZBase::cNumStates];
  189. adaptive_bit_model m_is_rep_model[CLZBase::cNumStates];
  190. adaptive_bit_model m_is_rep0_model[CLZBase::cNumStates];
  191. adaptive_bit_model m_is_rep0_single_byte_model[CLZBase::cNumStates];
  192. adaptive_bit_model m_is_rep1_model[CLZBase::cNumStates];
  193. adaptive_bit_model m_is_rep2_model[CLZBase::cNumStates];
  194. quasi_adaptive_huffman_data_model m_lit_table;
  195. quasi_adaptive_huffman_data_model m_delta_lit_table;
  196. quasi_adaptive_huffman_data_model m_main_table;
  197. quasi_adaptive_huffman_data_model m_rep_len_table[2];
  198. quasi_adaptive_huffman_data_model m_large_len_table[2];
  199. quasi_adaptive_huffman_data_model m_dist_lsb_table;
  200. };
  201. class tracked_stat
  202. {
  203. public:
  204. tracked_stat() { clear(); }
  205. void clear() { m_num = 0; m_total = 0.0f; m_total2 = 0.0f; m_min_val = 9e+99; m_max_val = -9e+99; }
  206. void update(double val) { m_num++; m_total += val; m_total2 += val * val; m_min_val = LZHAM_MIN(m_min_val, val); m_max_val = LZHAM_MAX(m_max_val, val); }
  207. tracked_stat &operator += (double val) { update(val); return *this; }
  208. operator double() const { return m_total; }
  209. uint64 get_number_of_values() { return m_num; }
  210. uint32 get_number_of_values32() { return static_cast<uint32>(LZHAM_MIN(UINT_MAX, m_num)); }
  211. double get_total() const { return m_total; }
  212. double get_average() const { return m_num ? m_total / m_num : 0.0f; };
  213. double get_std_dev() const { return m_num ? sqrt( m_num * m_total2 - m_total * m_total ) / m_num: 0.0f; }
  214. double get_min_val() const { return m_num ? m_min_val : 0.0f; }
  215. double get_max_val() const { return m_num ? m_max_val : 0.0f; }
  216. private:
  217. uint64 m_num;
  218. double m_total;
  219. double m_total2;
  220. double m_min_val;
  221. double m_max_val;
  222. };
  223. struct coding_stats
  224. {
  225. coding_stats() { clear(); }
  226. void clear();
  227. void update(const lzdecision& lzdec, const state& cur_state, const search_accelerator& dict, bit_cost_t cost);
  228. void print();
  229. uint m_total_bytes;
  230. uint m_total_contexts;
  231. double m_total_cost;
  232. tracked_stat m_context_stats;
  233. double m_total_match_bits_cost;
  234. double m_worst_match_bits_cost;
  235. double m_total_is_match0_bits_cost;
  236. double m_total_is_match1_bits_cost;
  237. uint m_total_truncated_matches;
  238. uint m_match_truncation_len_hist[CLZBase::cMaxMatchLen + 1];
  239. uint m_match_truncation_hist[CLZBase::cMaxMatchLen + 1];
  240. uint m_match_type_truncation_hist[CLZBase::cNumStates][5];
  241. uint m_match_type_was_not_truncated_hist[CLZBase::cNumStates][5];
  242. uint m_total_nonmatches;
  243. uint m_total_matches;
  244. tracked_stat m_lit_stats;
  245. tracked_stat m_delta_lit_stats;
  246. tracked_stat m_rep_stats[CLZBase::cMatchHistSize];
  247. tracked_stat m_rep0_len1_stats;
  248. tracked_stat m_rep0_len2_plus_stats;
  249. tracked_stat m_full_match_stats[cMaxMatchLen + 1];
  250. uint m_total_far_len2_matches;
  251. uint m_total_near_len2_matches;
  252. uint m_total_update_rate_resets;
  253. uint m_max_len2_dist;
  254. };
  255. init_params m_params;
  256. comp_settings m_settings;
  257. int64 m_src_size;
  258. uint32 m_src_adler32;
  259. search_accelerator m_accel;
  260. symbol_codec m_codec;
  261. coding_stats m_stats;
  262. byte_vec m_block_buf;
  263. byte_vec m_comp_buf;
  264. uint m_step;
  265. uint m_block_start_dict_ofs;
  266. uint m_block_index;
  267. bool m_finished;
  268. bool m_use_task_pool;
  269. struct node_state
  270. {
  271. LZHAM_FORCE_INLINE void clear()
  272. {
  273. m_total_cost = cBitCostMax; //math::cNearlyInfinite;
  274. m_total_complexity = UINT_MAX;
  275. }
  276. // the lzdecision that led from parent to this node_state
  277. lzdecision m_lzdec;
  278. // This is either the state of the parent node (optimal parsing), or the state of the child node (extreme parsing).
  279. state::state_base m_saved_state;
  280. // Total cost to arrive at this node state.
  281. bit_cost_t m_total_cost;
  282. uint m_total_complexity;
  283. // Parent node index.
  284. int16 m_parent_index;
  285. // Parent node state index (only valid when extreme parsing).
  286. int8 m_parent_state_index;
  287. };
  288. struct node
  289. {
  290. LZHAM_FORCE_INLINE void clear()
  291. {
  292. m_num_node_states = 0;
  293. }
  294. uint m_num_node_states;
  295. enum { cMaxNodeStates = 4 };
  296. node_state m_node_states[cMaxNodeStates];
  297. void add_state(int parent_index, int parent_state_index, const lzdecision &lzdec, state &parent_state, bit_cost_t total_cost, uint total_complexity);
  298. };
  299. state m_start_of_block_state; // state at start of block
  300. state m_state; // main thread's current coding state
  301. struct raw_parse_thread_state
  302. {
  303. uint m_start_ofs;
  304. uint m_bytes_to_match;
  305. state m_initial_state;
  306. node m_nodes[cMaxParseGraphNodes + 1];
  307. lzham::vector<lzdecision> m_best_decisions;
  308. bool m_emit_decisions_backwards;
  309. lzham::vector<lzpriced_decision> m_temp_decisions;
  310. uint m_max_greedy_decisions;
  311. uint m_greedy_parse_total_bytes_coded;
  312. bool m_greedy_parse_gave_up;
  313. bool m_issue_reset_state_partial;
  314. bool m_failed;
  315. };
  316. struct parse_thread_state : raw_parse_thread_state
  317. {
  318. uint8 m_unused_alignment_array[128 - (sizeof(raw_parse_thread_state) & 127)];
  319. };
  320. uint m_num_parse_threads;
  321. parse_thread_state m_parse_thread_state[cMaxParseThreads + 1]; // +1 extra for the greedy parser thread (only used for delta compression)
  322. volatile atomic32_t m_parse_jobs_remaining;
  323. semaphore m_parse_jobs_complete;
  324. enum { cMaxBlockHistorySize = 6, cBlockHistoryCompRatioScale = 1000U };
  325. struct block_history
  326. {
  327. uint m_comp_size;
  328. uint m_src_size;
  329. uint m_ratio;
  330. bool m_raw_block;
  331. bool m_reset_update_rate;
  332. };
  333. block_history m_block_history[cMaxBlockHistorySize];
  334. uint m_block_history_size;
  335. uint m_block_history_next;
  336. void update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate);
  337. uint get_recent_block_ratio();
  338. uint get_min_block_ratio();
  339. uint get_max_block_ratio();
  340. uint get_total_recent_reset_update_rate();
  341. bool send_zlib_header();
  342. bool init_seed_bytes();
  343. bool send_final_block();
  344. bool send_configuration();
  345. bool extreme_parse(parse_thread_state &parse_state);
  346. bool optimal_parse(parse_thread_state &parse_state);
  347. int enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector<lzpriced_decision>& decisions, uint min_match_len, uint max_match_len);
  348. bool greedy_parse(parse_thread_state &parse_state);
  349. void parse_job_callback(uint64 data, void* pData_ptr);
  350. bool compress_block(const void* pBuf, uint buf_len);
  351. bool compress_block_internal(const void* pBuf, uint buf_len);
  352. bool code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match);
  353. bool send_sync_block(lzham_flush_t flush_type);
  354. };
  355. } // namespace lzham