lizard_compress_liz.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. #define LIZARD_FREQ_DIV 5
  2. FORCE_INLINE void Lizard_setLog2Prices(Lizard_stream_t* ctx)
  3. {
  4. ctx->log2LitSum = Lizard_highbit32(ctx->litSum+1);
  5. ctx->log2FlagSum = Lizard_highbit32(ctx->flagSum+1);
  6. }
  7. MEM_STATIC void Lizard_rescaleFreqs(Lizard_stream_t* ctx)
  8. {
  9. unsigned u;
  10. ctx->cachedLiterals = NULL;
  11. ctx->cachedPrice = ctx->cachedLitLength = 0;
  12. ctx->litPriceSum = 0;
  13. if (ctx->litSum == 0) {
  14. ctx->litSum = 2 * 256;
  15. ctx->flagSum = 2 * 256;
  16. for (u=0; u < 256; u++) {
  17. ctx->litFreq[u] = 2;
  18. ctx->flagFreq[u] = 2;
  19. }
  20. } else {
  21. ctx->litSum = 0;
  22. ctx->flagSum = 0;
  23. for (u=0; u < 256; u++) {
  24. ctx->litFreq[u] = 1 + (ctx->litFreq[u]>>LIZARD_FREQ_DIV);
  25. ctx->litSum += ctx->litFreq[u];
  26. ctx->flagFreq[u] = 1 + (ctx->flagFreq[u]>>LIZARD_FREQ_DIV);
  27. ctx->flagSum += ctx->flagFreq[u];
  28. }
  29. }
  30. Lizard_setLog2Prices(ctx);
  31. }
  32. FORCE_INLINE int Lizard_encodeSequence_LIZv1 (
  33. Lizard_stream_t* ctx,
  34. const BYTE** ip,
  35. const BYTE** anchor,
  36. size_t matchLength,
  37. const BYTE* const match)
  38. {
  39. U32 offset = (U32)(*ip - match);
  40. size_t length = (size_t)(*ip - *anchor);
  41. BYTE* token = (ctx->flagsPtr)++;
  42. if (length > 0 || offset < LIZARD_MAX_16BIT_OFFSET) {
  43. /* Encode Literal length */
  44. // if ((limitedOutputBuffer) && (ctx->literalsPtr > oend - length - LIZARD_LENGTH_SIZE_LIZv1(length) - WILDCOPYLENGTH)) { LIZARD_LOG_COMPRESS_LIZv1("encodeSequence overflow1\n"); return 1; } /* Check output limit */
  45. if (length >= MAX_SHORT_LITLEN)
  46. { size_t len;
  47. *token = MAX_SHORT_LITLEN;
  48. len = length - MAX_SHORT_LITLEN;
  49. if (len >= (1<<16)) { *(ctx->literalsPtr) = 255; MEM_writeLE24(ctx->literalsPtr+1, (U32)(len)); ctx->literalsPtr += 4; }
  50. else if (len >= 254) { *(ctx->literalsPtr) = 254; MEM_writeLE16(ctx->literalsPtr+1, (U16)(len)); ctx->literalsPtr += 3; }
  51. else *(ctx->literalsPtr)++ = (BYTE)len;
  52. }
  53. else *token = (BYTE)length;
  54. /* Copy Literals */
  55. Lizard_wildCopy(ctx->literalsPtr, *anchor, (ctx->literalsPtr) + length);
  56. #ifndef LIZARD_NO_HUFFMAN
  57. if (ctx->huffType) {
  58. ctx->litSum += (U32)length;
  59. ctx->litPriceSum += (U32)(length * ctx->log2LitSum);
  60. { U32 u;
  61. for (u=0; u < length; u++) {
  62. ctx->litPriceSum -= Lizard_highbit32(ctx->litFreq[ctx->literalsPtr[u]]+1);
  63. ctx->litFreq[ctx->literalsPtr[u]]++;
  64. } }
  65. }
  66. #endif
  67. ctx->literalsPtr += length;
  68. if (offset >= LIZARD_MAX_16BIT_OFFSET) {
  69. COMPLOG_CODEWORDS_LIZv1("T32+ literal=%u match=%u offset=%d\n", (U32)length, 0, 0);
  70. *token+=(1<<ML_RUN_BITS);
  71. #ifndef LIZARD_NO_HUFFMAN
  72. if (ctx->huffType) {
  73. ctx->flagFreq[*token]++;
  74. ctx->flagSum++;
  75. }
  76. #endif
  77. token = (ctx->flagsPtr)++;
  78. }
  79. }
  80. /* Encode Offset */
  81. if (offset >= LIZARD_MAX_16BIT_OFFSET) // 24-bit offset
  82. {
  83. if (matchLength < MM_LONGOFF) printf("ERROR matchLength=%d/%d\n", (int)matchLength, MM_LONGOFF), exit(1);
  84. // if ((limitedOutputBuffer) && (ctx->literalsPtr > oend - 8 /*LIZARD_LENGTH_SIZE_LIZv1(length)*/)) { LIZARD_LOG_COMPRESS_LIZv1("encodeSequence overflow2\n"); return 1; } /* Check output limit */
  85. if (matchLength - MM_LONGOFF >= LIZARD_LAST_LONG_OFF)
  86. {
  87. size_t len = matchLength - MM_LONGOFF - LIZARD_LAST_LONG_OFF;
  88. *token = LIZARD_LAST_LONG_OFF;
  89. if (len >= (1<<16)) { *(ctx->literalsPtr) = 255; MEM_writeLE24(ctx->literalsPtr+1, (U32)(len)); ctx->literalsPtr += 4; }
  90. else if (len >= 254) { *(ctx->literalsPtr) = 254; MEM_writeLE16(ctx->literalsPtr+1, (U16)(len)); ctx->literalsPtr += 3; }
  91. else *(ctx->literalsPtr)++ = (BYTE)len;
  92. COMPLOG_CODEWORDS_LIZv1("T31 literal=%u match=%u offset=%d\n", 0, (U32)matchLength, offset);
  93. }
  94. else
  95. {
  96. COMPLOG_CODEWORDS_LIZv1("T0-30 literal=%u match=%u offset=%d\n", 0, (U32)matchLength, offset);
  97. *token = (BYTE)(matchLength - MM_LONGOFF);
  98. }
  99. MEM_writeLE24(ctx->offset24Ptr, offset);
  100. ctx->offset24Ptr += 3;
  101. ctx->last_off = offset;
  102. ctx->off24pos = *ip;
  103. }
  104. else
  105. {
  106. COMPLOG_CODEWORDS_LIZv1("T32+ literal=%u match=%u offset=%d\n", (U32)length, (U32)matchLength, offset);
  107. if (offset == 0)
  108. {
  109. *token+=(1<<ML_RUN_BITS);
  110. }
  111. else
  112. {
  113. // it should never happen
  114. if (offset < 8) { printf("ERROR offset=%d\n", (int)offset); exit(1); }
  115. if (matchLength < MINMATCH) { printf("ERROR matchLength[%d] < MINMATCH offset=%d\n", (int)matchLength, (int)ctx->last_off); exit(1); }
  116. ctx->last_off = offset;
  117. MEM_writeLE16(ctx->offset16Ptr, (U16)ctx->last_off); ctx->offset16Ptr += 2;
  118. }
  119. /* Encode MatchLength */
  120. length = matchLength;
  121. // if ((limitedOutputBuffer) && (ctx->literalsPtr > oend - 5 /*LIZARD_LENGTH_SIZE_LIZv1(length)*/)) { LIZARD_LOG_COMPRESS_LIZv1("encodeSequence overflow2\n"); return 1; } /* Check output limit */
  122. if (length >= MAX_SHORT_MATCHLEN) {
  123. *token += (BYTE)(MAX_SHORT_MATCHLEN<<RUN_BITS_LIZv1);
  124. length -= MAX_SHORT_MATCHLEN;
  125. if (length >= (1<<16)) { *(ctx->literalsPtr) = 255; MEM_writeLE24(ctx->literalsPtr+1, (U32)(length)); ctx->literalsPtr += 4; }
  126. else if (length >= 254) { *(ctx->literalsPtr) = 254; MEM_writeLE16(ctx->literalsPtr+1, (U16)(length)); ctx->literalsPtr += 3; }
  127. else *(ctx->literalsPtr)++ = (BYTE)length;
  128. }
  129. else *token += (BYTE)(length<<RUN_BITS_LIZv1);
  130. }
  131. #ifndef LIZARD_NO_HUFFMAN
  132. if (ctx->huffType) {
  133. ctx->flagFreq[*token]++;
  134. ctx->flagSum++;
  135. Lizard_setLog2Prices(ctx);
  136. }
  137. #endif
  138. /* Prepare next loop */
  139. *ip += matchLength;
  140. *anchor = *ip;
  141. return 0;
  142. }
  143. FORCE_INLINE int Lizard_encodeLastLiterals_LIZv1 (
  144. Lizard_stream_t* ctx,
  145. const BYTE** ip,
  146. const BYTE** anchor)
  147. {
  148. size_t length = (int)(*ip - *anchor);
  149. (void)ctx;
  150. memcpy(ctx->literalsPtr, *anchor, length);
  151. ctx->literalsPtr += length;
  152. return 0;
  153. }
  154. #define LIZARD_PRICE_MULT 1
  155. #define LIZARD_GET_TOKEN_PRICE_LIZv1(token) (LIZARD_PRICE_MULT * (ctx->log2FlagSum - Lizard_highbit32(ctx->flagFreq[token]+1)))
  156. FORCE_INLINE size_t Lizard_get_price_LIZv1(Lizard_stream_t* const ctx, int rep, const BYTE *ip, const BYTE *off24pos, size_t litLength, U32 offset, size_t matchLength)
  157. {
  158. size_t price = 0;
  159. BYTE token = 0;
  160. #ifndef LIZARD_NO_HUFFMAN
  161. const BYTE* literals = ip - litLength;
  162. U32 u;
  163. if ((ctx->huffType) && (ctx->params.parserType != Lizard_parser_lowestPrice)) {
  164. if (ctx->cachedLiterals == literals && litLength >= ctx->cachedLitLength) {
  165. size_t const additional = litLength - ctx->cachedLitLength;
  166. const BYTE* literals2 = ctx->cachedLiterals + ctx->cachedLitLength;
  167. price = ctx->cachedPrice + LIZARD_PRICE_MULT * additional * ctx->log2LitSum;
  168. for (u=0; u < additional; u++)
  169. price -= LIZARD_PRICE_MULT * Lizard_highbit32(ctx->litFreq[literals2[u]]+1);
  170. ctx->cachedPrice = (U32)price;
  171. ctx->cachedLitLength = (U32)litLength;
  172. } else {
  173. price = LIZARD_PRICE_MULT * litLength * ctx->log2LitSum;
  174. for (u=0; u < litLength; u++)
  175. price -= LIZARD_PRICE_MULT * Lizard_highbit32(ctx->litFreq[literals[u]]+1);
  176. if (litLength >= 12) {
  177. ctx->cachedLiterals = literals;
  178. ctx->cachedPrice = (U32)price;
  179. ctx->cachedLitLength = (U32)litLength;
  180. }
  181. }
  182. }
  183. else
  184. price += 8*litLength; /* Copy Literals */
  185. #else
  186. price += 8*litLength; /* Copy Literals */
  187. (void)ip;
  188. (void)ctx;
  189. #endif
  190. (void)off24pos;
  191. (void)rep;
  192. if (litLength > 0 || offset < LIZARD_MAX_16BIT_OFFSET) {
  193. /* Encode Literal length */
  194. if (litLength >= MAX_SHORT_LITLEN)
  195. { size_t len = litLength - MAX_SHORT_LITLEN;
  196. token = MAX_SHORT_LITLEN;
  197. if (len >= (1<<16)) price += 32;
  198. else if (len >= 254) price += 24;
  199. else price += 8;
  200. }
  201. else token = (BYTE)litLength;
  202. if (offset >= LIZARD_MAX_16BIT_OFFSET) {
  203. token+=(1<<ML_RUN_BITS);
  204. if (ctx->huffType && ctx->params.parserType != Lizard_parser_lowestPrice)
  205. price += LIZARD_GET_TOKEN_PRICE_LIZv1(token);
  206. else
  207. price += 8;
  208. }
  209. }
  210. /* Encode Offset */
  211. if (offset >= LIZARD_MAX_16BIT_OFFSET) { // 24-bit offset
  212. if (matchLength < MM_LONGOFF) return LIZARD_MAX_PRICE; // error
  213. if (matchLength - MM_LONGOFF >= LIZARD_LAST_LONG_OFF) {
  214. size_t len = matchLength - MM_LONGOFF - LIZARD_LAST_LONG_OFF;
  215. token = LIZARD_LAST_LONG_OFF;
  216. if (len >= (1<<16)) price += 32;
  217. else if (len >= 254) price += 24;
  218. else price += 8;
  219. } else {
  220. token = (BYTE)(matchLength - MM_LONGOFF);
  221. }
  222. price += 24;
  223. } else {
  224. size_t length;
  225. if (offset == 0) {
  226. token+=(1<<ML_RUN_BITS);
  227. } else {
  228. if (offset < 8) return LIZARD_MAX_PRICE; // error
  229. if (matchLength < MINMATCH) return LIZARD_MAX_PRICE; // error
  230. price += 16;
  231. }
  232. /* Encode MatchLength */
  233. length = matchLength;
  234. if (length >= MAX_SHORT_MATCHLEN) {
  235. token += (BYTE)(MAX_SHORT_MATCHLEN<<RUN_BITS_LIZv1);
  236. length -= MAX_SHORT_MATCHLEN;
  237. if (length >= (1<<16)) price += 32;
  238. else if (length >= 254) price += 24;
  239. else price += 8;
  240. }
  241. else token += (BYTE)(length<<RUN_BITS_LIZv1);
  242. }
  243. if (offset > 0 || matchLength > 0) {
  244. int offset_load = Lizard_highbit32(offset);
  245. if (ctx->huffType) {
  246. price += ((offset_load>=20) ? ((offset_load-19)*4) : 0);
  247. price += 4 + (matchLength==1);
  248. } else {
  249. price += ((offset_load>=16) ? ((offset_load-15)*4) : 0);
  250. price += 6 + (matchLength==1);
  251. }
  252. if (ctx->huffType && ctx->params.parserType != Lizard_parser_lowestPrice)
  253. price += LIZARD_GET_TOKEN_PRICE_LIZv1(token);
  254. else
  255. price += 8;
  256. } else {
  257. if (ctx->huffType && ctx->params.parserType != Lizard_parser_lowestPrice)
  258. price += LIZARD_GET_TOKEN_PRICE_LIZv1(token); // 1=better ratio
  259. }
  260. return price;
  261. }