lizard_parser_lowestprice.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #define LIZARD_LOWESTPRICE_MIN_OFFSET 8
  2. FORCE_INLINE size_t Lizard_more_profitable(Lizard_stream_t* const ctx, const BYTE *best_ip, size_t best_off, size_t best_common, const BYTE *ip, size_t off, size_t common, size_t literals, int last_off)
  3. {
  4. size_t sum;
  5. if (literals > 0)
  6. sum = MAX(common + literals, best_common);
  7. else
  8. sum = MAX(common, best_common - literals);
  9. if ((int)off == last_off) off = 0; // rep code
  10. if ((int)best_off == last_off) best_off = 0;
  11. return Lizard_get_price_LIZv1(ctx, last_off, ip, ctx->off24pos, sum - common, (U32)off, common) <= Lizard_get_price_LIZv1(ctx, last_off, best_ip, ctx->off24pos, sum - best_common, (U32)best_off, best_common);
  12. }
  13. FORCE_INLINE size_t Lizard_better_price(Lizard_stream_t* const ctx, const BYTE *best_ip, size_t best_off, size_t best_common, const BYTE *ip, size_t off, size_t common, int last_off)
  14. {
  15. if ((int)off == last_off) off = 0; // rep code
  16. if ((int)best_off == last_off) best_off = 0;
  17. return Lizard_get_price_LIZv1(ctx, last_off, ip, ctx->off24pos, 0, (U32)off, common) < Lizard_get_price_LIZv1(ctx, last_off, best_ip, ctx->off24pos, common - best_common, (U32)best_off, best_common);
  18. }
  19. FORCE_INLINE int Lizard_FindMatchLowestPrice (Lizard_stream_t* ctx, /* Index table will be updated */
  20. const BYTE* ip, const BYTE* const iLimit,
  21. const BYTE** matchpos)
  22. {
  23. U32* const chainTable = ctx->chainTable;
  24. U32* const HashTable = ctx->hashTable;
  25. const BYTE* const base = ctx->base;
  26. const BYTE* const dictBase = ctx->dictBase;
  27. const intptr_t dictLimit = ctx->dictLimit;
  28. const BYTE* const dictEnd = dictBase + dictLimit;
  29. const intptr_t maxDistance = (1 << ctx->params.windowLog) - 1;
  30. const intptr_t current = (ip - base);
  31. const intptr_t lowLimit = ((intptr_t)ctx->lowLimit + maxDistance >= current) ? (intptr_t)ctx->lowLimit : current - maxDistance;
  32. const BYTE* const lowPrefixPtr = base + dictLimit;
  33. const U32 contentMask = (1 << ctx->params.contentLog) - 1;
  34. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  35. intptr_t matchIndex;
  36. const BYTE* match, *matchDict;
  37. int nbAttempts=ctx->params.searchNum;
  38. size_t ml=0, mlt;
  39. matchIndex = HashTable[Lizard_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)];
  40. if (ctx->last_off >= LIZARD_LOWESTPRICE_MIN_OFFSET) {
  41. intptr_t matchIndexLO = (ip - ctx->last_off) - base;
  42. if (matchIndexLO >= lowLimit) {
  43. if (matchIndexLO >= dictLimit) {
  44. match = base + matchIndexLO;
  45. mlt = Lizard_count(ip, match, iLimit);// + MINMATCH;
  46. // if ((mlt >= minMatchLongOff) || (ctx->last_off < LIZARD_MAX_16BIT_OFFSET))
  47. if (mlt > REPMINMATCH) {
  48. *matchpos = match;
  49. return (int)mlt;
  50. }
  51. } else {
  52. match = dictBase + matchIndexLO;
  53. if ((U32)((dictLimit-1) - matchIndexLO) >= 3) { /* intentional overflow */
  54. mlt = Lizard_count_2segments(ip, match, iLimit, dictEnd, lowPrefixPtr);
  55. // if ((mlt >= minMatchLongOff) || (ctx->last_off < LIZARD_MAX_16BIT_OFFSET))
  56. if (mlt > REPMINMATCH) {
  57. *matchpos = base + matchIndexLO; /* virtual matchpos */
  58. return (int)mlt;
  59. }
  60. }
  61. }
  62. }
  63. }
  64. #if MINMATCH == 3
  65. {
  66. U32 matchIndex3 = ctx->hashTable3[Lizard_hash3Ptr(ip, ctx->params.hashLog3)];
  67. if (matchIndex3 < current && matchIndex3 >= lowLimit)
  68. {
  69. size_t offset = (size_t)current - matchIndex3;
  70. if (offset < LIZARD_MAX_8BIT_OFFSET)
  71. {
  72. match = ip - offset;
  73. if (match > base && MEM_readMINMATCH(ip) == MEM_readMINMATCH(match))
  74. {
  75. ml = 3;//Lizard_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
  76. *matchpos = match;
  77. }
  78. }
  79. }
  80. }
  81. #endif
  82. while ((matchIndex < current) && (matchIndex >= lowLimit) && (nbAttempts)) {
  83. nbAttempts--;
  84. match = base + matchIndex;
  85. if ((U32)(ip - match) >= LIZARD_LOWESTPRICE_MIN_OFFSET) {
  86. if (matchIndex >= dictLimit) {
  87. if (*(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) {
  88. mlt = Lizard_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
  89. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  90. if (!ml || (mlt > ml && Lizard_better_price(ctx, ip, (ip - *matchpos), ml, ip, (ip - match), mlt, ctx->last_off)))
  91. { ml = mlt; *matchpos = match; }
  92. }
  93. } else {
  94. matchDict = dictBase + matchIndex;
  95. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  96. if (MEM_read32(matchDict) == MEM_read32(ip)) {
  97. mlt = Lizard_count_2segments(ip+MINMATCH, matchDict+MINMATCH, iLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  98. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  99. if (!ml || (mlt > ml && Lizard_better_price(ctx, ip, (ip - *matchpos), ml, ip, (U32)(ip - match), mlt, ctx->last_off)))
  100. { ml = mlt; *matchpos = match; } /* virtual matchpos */
  101. }
  102. }
  103. }
  104. matchIndex -= chainTable[matchIndex & contentMask];
  105. }
  106. return (int)ml;
  107. }
  108. FORCE_INLINE size_t Lizard_GetWiderMatch (
  109. Lizard_stream_t* ctx,
  110. const BYTE* const ip,
  111. const BYTE* const iLowLimit,
  112. const BYTE* const iHighLimit,
  113. size_t longest,
  114. const BYTE** matchpos,
  115. const BYTE** startpos)
  116. {
  117. U32* const chainTable = ctx->chainTable;
  118. U32* const HashTable = ctx->hashTable;
  119. const BYTE* const base = ctx->base;
  120. const BYTE* const dictBase = ctx->dictBase;
  121. const intptr_t dictLimit = ctx->dictLimit;
  122. const BYTE* const dictEnd = dictBase + dictLimit;
  123. const intptr_t maxDistance = (1 << ctx->params.windowLog) - 1;
  124. const intptr_t current = (ip - base);
  125. const intptr_t lowLimit = ((intptr_t)ctx->lowLimit + maxDistance >= current) ? (intptr_t)ctx->lowLimit : current - maxDistance;
  126. const BYTE* const lowPrefixPtr = base + dictLimit;
  127. const U32 contentMask = (1 << ctx->params.contentLog) - 1;
  128. const BYTE* match, *matchDict;
  129. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  130. intptr_t matchIndex;
  131. int nbAttempts = ctx->params.searchNum;
  132. size_t mlt;
  133. /* First Match */
  134. matchIndex = HashTable[Lizard_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)];
  135. if (ctx->last_off >= LIZARD_LOWESTPRICE_MIN_OFFSET) {
  136. intptr_t matchIndexLO = (ip - ctx->last_off) - base;
  137. if (matchIndexLO >= lowLimit) {
  138. if (matchIndexLO >= dictLimit) {
  139. match = base + matchIndexLO;
  140. if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) {
  141. int back = 0;
  142. mlt = Lizard_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH;
  143. while ((ip+back > iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
  144. mlt -= back;
  145. if (mlt > longest)
  146. if ((mlt >= minMatchLongOff) || (ctx->last_off < LIZARD_MAX_16BIT_OFFSET)) {
  147. *matchpos = match+back;
  148. *startpos = ip+back;
  149. longest = mlt;
  150. }
  151. }
  152. } else {
  153. match = dictBase + matchIndexLO;
  154. if ((U32)((dictLimit-1) - matchIndexLO) >= 3) /* intentional overflow */
  155. if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) {
  156. int back=0;
  157. mlt = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  158. while ((ip+back > iLowLimit) && (matchIndexLO+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
  159. mlt -= back;
  160. if (mlt > longest)
  161. if ((mlt >= minMatchLongOff) || (ctx->last_off < LIZARD_MAX_16BIT_OFFSET)) {
  162. *matchpos = base + matchIndexLO + back; /* virtual matchpos */
  163. *startpos = ip+back;
  164. longest = mlt;
  165. }
  166. }
  167. }
  168. }
  169. }
  170. #if MINMATCH == 3
  171. {
  172. U32 matchIndex3 = ctx->hashTable3[Lizard_hash3Ptr(ip, ctx->params.hashLog3)];
  173. if (matchIndex3 < current && matchIndex3 >= lowLimit) {
  174. size_t offset = (size_t)current - matchIndex3;
  175. if (offset < LIZARD_MAX_8BIT_OFFSET) {
  176. match = ip - offset;
  177. if (match > base && MEM_readMINMATCH(ip) == MEM_readMINMATCH(match)) {
  178. mlt = Lizard_count(ip + MINMATCH, match + MINMATCH, iHighLimit) + MINMATCH;
  179. int back = 0;
  180. while ((ip + back > iLowLimit) && (match + back > lowPrefixPtr) && (ip[back - 1] == match[back - 1])) back--;
  181. mlt -= back;
  182. if (!longest || (mlt > longest && Lizard_better_price(ctx, *startpos, (*startpos - *matchpos), longest, ip, (ip - match), mlt, ctx->last_off))) {
  183. *matchpos = match + back;
  184. *startpos = ip + back;
  185. longest = mlt;
  186. }
  187. }
  188. }
  189. }
  190. }
  191. #endif
  192. while ((matchIndex < current) && (matchIndex >= lowLimit) && (nbAttempts)) {
  193. nbAttempts--;
  194. match = base + matchIndex;
  195. if ((U32)(ip - match) >= LIZARD_LOWESTPRICE_MIN_OFFSET) {
  196. if (matchIndex >= dictLimit) {
  197. if (MEM_read32(match) == MEM_read32(ip)) {
  198. int back = 0;
  199. mlt = Lizard_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH;
  200. while ((ip+back > iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
  201. mlt -= back;
  202. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  203. if (!longest || (mlt > longest && Lizard_better_price(ctx, *startpos, (*startpos - *matchpos), longest, ip, (ip - match), mlt, ctx->last_off)))
  204. { longest = mlt; *startpos = ip+back; *matchpos = match+back; }
  205. }
  206. } else {
  207. matchDict = dictBase + matchIndex;
  208. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  209. if (MEM_read32(matchDict) == MEM_read32(ip)) {
  210. int back=0;
  211. mlt = Lizard_count_2segments(ip+MINMATCH, matchDict+MINMATCH, iHighLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  212. while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchDict[back-1])) back--;
  213. mlt -= back;
  214. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  215. if (!longest || (mlt > longest && Lizard_better_price(ctx, *startpos, (*startpos - *matchpos), longest, ip, (U32)(ip - match), mlt, ctx->last_off)))
  216. { longest = mlt; *startpos = ip+back; *matchpos = match+back; } /* virtual matchpos */
  217. }
  218. }
  219. }
  220. matchIndex -= chainTable[matchIndex & contentMask];
  221. }
  222. return longest;
  223. }
  224. FORCE_INLINE int Lizard_compress_lowestPrice(
  225. Lizard_stream_t* const ctx,
  226. const BYTE* ip,
  227. const BYTE* const iend)
  228. {
  229. const BYTE* anchor = ip;
  230. const BYTE* const mflimit = iend - MFLIMIT;
  231. const BYTE* const matchlimit = (iend - LASTLITERALS);
  232. size_t ml, ml2, ml0;
  233. const BYTE* ref=NULL;
  234. const BYTE* start2=NULL;
  235. const BYTE* ref2=NULL;
  236. const BYTE* start0;
  237. const BYTE* ref0;
  238. const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit;
  239. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  240. const size_t sufficient_len = ctx->params.sufficientLength;
  241. /* Main Loop */
  242. while (ip < mflimit)
  243. {
  244. Lizard_Insert(ctx, ip);
  245. ml = Lizard_FindMatchLowestPrice (ctx, ip, matchlimit, (&ref));
  246. if (!ml) { ip++; continue; }
  247. {
  248. int back = 0;
  249. while ((ip + back > anchor) && (ref + back > lowPrefixPtr) && (ip[back - 1] == ref[back - 1])) back--;
  250. ml -= back;
  251. ip += back;
  252. ref += back;
  253. }
  254. /* saved, in case we would skip too much */
  255. start0 = ip;
  256. ref0 = ref;
  257. ml0 = ml;
  258. // goto _Encode;
  259. _Search:
  260. if (ip+ml >= mflimit) { goto _Encode; }
  261. if (ml >= sufficient_len) { goto _Encode; }
  262. Lizard_Insert(ctx, ip);
  263. ml2 = (int)Lizard_GetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2);
  264. if (!ml2) goto _Encode;
  265. {
  266. U64 price, best_price;
  267. int off0=0, off1=0;
  268. const BYTE *pos, *best_pos;
  269. // find the lowest price for encoding ml bytes
  270. best_pos = ip;
  271. best_price = LIZARD_MAX_PRICE;
  272. off0 = (int)(ip - ref);
  273. off1 = (int)(start2 - ref2);
  274. for (pos = ip + ml; pos >= start2; pos--)
  275. {
  276. int common0 = (int)(pos - ip);
  277. if (common0 >= MINMATCH) {
  278. price = (int)Lizard_get_price_LIZv1(ctx, ctx->last_off, ip, ctx->off24pos, ip - anchor, (off0 == ctx->last_off) ? 0 : off0, common0);
  279. {
  280. int common1 = (int)(start2 + ml2 - pos);
  281. if (common1 >= MINMATCH)
  282. price += Lizard_get_price_LIZv1(ctx, ctx->last_off, pos, ctx->off24pos, 0, (off1 == off0) ? 0 : (off1), common1);
  283. else
  284. price += Lizard_get_price_LIZv1(ctx, ctx->last_off, pos, ctx->off24pos, common1, 0, 0);
  285. }
  286. if (price < best_price) {
  287. best_price = price;
  288. best_pos = pos;
  289. }
  290. } else {
  291. price = Lizard_get_price_LIZv1(ctx, ctx->last_off, ip, ctx->off24pos, start2 - anchor, (off1 == ctx->last_off) ? 0 : off1, ml2);
  292. if (price < best_price)
  293. best_pos = pos;
  294. break;
  295. }
  296. }
  297. ml = (int)(best_pos - ip);
  298. }
  299. if ((ml < MINMATCH) || ((ml < minMatchLongOff) && ((U32)(ip-ref) >= LIZARD_MAX_16BIT_OFFSET)))
  300. {
  301. ip = start2;
  302. ref = ref2;
  303. ml = ml2;
  304. goto _Search;
  305. }
  306. _Encode:
  307. if (start0 < ip)
  308. {
  309. if (Lizard_more_profitable(ctx, ip, (ip - ref), ml, start0, (start0 - ref0), ml0, (ref0 - ref), ctx->last_off))
  310. {
  311. ip = start0;
  312. ref = ref0;
  313. ml = ml0;
  314. }
  315. }
  316. if (Lizard_encodeSequence_LIZv1(ctx, &ip, &anchor, ml, ((ip - ref == ctx->last_off) ? ip : ref))) return 0;
  317. }
  318. /* Encode Last Literals */
  319. ip = iend;
  320. if (Lizard_encodeLastLiterals_LIZv1(ctx, &ip, &anchor)) goto _output_error;
  321. /* End */
  322. return 1;
  323. _output_error:
  324. return 0;
  325. }