lizard_parser_optimal.h 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. #define LIZARD_LOG_PARSER(fmt, ...) //printf(fmt, __VA_ARGS__)
  2. #define LIZARD_LOG_PRICE(fmt, ...) //printf(fmt, __VA_ARGS__)
  3. #define LIZARD_LOG_ENCODE(fmt, ...) //printf(fmt, __VA_ARGS__)
  4. #define LIZARD_OPTIMAL_MIN_OFFSET 8
  5. #define LIZARD_OPT_NUM (1<<12)
  6. #define REPMINMATCH 1
  7. FORCE_INLINE size_t Lizard_get_price(Lizard_stream_t* const ctx, int rep, const BYTE *ip, const BYTE *off24pos, size_t litLength, U32 offset, size_t matchLength)
  8. {
  9. if (ctx->params.decompressType == Lizard_coderwords_LZ4)
  10. return Lizard_get_price_LZ4(ctx, ip, litLength, offset, matchLength);
  11. return Lizard_get_price_LIZv1(ctx, rep, ip, off24pos, litLength, offset, matchLength);
  12. }
  13. typedef struct
  14. {
  15. int off;
  16. int len;
  17. int back;
  18. } Lizard_match_t;
  19. typedef struct
  20. {
  21. int price;
  22. int off;
  23. int mlen;
  24. int litlen;
  25. int rep;
  26. const BYTE* off24pos;
  27. } Lizard_optimal_t;
  28. /* Update chains up to ip (excluded) */
  29. FORCE_INLINE void Lizard_BinTree_Insert(Lizard_stream_t* ctx, const BYTE* ip)
  30. {
  31. #if MINMATCH == 3
  32. U32* HashTable3 = ctx->hashTable3;
  33. const BYTE* const base = ctx->base;
  34. const U32 target = (U32)(ip - base);
  35. U32 idx = ctx->nextToUpdate;
  36. while(idx < target) {
  37. HashTable3[Lizard_hash3Ptr(base+idx, ctx->params.hashLog3)] = idx;
  38. idx++;
  39. }
  40. ctx->nextToUpdate = target;
  41. #else
  42. (void)ctx; (void)ip;
  43. #endif
  44. }
  45. FORCE_INLINE int Lizard_GetAllMatches (
  46. Lizard_stream_t* ctx,
  47. const BYTE* const ip,
  48. const BYTE* const iLowLimit,
  49. const BYTE* const iHighLimit,
  50. size_t best_mlen,
  51. Lizard_match_t* matches)
  52. {
  53. U32* const chainTable = ctx->chainTable;
  54. U32* const HashTable = ctx->hashTable;
  55. const BYTE* const base = ctx->base;
  56. const BYTE* const dictBase = ctx->dictBase;
  57. const intptr_t dictLimit = ctx->dictLimit;
  58. const BYTE* const dictEnd = dictBase + dictLimit;
  59. const BYTE* const lowPrefixPtr = base + dictLimit;
  60. const intptr_t maxDistance = (1 << ctx->params.windowLog) - 1;
  61. const intptr_t current = (ip - base);
  62. const intptr_t lowLimit = ((intptr_t)ctx->lowLimit + maxDistance >= current) ? (intptr_t)ctx->lowLimit : current - maxDistance;
  63. const U32 contentMask = (1 << ctx->params.contentLog) - 1;
  64. const BYTE* match, *matchDict;
  65. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  66. intptr_t matchIndex;
  67. int nbAttempts = ctx->params.searchNum;
  68. // bool fullSearch = (ctx->params.fullSearch >= 2);
  69. int mnum = 0;
  70. U32* HashPos;
  71. size_t mlt;
  72. if (ip + MINMATCH > iHighLimit) return 0;
  73. /* First Match */
  74. HashPos = &HashTable[Lizard_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)];
  75. matchIndex = *HashPos;
  76. #if MINMATCH == 3
  77. {
  78. U32* const HashTable3 = ctx->hashTable3;
  79. U32* HashPos3 = &HashTable3[Lizard_hash3Ptr(ip, ctx->params.hashLog3)];
  80. if ((*HashPos3 < current) && (*HashPos3 >= lowLimit)) {
  81. size_t offset = current - *HashPos3;
  82. if (offset < LIZARD_MAX_8BIT_OFFSET) {
  83. match = ip - offset;
  84. if (match > base && MEM_readMINMATCH(ip) == MEM_readMINMATCH(match)) {
  85. size_t mlt = Lizard_count(ip + MINMATCH, match + MINMATCH, iHighLimit) + MINMATCH;
  86. int back = 0;
  87. while ((ip + back > iLowLimit) && (match + back > lowPrefixPtr) && (ip[back - 1] == match[back - 1])) back--;
  88. mlt -= back;
  89. matches[mnum].off = (int)offset;
  90. matches[mnum].len = (int)mlt;
  91. matches[mnum].back = -back;
  92. mnum++;
  93. }
  94. }
  95. }
  96. *HashPos3 = current;
  97. }
  98. #endif
  99. chainTable[current & contentMask] = (U32)(current - matchIndex);
  100. *HashPos = (U32)current;
  101. ctx->nextToUpdate++;
  102. if (best_mlen < MINMATCH-1) best_mlen = MINMATCH-1;
  103. while ((matchIndex < current) && (matchIndex >= lowLimit) && (nbAttempts)) {
  104. nbAttempts--;
  105. match = base + matchIndex;
  106. if ((U32)(ip - match) >= LIZARD_OPTIMAL_MIN_OFFSET) {
  107. if (matchIndex >= dictLimit) {
  108. if ((/*fullSearch ||*/ ip[best_mlen] == match[best_mlen]) && (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip))) {
  109. int back = 0;
  110. mlt = Lizard_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH;
  111. while ((ip+back > iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
  112. mlt -= back;
  113. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  114. if (mlt > best_mlen) {
  115. best_mlen = mlt;
  116. matches[mnum].off = (int)(ip - match);
  117. matches[mnum].len = (int)mlt;
  118. matches[mnum].back = -back;
  119. mnum++;
  120. if (best_mlen > LIZARD_OPT_NUM) break;
  121. }
  122. }
  123. } else {
  124. matchDict = dictBase + matchIndex;
  125. // fprintf(stderr, "dictBase[%p]+matchIndex[%d]=match[%p] dictLimit=%d base=%p ip=%p iLimit=%p off=%d\n", dictBase, matchIndex, match, dictLimit, base, ip, iLimit, (U32)(ip-match));
  126. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  127. if (MEM_readMINMATCH(matchDict) == MEM_readMINMATCH(ip)) {
  128. int back=0;
  129. mlt = Lizard_count_2segments(ip+MINMATCH, matchDict+MINMATCH, iHighLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  130. while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchDict[back-1])) back--;
  131. mlt -= back;
  132. if ((mlt >= minMatchLongOff) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  133. if (mlt > best_mlen) {
  134. best_mlen = mlt;
  135. matches[mnum].off = (int)(ip - match);
  136. matches[mnum].len = (int)mlt;
  137. matches[mnum].back = -back;
  138. mnum++;
  139. if (best_mlen > LIZARD_OPT_NUM) break;
  140. }
  141. }
  142. }
  143. }
  144. matchIndex -= chainTable[matchIndex & contentMask];
  145. }
  146. return mnum;
  147. }
  148. FORCE_INLINE int Lizard_BinTree_GetAllMatches (
  149. Lizard_stream_t* ctx,
  150. const BYTE* const ip,
  151. const BYTE* const iHighLimit,
  152. size_t best_mlen,
  153. Lizard_match_t* matches)
  154. {
  155. U32* const chainTable = ctx->chainTable;
  156. U32* const HashTable = ctx->hashTable;
  157. const BYTE* const base = ctx->base;
  158. const intptr_t dictLimit = ctx->dictLimit;
  159. const BYTE* const dictBase = ctx->dictBase;
  160. const BYTE* const dictEnd = dictBase + dictLimit;
  161. const U32 contentMask = (1 << ctx->params.contentLog) - 1;
  162. const BYTE* const lowPrefixPtr = base + dictLimit;
  163. const intptr_t maxDistance = (1 << ctx->params.windowLog) - 1;
  164. const intptr_t current = (ip - base);
  165. const intptr_t lowLimit = ((intptr_t)ctx->lowLimit + maxDistance >= current) ? (intptr_t)ctx->lowLimit : current - maxDistance;
  166. const BYTE* match;
  167. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  168. int nbAttempts = ctx->params.searchNum;
  169. int mnum = 0;
  170. U32 *ptr0, *ptr1, delta0, delta1;
  171. intptr_t matchIndex;
  172. size_t mlt = 0;
  173. U32* HashPos;
  174. if (ip + MINMATCH > iHighLimit) return 0;
  175. /* First Match */
  176. HashPos = &HashTable[Lizard_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)];
  177. matchIndex = *HashPos;
  178. #if MINMATCH == 3
  179. {
  180. U32* HashPos3 = &ctx->hashTable3[Lizard_hash3Ptr(ip, ctx->params.hashLog3)];
  181. if ((*HashPos3 < current) && (*HashPos3 >= lowLimit)) {
  182. size_t offset = current - *HashPos3;
  183. if (offset < LIZARD_MAX_8BIT_OFFSET) {
  184. match = ip - offset;
  185. if (match > base && MEM_readMINMATCH(ip) == MEM_readMINMATCH(match))
  186. {
  187. mlt = Lizard_count(ip + MINMATCH, match + MINMATCH, iHighLimit) + MINMATCH;
  188. matches[mnum].off = (int)offset;
  189. matches[mnum].len = (int)mlt;
  190. matches[mnum].back = 0;
  191. mnum++;
  192. }
  193. }
  194. *HashPos3 = current;
  195. }
  196. }
  197. #endif
  198. *HashPos = (U32)current;
  199. ctx->nextToUpdate++;
  200. // check rest of matches
  201. ptr0 = &chainTable[(current*2+1) & contentMask];
  202. ptr1 = &chainTable[(current*2) & contentMask];
  203. delta0 = delta1 = (U32)(current - matchIndex);
  204. if (best_mlen < MINMATCH-1) best_mlen = MINMATCH-1;
  205. while ((matchIndex < current) && (matchIndex >= lowLimit) && (nbAttempts)) {
  206. nbAttempts--;
  207. if (matchIndex >= dictLimit) {
  208. match = base + matchIndex;
  209. // if (ip[mlt] == match[mlt])
  210. mlt = Lizard_count(ip, match, iHighLimit);
  211. } else {
  212. match = dictBase + matchIndex;
  213. mlt = Lizard_count_2segments(ip, match, iHighLimit, dictEnd, lowPrefixPtr);
  214. if (matchIndex + (int)mlt >= dictLimit)
  215. match = base + matchIndex; /* to prepare for next usage of match[mlt] */
  216. }
  217. if ((U32)(current - matchIndex) >= LIZARD_OPTIMAL_MIN_OFFSET) {
  218. if ((mlt >= minMatchLongOff) || ((U32)(current - matchIndex) < LIZARD_MAX_16BIT_OFFSET))
  219. if (mlt > best_mlen) {
  220. best_mlen = mlt;
  221. matches[mnum].off = (int)(current - matchIndex);
  222. matches[mnum].len = (int)mlt;
  223. matches[mnum].back = 0;
  224. mnum++;
  225. if (mlt > LIZARD_OPT_NUM) break;
  226. if (ip + mlt >= iHighLimit) break;
  227. }
  228. } else {
  229. #if 1
  230. intptr_t newMatchIndex;
  231. size_t newml = 0, newoff = 0;
  232. do {
  233. newoff += (int)(current - matchIndex);
  234. } while (newoff < LIZARD_OPTIMAL_MIN_OFFSET);
  235. newMatchIndex = current - newoff;
  236. if (newMatchIndex >= dictLimit) newml = Lizard_count(ip, base + newMatchIndex, iHighLimit);
  237. // printf("%d: off=%d mlt=%d\n", (U32)current, (U32)(current - matchIndex), (int)mlt);
  238. // printf("%d: newoff=%d newml=%d\n", (U32)current, (int)newoff, (int)newml);
  239. if ((newml >= minMatchLongOff) && (newml > best_mlen)) {
  240. best_mlen = newml;
  241. matches[mnum].off = (int)newoff;
  242. matches[mnum].len = (int)newml;
  243. matches[mnum].back = 0;
  244. mnum++;
  245. if (newml > LIZARD_OPT_NUM) break;
  246. if (ip + newml >= iHighLimit) break;
  247. }
  248. #endif
  249. }
  250. if (ip[mlt] < match[mlt]) {
  251. *ptr0 = delta0;
  252. ptr0 = &chainTable[(matchIndex*2) & contentMask];
  253. if (*ptr0 == (U32)-1) break;
  254. delta0 = *ptr0;
  255. delta1 += delta0;
  256. matchIndex -= delta0;
  257. } else {
  258. *ptr1 = delta1;
  259. ptr1 = &chainTable[(matchIndex*2+1) & contentMask];
  260. if (*ptr1 == (U32)-1) break;
  261. delta1 = *ptr1;
  262. delta0 += delta1;
  263. matchIndex -= delta1;
  264. }
  265. }
  266. *ptr0 = (U32)-1;
  267. *ptr1 = (U32)-1;
  268. return mnum;
  269. }
  270. #define SET_PRICE(pos, mlen, offset, litlen, price) \
  271. { \
  272. while (last_pos < pos) { opt[last_pos+1].price = LIZARD_MAX_PRICE; last_pos++; } \
  273. opt[pos].mlen = (int)mlen; \
  274. opt[pos].off = (int)offset; \
  275. opt[pos].litlen = (int)litlen; \
  276. opt[pos].price = (int)price; \
  277. LIZARD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-source), pos, last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \
  278. }
  279. FORCE_INLINE int Lizard_compress_optimalPrice(
  280. Lizard_stream_t* const ctx,
  281. const BYTE* ip,
  282. const BYTE* const iend)
  283. {
  284. Lizard_optimal_t opt[LIZARD_OPT_NUM + 4];
  285. Lizard_match_t matches[LIZARD_OPT_NUM + 1];
  286. const BYTE *inr;
  287. size_t res, cur, cur2, skip_num = 0;
  288. size_t i, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos;
  289. const BYTE* anchor = ip;
  290. const BYTE* const mflimit = iend - MFLIMIT;
  291. const BYTE* const matchlimit = (iend - LASTLITERALS);
  292. const BYTE* const base = ctx->base;
  293. const BYTE* const dictBase = ctx->dictBase;
  294. const intptr_t dictLimit = ctx->dictLimit;
  295. const BYTE* const dictEnd = dictBase + dictLimit;
  296. const BYTE* const lowPrefixPtr = base + dictLimit;
  297. const intptr_t lowLimit = ctx->lowLimit;
  298. const intptr_t maxDistance = (1 << ctx->params.windowLog) - 1;
  299. const size_t sufficient_len = ctx->params.sufficientLength;
  300. const int faster_get_matches = (ctx->params.fullSearch == 0);
  301. const size_t minMatchLongOff = ctx->params.minMatchLongOff;
  302. const int lizardOptimalMinOffset = (ctx->params.decompressType == Lizard_coderwords_LZ4) ? (1<<30) : LIZARD_OPTIMAL_MIN_OFFSET;
  303. const size_t repMinMatch = (ctx->params.decompressType == Lizard_coderwords_LZ4) ? MINMATCH : REPMINMATCH;
  304. /* Main Loop */
  305. while (ip < mflimit) {
  306. memset(opt, 0, sizeof(Lizard_optimal_t));
  307. last_pos = 0;
  308. llen = ip - anchor;
  309. /* check rep code */
  310. if (ctx->last_off >= lizardOptimalMinOffset) {
  311. intptr_t matchIndexLO = (ip - ctx->last_off) - base;
  312. mlen = 0;
  313. if ((matchIndexLO >= lowLimit) && (base + matchIndexLO + maxDistance >= ip)) {
  314. if (matchIndexLO >= dictLimit) {
  315. mlen = Lizard_count(ip, base + matchIndexLO, matchlimit);
  316. } else {
  317. mlen = Lizard_count_2segments(ip, dictBase + matchIndexLO, matchlimit, dictEnd, lowPrefixPtr);
  318. }
  319. }
  320. if (mlen >= REPMINMATCH) {
  321. if (mlen > sufficient_len || mlen >= LIZARD_OPT_NUM) {
  322. best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
  323. goto encode;
  324. }
  325. do
  326. {
  327. litlen = 0;
  328. price = Lizard_get_price(ctx, ctx->last_off, ip, ctx->off24pos, llen, 0, mlen);
  329. if (mlen > last_pos || price < (size_t)opt[mlen].price)
  330. SET_PRICE(mlen, mlen, 0, litlen, price);
  331. mlen--;
  332. }
  333. while (mlen >= REPMINMATCH);
  334. }
  335. }
  336. if (faster_get_matches && last_pos)
  337. match_num = 0;
  338. else
  339. {
  340. if (ctx->params.parserType == Lizard_parser_optimalPrice) {
  341. Lizard_Insert(ctx, ip);
  342. match_num = Lizard_GetAllMatches(ctx, ip, ip, matchlimit, last_pos, matches);
  343. } else {
  344. Lizard_BinTree_Insert(ctx, ip);
  345. match_num = Lizard_BinTree_GetAllMatches(ctx, ip, matchlimit, last_pos, matches);
  346. }
  347. }
  348. LIZARD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-source), match_num, last_pos);
  349. if (!last_pos && !match_num) { ip++; continue; }
  350. if (match_num && (size_t)matches[match_num-1].len > sufficient_len) {
  351. best_mlen = matches[match_num-1].len;
  352. best_off = matches[match_num-1].off;
  353. cur = 0;
  354. last_pos = 1;
  355. goto encode;
  356. }
  357. // set prices using matches at position = 0
  358. best_mlen = (last_pos > MINMATCH) ? last_pos : MINMATCH;
  359. for (i = 0; i < match_num; i++) {
  360. mlen = (i>0) ? (size_t)matches[i-1].len+1 : best_mlen;
  361. best_mlen = (matches[i].len < LIZARD_OPT_NUM) ? matches[i].len : LIZARD_OPT_NUM;
  362. LIZARD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-source), matches[i].len, matches[i].off, best_mlen, last_pos);
  363. while (mlen <= best_mlen){
  364. litlen = 0;
  365. price = Lizard_get_price(ctx, ctx->last_off, ip, ctx->off24pos, llen + litlen, matches[i].off, mlen);
  366. if ((mlen >= minMatchLongOff) || (matches[i].off < LIZARD_MAX_16BIT_OFFSET))
  367. if (mlen > last_pos || price < (size_t)opt[mlen].price)
  368. SET_PRICE(mlen, mlen, matches[i].off, litlen, price);
  369. mlen++;
  370. }
  371. }
  372. if (last_pos < repMinMatch) { ip++; continue; }
  373. opt[0].off24pos = ctx->off24pos;
  374. opt[0].rep = ctx->last_off;
  375. opt[0].mlen = 1;
  376. opt[0].off = -1;
  377. // check further positions
  378. for (skip_num = 0, cur = 1; cur <= last_pos; cur++) {
  379. int rep;
  380. inr = ip + cur;
  381. if (opt[cur-1].off == -1) { // -1 = literals, 0 = rep
  382. litlen = opt[cur-1].litlen + 1;
  383. if (cur != litlen) {
  384. price = opt[cur - litlen].price + Lizard_get_price(ctx, opt[cur-litlen].rep, inr, ctx->off24pos, litlen, 0, 0);
  385. LIZARD_LOG_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-source), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
  386. } else {
  387. price = Lizard_get_price(ctx, ctx->last_off, inr, ctx->off24pos, llen + litlen, 0, 0);
  388. LIZARD_LOG_PRICE("%d: TRY2 price=%d cur=%d litlen=%d llen=%d\n", (int)(inr-source), price, cur, litlen, llen);
  389. }
  390. } else {
  391. litlen = 1;
  392. price = opt[cur - 1].price + Lizard_get_price(ctx, opt[cur-1].rep, inr, ctx->off24pos, litlen, 0, 0);
  393. LIZARD_LOG_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-source), price, cur, litlen, Lizard_get_price(ctx, rep, inr, ctx->off24pos, litlen, 0, 0));
  394. }
  395. mlen = 1;
  396. best_mlen = 0;
  397. LIZARD_LOG_PARSER("%d: TRY price=%d opt[%d].price=%d\n", (int)(inr-source), price, cur, opt[cur].price);
  398. if (cur > last_pos || price <= (size_t)opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
  399. SET_PRICE(cur, mlen, -1, litlen, price);
  400. if (cur == last_pos) break;
  401. /* set rep code */
  402. if (opt[cur].off != -1) {
  403. mlen = opt[cur].mlen;
  404. offset = opt[cur].off;
  405. if (offset < 1) {
  406. opt[cur].rep = opt[cur-mlen].rep;
  407. opt[cur].off24pos = opt[cur-mlen].off24pos;
  408. LIZARD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d\n", (int)(inr-source), cur, mlen, opt[cur-mlen].rep);
  409. } else {
  410. opt[cur].rep = (int)offset;
  411. opt[cur].off24pos = (offset >= LIZARD_MAX_16BIT_OFFSET) ? inr : opt[cur-mlen].off24pos;
  412. LIZARD_LOG_PARSER("%d: COPYREP2 cur=%d offset=%d rep=%d\n", (int)(inr-source), cur, offset, opt[cur].rep);
  413. }
  414. } else {
  415. opt[cur].rep = opt[cur-1].rep; // copy rep
  416. opt[cur].off24pos = opt[cur-1].off24pos;
  417. }
  418. rep = opt[cur].rep;
  419. LIZARD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)(inr-source), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep);
  420. /* check rep code */
  421. if (opt[cur].rep >= lizardOptimalMinOffset) {
  422. intptr_t matchIndexLO = (inr - opt[cur].rep) - base;
  423. mlen = 0;
  424. if ((matchIndexLO >= lowLimit) && (base + matchIndexLO + maxDistance >= inr)) {
  425. if (matchIndexLO >= dictLimit) {
  426. mlen = Lizard_count(inr, base + matchIndexLO, matchlimit);
  427. } else {
  428. mlen = Lizard_count_2segments(inr, dictBase + matchIndexLO, matchlimit, dictEnd, lowPrefixPtr);
  429. }
  430. }
  431. if (mlen >= REPMINMATCH/* && mlen > best_mlen*/) {
  432. LIZARD_LOG_PARSER("%d: try REP rep=%d mlen=%d\n", (int)(inr-source), opt[cur].rep, mlen);
  433. LIZARD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-source), mlen, 0, opt[cur].rep, cur, opt[cur].off);
  434. if (mlen > sufficient_len || cur + mlen >= LIZARD_OPT_NUM) {
  435. best_mlen = mlen;
  436. best_off = 0;
  437. LIZARD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-source), sufficient_len, best_mlen, best_off, last_pos);
  438. last_pos = cur + 1;
  439. goto encode;
  440. }
  441. best_mlen = mlen;
  442. if (faster_get_matches)
  443. skip_num = best_mlen;
  444. do
  445. {
  446. //if (opt[cur].mlen == 1)
  447. if (opt[cur].off == -1) {
  448. litlen = opt[cur].litlen;
  449. if (cur != litlen) {
  450. price = opt[cur - litlen].price + Lizard_get_price(ctx, rep, inr, opt[cur].off24pos, litlen, 0, mlen);
  451. LIZARD_LOG_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-source), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
  452. } else {
  453. price = Lizard_get_price(ctx, rep, inr, ctx->off24pos, llen + litlen, 0, mlen);
  454. LIZARD_LOG_PRICE("%d: TRY2 price=%d cur=%d litlen=%d llen=%d\n", (int)(inr-source), price, cur, litlen, llen);
  455. }
  456. } else {
  457. litlen = 0;
  458. price = opt[cur].price + Lizard_get_price(ctx, rep, inr, opt[cur].off24pos, litlen, 0, mlen);
  459. LIZARD_LOG_PRICE("%d: TRY3 price=%d cur=%d litlen=%d getprice=%d\n", (int)(inr-source), price, cur, litlen, Lizard_get_price(ctx, rep, inr, opt[cur].off24pos, litlen, 0, mlen - MINMATCH));
  460. }
  461. LIZARD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-source), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
  462. if (cur + mlen > last_pos || price <= (size_t)opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH
  463. SET_PRICE(cur + mlen, mlen, 0, litlen, price);
  464. mlen--;
  465. }
  466. while (mlen >= REPMINMATCH);
  467. }
  468. }
  469. if (faster_get_matches && skip_num > 0) {
  470. skip_num--;
  471. continue;
  472. }
  473. if (ctx->params.parserType == Lizard_parser_optimalPrice) {
  474. Lizard_Insert(ctx, inr);
  475. match_num = Lizard_GetAllMatches(ctx, inr, ip, matchlimit, best_mlen, matches);
  476. LIZARD_LOG_PARSER("%d: Lizard_GetAllMatches match_num=%d\n", (int)(inr-source), match_num);
  477. } else {
  478. Lizard_BinTree_Insert(ctx, inr);
  479. match_num = Lizard_BinTree_GetAllMatches(ctx, inr, matchlimit, best_mlen, matches);
  480. LIZARD_LOG_PARSER("%d: Lizard_BinTree_GetAllMatches match_num=%d\n", (int)(inr-source), match_num);
  481. }
  482. if (match_num > 0 && (size_t)matches[match_num-1].len > sufficient_len) {
  483. cur -= matches[match_num-1].back;
  484. best_mlen = matches[match_num-1].len;
  485. best_off = matches[match_num-1].off;
  486. last_pos = cur + 1;
  487. goto encode;
  488. }
  489. // set prices using matches at position = cur
  490. best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
  491. for (i = 0; i < match_num; i++) {
  492. mlen = (i>0) ? (size_t)matches[i-1].len+1 : best_mlen;
  493. cur2 = cur - matches[i].back;
  494. best_mlen = (cur2 + matches[i].len < LIZARD_OPT_NUM) ? (size_t)matches[i].len : LIZARD_OPT_NUM - cur2;
  495. LIZARD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-source), cur, cur2, matches[i].len, matches[i].off, best_mlen, last_pos);
  496. if (mlen < (size_t)matches[i].back + 1)
  497. mlen = matches[i].back + 1;
  498. while (mlen <= best_mlen) {
  499. // if (opt[cur2].mlen == 1)
  500. if (opt[cur2].off == -1)
  501. {
  502. litlen = opt[cur2].litlen;
  503. if (cur2 != litlen)
  504. price = opt[cur2 - litlen].price + Lizard_get_price(ctx, rep, inr, opt[cur2].off24pos, litlen, matches[i].off, mlen);
  505. else
  506. price = Lizard_get_price(ctx, rep, inr, ctx->off24pos, llen + litlen, matches[i].off, mlen);
  507. } else {
  508. litlen = 0;
  509. price = opt[cur2].price + Lizard_get_price(ctx, rep, inr, opt[cur2].off24pos, litlen, matches[i].off, mlen);
  510. }
  511. LIZARD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-source), matches[i].back, mlen, best_mlen, matches[i].off, price, litlen, cur - litlen, opt[cur - litlen].price);
  512. // if (cur2 + mlen > last_pos || ((matches[i].off != opt[cur2 + mlen].off) && (price < opt[cur2 + mlen].price)))
  513. if ((mlen >= minMatchLongOff) || (matches[i].off < LIZARD_MAX_16BIT_OFFSET))
  514. if (cur2 + mlen > last_pos || price < (size_t)opt[cur2 + mlen].price)
  515. {
  516. SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price);
  517. }
  518. mlen++;
  519. }
  520. }
  521. } // for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
  522. best_mlen = opt[last_pos].mlen;
  523. best_off = opt[last_pos].off;
  524. cur = last_pos - best_mlen;
  525. encode: // cur, last_pos, best_mlen, best_off have to be set
  526. for (i = 1; i <= last_pos; i++) {
  527. LIZARD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)(ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
  528. }
  529. LIZARD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-source+cur), cur, last_pos, best_mlen, best_off, opt[cur].rep);
  530. opt[0].mlen = 1;
  531. while (1) {
  532. mlen = opt[cur].mlen;
  533. offset = opt[cur].off;
  534. opt[cur].mlen = (int)best_mlen;
  535. opt[cur].off = (int)best_off;
  536. best_mlen = mlen;
  537. best_off = offset;
  538. if (mlen > cur) break;
  539. cur -= mlen;
  540. }
  541. for (i = 0; i <= last_pos;) {
  542. LIZARD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)(ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
  543. i += opt[i].mlen;
  544. }
  545. cur = 0;
  546. while (cur < last_pos) {
  547. LIZARD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)(ip-source+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep);
  548. mlen = opt[cur].mlen;
  549. // if (mlen == 1) { ip++; cur++; continue; }
  550. if (opt[cur].off == -1) { ip++; cur++; continue; }
  551. offset = opt[cur].off;
  552. cur += mlen;
  553. LIZARD_LOG_ENCODE("%d: ENCODE literals=%d off=%d mlen=%d ", (int)(ip-source), (int)(ip-anchor), (int)(offset), mlen);
  554. res = Lizard_encodeSequence(ctx, &ip, &anchor, mlen, ip - offset);
  555. if (res) return 0;
  556. LIZARD_LOG_PARSER("%d: offset=%d rep=%d\n", (int)(ip-source), offset, ctx->last_off);
  557. }
  558. }
  559. /* Encode Last Literals */
  560. ip = iend;
  561. if (Lizard_encodeLastLiterals(ctx, &ip, &anchor)) goto _output_error;
  562. /* End */
  563. return 1;
  564. _output_error:
  565. return 0;
  566. }