lizard_parser_nochain.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. #define OPTIMAL_ML (int)((ML_MASK_LZ4-1)+MINMATCH)
  2. //#define LIZARD_NOCHAIN_HASH_FUNCTION(ip, hashLog) Lizard_hashPtr(ip, hashLog, ctx->params.searchLength)
  3. #define LIZARD_NOCHAIN_HASH_FUNCTION(ip, hashLog) Lizard_hash5Ptr(ip, hashLog)
  4. #define LIZARD_NOCHAIN_MIN_OFFSET 8
  5. /* Update chains up to ip (excluded) */
  6. FORCE_INLINE void Lizard_InsertNoChain (Lizard_stream_t* ctx, const BYTE* ip)
  7. {
  8. U32* const hashTable = ctx->hashTable;
  9. const BYTE* const base = ctx->base;
  10. U32 const target = (U32)(ip - base);
  11. U32 idx = ctx->nextToUpdate;
  12. const int hashLog = ctx->params.hashLog;
  13. while (idx < target) {
  14. size_t const h = LIZARD_NOCHAIN_HASH_FUNCTION(base+idx, hashLog);
  15. if ((hashTable[h] >= idx) || (idx >= hashTable[h] + LIZARD_NOCHAIN_MIN_OFFSET))
  16. hashTable[h] = idx;
  17. idx++;
  18. }
  19. ctx->nextToUpdate = target;
  20. }
  21. FORCE_INLINE int Lizard_InsertAndFindBestMatchNoChain (Lizard_stream_t* ctx, /* Index table will be updated */
  22. const BYTE* ip, const BYTE* const iLimit,
  23. const BYTE** matchpos)
  24. {
  25. U32* const HashTable = ctx->hashTable;
  26. const BYTE* const base = ctx->base;
  27. const BYTE* const dictBase = ctx->dictBase;
  28. const U32 dictLimit = ctx->dictLimit;
  29. const BYTE* const lowPrefixPtr = base + dictLimit;
  30. const BYTE* const dictEnd = dictBase + dictLimit;
  31. U32 matchIndex;
  32. const BYTE* match;
  33. size_t ml=0;
  34. const int hashLog = ctx->params.hashLog;
  35. const U32 maxDistance = (1 << ctx->params.windowLog) - 1;
  36. const U32 current = (U32)(ip - base);
  37. const U32 lowLimit = (ctx->lowLimit + maxDistance >= current) ? ctx->lowLimit : current - maxDistance;
  38. /* HC4 match finder */
  39. Lizard_InsertNoChain(ctx, ip);
  40. matchIndex = HashTable[LIZARD_NOCHAIN_HASH_FUNCTION(ip, hashLog)];
  41. if ((matchIndex < current) && (matchIndex >= lowLimit)) {
  42. if (matchIndex >= dictLimit) {
  43. match = base + matchIndex;
  44. #if LIZARD_NOCHAIN_MIN_OFFSET > 0
  45. if ((U32)(ip - match) >= LIZARD_NOCHAIN_MIN_OFFSET)
  46. #endif
  47. if (*(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip)))
  48. {
  49. size_t const mlt = Lizard_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
  50. if (mlt > ml) { ml = mlt; *matchpos = match; }
  51. }
  52. } else {
  53. match = dictBase + matchIndex;
  54. #if LIZARD_NOCHAIN_MIN_OFFSET > 0
  55. if ((U32)(ip - (base + matchIndex)) >= LIZARD_NOCHAIN_MIN_OFFSET)
  56. #endif
  57. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  58. if (MEM_read32(match) == MEM_read32(ip)) {
  59. size_t mlt = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  60. if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
  61. }
  62. }
  63. }
  64. return (int)ml;
  65. }
  66. FORCE_INLINE int Lizard_InsertAndGetWiderMatchNoChain (
  67. Lizard_stream_t* ctx,
  68. const BYTE* const ip,
  69. const BYTE* const iLowLimit,
  70. const BYTE* const iHighLimit,
  71. int longest,
  72. const BYTE** matchpos,
  73. const BYTE** startpos)
  74. {
  75. U32* const HashTable = ctx->hashTable;
  76. const BYTE* const base = ctx->base;
  77. const U32 dictLimit = ctx->dictLimit;
  78. const BYTE* const lowPrefixPtr = base + dictLimit;
  79. const BYTE* const dictBase = ctx->dictBase;
  80. const BYTE* const dictEnd = dictBase + dictLimit;
  81. U32 matchIndex;
  82. int LLdelta = (int)(ip-iLowLimit);
  83. const int hashLog = ctx->params.hashLog;
  84. const U32 maxDistance = (1 << ctx->params.windowLog) - 1;
  85. const U32 current = (U32)(ip - base);
  86. const U32 lowLimit = (ctx->lowLimit + maxDistance >= current) ? ctx->lowLimit : current - maxDistance;
  87. /* First Match */
  88. Lizard_InsertNoChain(ctx, ip);
  89. matchIndex = HashTable[LIZARD_NOCHAIN_HASH_FUNCTION(ip, hashLog)];
  90. if ((matchIndex < current) && (matchIndex >= lowLimit)) {
  91. if (matchIndex >= dictLimit) {
  92. const BYTE* match = base + matchIndex;
  93. #if LIZARD_NOCHAIN_MIN_OFFSET > 0
  94. if ((U32)(ip - match) >= LIZARD_NOCHAIN_MIN_OFFSET)
  95. #endif
  96. if (*(iLowLimit + longest) == *(match - LLdelta + longest)) {
  97. if (MEM_read32(match) == MEM_read32(ip)) {
  98. int mlt = MINMATCH + Lizard_count(ip+MINMATCH, match+MINMATCH, iHighLimit);
  99. int back = 0;
  100. while ((ip+back > iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
  101. mlt -= back;
  102. if (mlt > longest) {
  103. longest = (int)mlt;
  104. *matchpos = match+back;
  105. *startpos = ip+back;
  106. }
  107. }
  108. }
  109. } else {
  110. const BYTE* match = dictBase + matchIndex;
  111. #if LIZARD_NOCHAIN_MIN_OFFSET > 0
  112. if ((U32)(ip - (base + matchIndex)) >= LIZARD_NOCHAIN_MIN_OFFSET)
  113. #endif
  114. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  115. if (MEM_read32(match) == MEM_read32(ip)) {
  116. int back=0;
  117. size_t mlt = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, lowPrefixPtr) + MINMATCH;
  118. while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
  119. mlt -= back;
  120. if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
  121. }
  122. }
  123. }
  124. return longest;
  125. }
  126. FORCE_INLINE int Lizard_compress_noChain (
  127. Lizard_stream_t* const ctx,
  128. const BYTE* ip,
  129. const BYTE* const iend)
  130. {
  131. const BYTE* anchor = ip;
  132. const BYTE* const mflimit = iend - MFLIMIT;
  133. const BYTE* const matchlimit = (iend - LASTLITERALS);
  134. int ml, ml2, ml3, ml0;
  135. const BYTE* ref = NULL;
  136. const BYTE* start2 = NULL;
  137. const BYTE* ref2 = NULL;
  138. const BYTE* start3 = NULL;
  139. const BYTE* ref3 = NULL;
  140. const BYTE* start0;
  141. const BYTE* ref0;
  142. /* init */
  143. ip++;
  144. /* Main Loop */
  145. while (ip < mflimit) {
  146. ml = Lizard_InsertAndFindBestMatchNoChain (ctx, ip, matchlimit, (&ref));
  147. if (!ml) { ip++; continue; }
  148. /* saved, in case we would skip too much */
  149. start0 = ip;
  150. ref0 = ref;
  151. ml0 = ml;
  152. _Search2:
  153. if (ip+ml < mflimit)
  154. ml2 = Lizard_InsertAndGetWiderMatchNoChain(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
  155. else ml2 = ml;
  156. if (ml2 == ml) { /* No better match */
  157. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml, ref)) return 0;
  158. continue;
  159. }
  160. if (start0 < ip) {
  161. if (start2 < ip + ml0) { /* empirical */
  162. ip = start0;
  163. ref = ref0;
  164. ml = ml0;
  165. }
  166. }
  167. /* Here, start0==ip */
  168. if ((start2 - ip) < 3) { /* First Match too small : removed */
  169. ml = ml2;
  170. ip = start2;
  171. ref =ref2;
  172. goto _Search2;
  173. }
  174. _Search3:
  175. /*
  176. * Currently we have :
  177. * ml2 > ml1, and
  178. * ip1+3 <= ip2 (usually < ip1+ml1)
  179. */
  180. if ((start2 - ip) < OPTIMAL_ML) {
  181. int correction;
  182. int new_ml = ml;
  183. if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
  184. if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
  185. correction = new_ml - (int)(start2 - ip);
  186. if (correction > 0) {
  187. start2 += correction;
  188. ref2 += correction;
  189. ml2 -= correction;
  190. }
  191. }
  192. /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
  193. if (start2 + ml2 < mflimit)
  194. ml3 = Lizard_InsertAndGetWiderMatchNoChain(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
  195. else ml3 = ml2;
  196. if (ml3 == ml2) { /* No better match : 2 sequences to encode */
  197. /* ip & ref are known; Now for ml */
  198. if (start2 < ip+ml) ml = (int)(start2 - ip);
  199. /* Now, encode 2 sequences */
  200. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml, ref)) return 0;
  201. ip = start2;
  202. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml2, ref2)) return 0;
  203. continue;
  204. }
  205. if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
  206. if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
  207. if (start2 < ip+ml) {
  208. int correction = (int)(ip+ml - start2);
  209. start2 += correction;
  210. ref2 += correction;
  211. ml2 -= correction;
  212. if (ml2 < MINMATCH) {
  213. start2 = start3;
  214. ref2 = ref3;
  215. ml2 = ml3;
  216. }
  217. }
  218. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml, ref)) return 0;
  219. ip = start3;
  220. ref = ref3;
  221. ml = ml3;
  222. start0 = start2;
  223. ref0 = ref2;
  224. ml0 = ml2;
  225. goto _Search2;
  226. }
  227. start2 = start3;
  228. ref2 = ref3;
  229. ml2 = ml3;
  230. goto _Search3;
  231. }
  232. /*
  233. * OK, now we have 3 ascending matches; let's write at least the first one
  234. * ip & ref are known; Now for ml
  235. */
  236. if (start2 < ip+ml) {
  237. if ((start2 - ip) < (int)ML_MASK_LZ4) {
  238. int correction;
  239. if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
  240. if (ip + ml > start2 + ml2 - MINMATCH) {
  241. ml = (int)(start2 - ip) + ml2 - MINMATCH;
  242. if (ml < MINMATCH) { // match2 doesn't fit, remove it
  243. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml, ref)) return 0;
  244. ip = start3;
  245. ref = ref3;
  246. ml = ml3;
  247. start0 = start2;
  248. ref0 = ref2;
  249. ml0 = ml2;
  250. goto _Search2;
  251. }
  252. }
  253. correction = ml - (int)(start2 - ip);
  254. if (correction > 0) {
  255. start2 += correction;
  256. ref2 += correction;
  257. ml2 -= correction;
  258. }
  259. } else {
  260. ml = (int)(start2 - ip);
  261. }
  262. }
  263. if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, ml, ref)) return 0;
  264. ip = start2;
  265. ref = ref2;
  266. ml = ml2;
  267. start2 = start3;
  268. ref2 = ref3;
  269. ml2 = ml3;
  270. goto _Search3;
  271. }
  272. /* Encode Last Literals */
  273. ip = iend;
  274. if (Lizard_encodeLastLiterals_LZ4(ctx, &ip, &anchor)) goto _output_error;
  275. /* End */
  276. return 1;
  277. _output_error:
  278. return 0;
  279. }