/* lizard_parser_fast.h -- greedy LZ4-style fast parser for the Lizard compressor */
/* Smallest match offset the fast parser will accept; 0 disables the check. */
#define LIZARD_FAST_MIN_OFFSET 8
/* Minimum match length required before a long (>16-bit) offset is accepted;
 * 0 compiles the restriction out entirely. */
#define LIZARD_FAST_LONGOFF_MM 0 /* not used with offsets > 1<<16 */

/**************************************
*  Hash Functions
**************************************/
  6. static size_t Lizard_hashPosition(const void* p)
  7. {
  8. if (MEM_64bits())
  9. return Lizard_hash5Ptr(p, LIZARD_HASHLOG_LZ4);
  10. return Lizard_hash4Ptr(p, LIZARD_HASHLOG_LZ4);
  11. }
  12. static void Lizard_putPositionOnHash(const BYTE* p, size_t h, U32* hashTable, const BYTE* srcBase)
  13. {
  14. hashTable[h] = (U32)(p-srcBase);
  15. }
  16. static void Lizard_putPosition(const BYTE* p, U32* hashTable, const BYTE* srcBase)
  17. {
  18. size_t const h = Lizard_hashPosition(p);
  19. Lizard_putPositionOnHash(p, h, hashTable, srcBase);
  20. }
  21. static U32 Lizard_getPositionOnHash(size_t h, U32* hashTable)
  22. {
  23. return hashTable[h];
  24. }
  25. static U32 Lizard_getPosition(const BYTE* p, U32* hashTable)
  26. {
  27. size_t const h = Lizard_hashPosition(p);
  28. return Lizard_getPositionOnHash(h, hashTable);
  29. }
/* Skip acceleration: after every 2^Lizard_skipTrigger failed probes the search
 * step grows by one. Increasing this value makes compression run slower on
 * incompressible data. */
static const U32 Lizard_skipTrigger = 6;
/* Inputs shorter than this are emitted entirely as literals (no match search). */
static const U32 Lizard_minLength = (MFLIMIT+1);
/* Greedy single-hash-table parser (LZ4-style) for the Lizard fast level.
 *
 * Scans [ip, iend), probing one hash table for match candidates in either the
 * current prefix (base + matchIndex >= dictLimit) or the external dictionary
 * segment, and emits each match through Lizard_encodeSequence_LZ4; trailing
 * bytes go out via Lizard_encodeLastLiterals_LZ4.
 *
 * Returns 1 on success, 0 on error (input too large, or a sequence/literal
 * encoder reported failure).
 *
 * NOTE(review): assumes ctx->base/dictBase/dictLimit/lowLimit and the hash
 * table were initialized by the caller -- not verifiable from this chunk. */
FORCE_INLINE int Lizard_compress_fast(
    Lizard_stream_t* const ctx,
    const BYTE* ip,
    const BYTE* const iend)
{
    const U32 acceleration = 1;
    const BYTE* base = ctx->base;
    const U32 dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const mflimit = iend - MFLIMIT;       /* last position where a match may start */
    const BYTE* const matchlimit = iend - LASTLITERALS;
    const BYTE* anchor = ip;                          /* start of not-yet-emitted literals */
    size_t forwardH, matchIndex;
    const U32 maxDistance = (1 << ctx->params.windowLog) - 1;
    /* Lowest searchable index, clamped to the sliding window. */
    const U32 lowLimit = (ctx->lowLimit + maxDistance >= (U32)(ip - base)) ? ctx->lowLimit : (U32)(ip - base) - maxDistance;

    /* Init conditions */
    if ((U32)(iend-ip) > (U32)LIZARD_MAX_INPUT_SIZE) goto _output_error;   /* Unsupported inputSize, too large (or negative) */
    if ((U32)(iend-ip) < Lizard_minLength) goto _last_literals;            /* Input too small, no compression (all literals) */

    /* First Byte */
    Lizard_putPosition(ip, ctx->hashTable, base);
    ip++; forwardH = Lizard_hashPosition(ip);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        size_t matchLength;

        /* Find a match */
        {   const BYTE* forwardIp = ip;
            unsigned step = 1;
            unsigned searchMatchNb = acceleration << Lizard_skipTrigger;
            while (1) {
                size_t const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                /* Step grows every 2^skipTrigger failed probes: fast skip
                 * over incompressible regions. */
                step = (searchMatchNb++ >> Lizard_skipTrigger);

                if (unlikely(forwardIp > mflimit)) goto _last_literals;

                matchIndex = Lizard_getPositionOnHash(h, ctx->hashTable);
                forwardH = Lizard_hashPosition(forwardIp);
                Lizard_putPositionOnHash(ip, h, ctx->hashTable, base);

                /* Reject candidates below the window, not strictly behind ip,
                 * or farther back than maxDistance. */
                if ((matchIndex < lowLimit) || (matchIndex >= (U32)(ip - base)) || (base + matchIndex + maxDistance < ip)) continue;

                if (matchIndex >= dictLimit) {
                    /* Candidate lies in the current prefix segment. */
                    match = base + matchIndex;
#if LIZARD_FAST_MIN_OFFSET > 0
                    if ((U32)(ip - match) >= LIZARD_FAST_MIN_OFFSET)
#endif
                    if (MEM_read32(match) == MEM_read32(ip))
                    {
                        int back = 0;   /* backward extension; stays <= 0 */
                        matchLength = Lizard_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                        /* Extend the match backwards over equal bytes. */
                        while ((ip+back > anchor) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
                        matchLength -= back;   /* back <= 0, so this lengthens the match */
#if LIZARD_FAST_LONGOFF_MM > 0
                        if ((matchLength >= LIZARD_FAST_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
#endif
                        {
                            ip += back;
                            match += back;
                            break;
                        }
                    }
                } else {
                    /* Candidate lies in the external dictionary segment. */
                    match = dictBase + matchIndex;
#if LIZARD_FAST_MIN_OFFSET > 0
                    if ((U32)(ip - (base + matchIndex)) >= LIZARD_FAST_MIN_OFFSET)
#endif
                    /* Require >= 4 readable bytes before dictLimit. */
                    if ((U32)((dictLimit-1) - matchIndex) >= 3)  /* intentional overflow */
                    if (MEM_read32(match) == MEM_read32(ip)) {
                        const U32 newLowLimit = (lowLimit + maxDistance >= (U32)(ip-base)) ? lowLimit : (U32)(ip - base) - maxDistance;
                        int back = 0;
                        /* Match may straddle the dictionary/prefix boundary. */
                        matchLength = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, matchlimit, dictEnd, lowPrefixPtr);
                        while ((ip+back > anchor) && (matchIndex+back > newLowLimit) && (ip[back-1] == match[back-1])) back--;
                        matchLength -= back;
                        match = base + matchIndex + back;   /* re-expressed in prefix coordinates for encoding */
#if LIZARD_FAST_LONGOFF_MM > 0
                        if ((matchLength >= LIZARD_FAST_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
#endif
                        {
                            ip += back;
                            break;
                        }
                    }
                }
            } /* while (1) */
        }

_next_match:
        /* Emit pending literals [anchor, ip) plus the match; nonzero => error. */
        if (Lizard_encodeSequence_LZ4(ctx, &ip, &anchor, matchLength+MINMATCH, match)) goto _output_error;

        /* Test end of chunk */
        if (ip > mflimit) break;

        /* Fill table */
        Lizard_putPosition(ip-2, ctx->hashTable, base);

        /* Test next position: try to chain directly into another match. */
        matchIndex = Lizard_getPosition(ip, ctx->hashTable);
        Lizard_putPosition(ip, ctx->hashTable, base);
        if ((matchIndex >= lowLimit) && (matchIndex < (U32)(ip - base)) && (base + matchIndex + maxDistance >= ip))
        {
            if (matchIndex >= dictLimit) {
                match = base + matchIndex;
#if LIZARD_FAST_MIN_OFFSET > 0
                if ((U32)(ip - match) >= LIZARD_FAST_MIN_OFFSET)
#endif
                if (MEM_read32(match) == MEM_read32(ip))
                {
                    matchLength = Lizard_count(ip+MINMATCH, match+MINMATCH, matchlimit);
#if LIZARD_FAST_LONGOFF_MM > 0
                    if ((matchLength >= LIZARD_FAST_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
#endif
                    goto _next_match;
                }
            } else {
                match = dictBase + matchIndex;
#if LIZARD_FAST_MIN_OFFSET > 0
                if ((U32)(ip - (base + matchIndex)) >= LIZARD_FAST_MIN_OFFSET)
#endif
                if ((U32)((dictLimit-1) - matchIndex) >= 3)  /* intentional overflow */
                if (MEM_read32(match) == MEM_read32(ip)) {
                    matchLength = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, matchlimit, dictEnd, lowPrefixPtr);
                    match = base + matchIndex;
#if LIZARD_FAST_LONGOFF_MM > 0
                    if ((matchLength >= LIZARD_FAST_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
#endif
                    goto _next_match;
                }
            }
        }

        /* Prepare next loop */
        forwardH = Lizard_hashPosition(++ip);
    }

_last_literals:
    /* Encode Last Literals */
    ip = iend;
    if (Lizard_encodeLastLiterals_LZ4(ctx, &ip, &anchor)) goto _output_error;

    /* End */
    return 1;
_output_error:
    return 0;
}