lizard_parser_fastbig.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #define LIZARD_FASTBIG_LONGOFF_MM MM_LONGOFF
  2. /**************************************
  3. * Hash Functions
  4. **************************************/
  5. static size_t Lizard_hashPositionHLog(const void* p, int hashLog)
  6. {
  7. if (MEM_64bits())
  8. return Lizard_hash5Ptr(p, hashLog);
  9. return Lizard_hash4Ptr(p, hashLog);
  10. }
  11. static void Lizard_putPositionOnHashHLog(const BYTE* p, size_t h, U32* hashTable, const BYTE* srcBase)
  12. {
  13. hashTable[h] = (U32)(p-srcBase);
  14. }
  15. static void Lizard_putPositionHLog(const BYTE* p, U32* hashTable, const BYTE* srcBase, int hashLog)
  16. {
  17. size_t const h = Lizard_hashPositionHLog(p, hashLog);
  18. Lizard_putPositionOnHashHLog(p, h, hashTable, srcBase);
  19. }
  20. static U32 Lizard_getPositionOnHashHLog(size_t h, U32* hashTable)
  21. {
  22. return hashTable[h];
  23. }
  24. static U32 Lizard_getPositionHLog(const BYTE* p, U32* hashTable, int hashLog)
  25. {
  26. size_t const h = Lizard_hashPositionHLog(p, hashLog);
  27. return Lizard_getPositionOnHashHLog(h, hashTable);
  28. }
  29. FORCE_INLINE int Lizard_compress_fastBig(
  30. Lizard_stream_t* const ctx,
  31. const BYTE* ip,
  32. const BYTE* const iend)
  33. {
  34. const U32 acceleration = 1;
  35. const BYTE* base = ctx->base;
  36. const U32 dictLimit = ctx->dictLimit;
  37. const BYTE* const lowPrefixPtr = base + dictLimit;
  38. const BYTE* const dictBase = ctx->dictBase;
  39. const BYTE* const dictEnd = dictBase + dictLimit;
  40. const BYTE* const mflimit = iend - MFLIMIT;
  41. const BYTE* const matchlimit = iend - LASTLITERALS;
  42. const BYTE* anchor = ip;
  43. size_t forwardH, matchIndex;
  44. const int hashLog = ctx->params.hashLog;
  45. const U32 maxDistance = (1 << ctx->params.windowLog) - 1;
  46. const U32 lowLimit = (ctx->lowLimit + maxDistance >= (U32)(ip - base)) ? ctx->lowLimit : (U32)(ip - base) - maxDistance;
  47. /* Init conditions */
  48. if ((U32)(iend-ip) > (U32)LIZARD_MAX_INPUT_SIZE) goto _output_error; /* Unsupported inputSize, too large (or negative) */
  49. if ((U32)(iend-ip) < Lizard_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
  50. /* First Byte */
  51. Lizard_putPositionHLog(ip, ctx->hashTable, base, hashLog);
  52. ip++; forwardH = Lizard_hashPositionHLog(ip, hashLog);
  53. /* Main Loop */
  54. for ( ; ; ) {
  55. const BYTE* match;
  56. size_t matchLength;
  57. /* Find a match */
  58. { const BYTE* forwardIp = ip;
  59. unsigned step = 1;
  60. unsigned searchMatchNb = acceleration << Lizard_skipTrigger;
  61. while (1) {
  62. size_t const h = forwardH;
  63. ip = forwardIp;
  64. forwardIp += step;
  65. step = (searchMatchNb++ >> Lizard_skipTrigger);
  66. if (unlikely(forwardIp > mflimit)) goto _last_literals;
  67. matchIndex = Lizard_getPositionOnHashHLog(h, ctx->hashTable);
  68. forwardH = Lizard_hashPositionHLog(forwardIp, hashLog);
  69. Lizard_putPositionOnHashHLog(ip, h, ctx->hashTable, base);
  70. if ((matchIndex < lowLimit) || (matchIndex >= (U32)(ip - base)) || (base + matchIndex + maxDistance < ip)) continue;
  71. if (matchIndex >= dictLimit) {
  72. match = base + matchIndex;
  73. if ((U32)(ip - match) >= LIZARD_FAST_MIN_OFFSET)
  74. if (MEM_read32(match) == MEM_read32(ip))
  75. {
  76. int back = 0;
  77. matchLength = Lizard_count(ip+MINMATCH, match+MINMATCH, matchlimit);
  78. while ((ip+back > anchor) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--;
  79. matchLength -= back;
  80. if ((matchLength >= LIZARD_FASTBIG_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  81. {
  82. ip += back;
  83. match += back;
  84. break;
  85. }
  86. }
  87. } else {
  88. match = dictBase + matchIndex;
  89. if ((U32)(ip - (base + matchIndex)) >= LIZARD_FAST_MIN_OFFSET)
  90. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  91. if (MEM_read32(match) == MEM_read32(ip)) {
  92. const U32 newLowLimit = (lowLimit + maxDistance >= (U32)(ip-base)) ? lowLimit : (U32)(ip - base) - maxDistance;
  93. int back = 0;
  94. matchLength = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, matchlimit, dictEnd, lowPrefixPtr);
  95. while ((ip+back > anchor) && (matchIndex+back > newLowLimit) && (ip[back-1] == match[back-1])) back--;
  96. matchLength -= back;
  97. match = base + matchIndex + back;
  98. if ((matchLength >= LIZARD_FASTBIG_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  99. {
  100. ip += back;
  101. break;
  102. }
  103. }
  104. }
  105. } // while (1)
  106. }
  107. _next_match:
  108. if (Lizard_encodeSequence_LIZv1(ctx, &ip, &anchor, matchLength+MINMATCH, match)) goto _output_error;
  109. /* Test end of chunk */
  110. if (ip > mflimit) break;
  111. /* Fill table */
  112. Lizard_putPositionHLog(ip-2, ctx->hashTable, base, hashLog);
  113. /* Test next position */
  114. matchIndex = Lizard_getPositionHLog(ip, ctx->hashTable, hashLog);
  115. Lizard_putPositionHLog(ip, ctx->hashTable, base, hashLog);
  116. if ((matchIndex >= lowLimit) && (matchIndex < (U32)(ip - base)) && (base + matchIndex + maxDistance >= ip))
  117. {
  118. if (matchIndex >= dictLimit) {
  119. match = base + matchIndex;
  120. if ((U32)(ip - match) >= LIZARD_FAST_MIN_OFFSET)
  121. if (MEM_read32(match) == MEM_read32(ip))
  122. {
  123. matchLength = Lizard_count(ip+MINMATCH, match+MINMATCH, matchlimit);
  124. if ((matchLength >= LIZARD_FASTBIG_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  125. goto _next_match;
  126. }
  127. } else {
  128. match = dictBase + matchIndex;
  129. if ((U32)(ip - (base + matchIndex)) >= LIZARD_FAST_MIN_OFFSET)
  130. if ((U32)((dictLimit-1) - matchIndex) >= 3) /* intentional overflow */
  131. if (MEM_read32(match) == MEM_read32(ip)) {
  132. matchLength = Lizard_count_2segments(ip+MINMATCH, match+MINMATCH, matchlimit, dictEnd, lowPrefixPtr);
  133. match = base + matchIndex;
  134. if ((matchLength >= LIZARD_FASTBIG_LONGOFF_MM) || ((U32)(ip - match) < LIZARD_MAX_16BIT_OFFSET))
  135. goto _next_match;
  136. }
  137. }
  138. }
  139. /* Prepare next loop */
  140. forwardH = Lizard_hashPositionHLog(++ip, hashLog);
  141. }
  142. _last_literals:
  143. /* Encode Last Literals */
  144. ip = iend;
  145. if (Lizard_encodeLastLiterals_LIZv1(ctx, &ip, &anchor)) goto _output_error;
  146. /* End */
  147. return 1;
  148. _output_error:
  149. return 0;
  150. }