| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578 |
- /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
- 2023-04-02 : Igor Pavlov : Public domain */
- #include "Precomp.h"
- #include "CpuArch.h"
- #include "LzFind.h"
- // #include "LzFindMt.h"
- // #define LOG_ITERS
- // #define LOG_THREAD
- #ifdef LOG_THREAD
- #include <stdio.h>
- #define PRF(x) x
- #else
- // #define PRF(x)
- #endif
- #ifdef LOG_ITERS
- #include <stdio.h>
- UInt64 g_NumIters_Tree;
- UInt64 g_NumIters_Loop;
- UInt64 g_NumIters_Bytes;
- #define LOG_ITER(x) x
- #else
- #define LOG_ITER(x)
- #endif
- // ---------- BT THREAD ----------
- #define USE_SON_PREFETCH
- #define USE_LONG_MATCH_OPT
- #define kEmptyHashValue 0
- // #define CYC_TO_POS_OFFSET 0
- // #define CYC_TO_POS_OFFSET 1 // for debug
- /*
- Z7_NO_INLINE
- UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
- {
- do
- {
- UInt32 delta;
- if (hash == size)
- break;
- delta = *hash++;
- if (delta == 0 || delta > (UInt32)pos)
- return NULL;
- lenLimit++;
- if (delta == (UInt32)pos)
- {
- CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
- *d++ = 0;
- ptr1[0] = kEmptyHashValue;
- ptr1[1] = kEmptyHashValue;
- }
- else
- {
- UInt32 *_distances = ++d;
- CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
- CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
- const Byte *len0 = cur, *len1 = cur;
- UInt32 cutValue = _cutValue;
- const Byte *maxLen = cur + _maxLen;
- for (LOG_ITER(g_NumIters_Tree++);;)
- {
- LOG_ITER(g_NumIters_Loop++);
- {
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
- CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
- const Byte *len = (len0 < len1 ? len0 : len1);
- #ifdef USE_SON_PREFETCH
- const UInt32 pair0 = *pair;
- #endif
- if (len[diff] == len[0])
- {
- if (++len != lenLimit && len[diff] == len[0])
- while (++len != lenLimit)
- {
- LOG_ITER(g_NumIters_Bytes++);
- if (len[diff] != len[0])
- break;
- }
- if (maxLen < len)
- {
- maxLen = len;
- *d++ = (UInt32)(len - cur);
- *d++ = delta - 1;
-
- if (len == lenLimit)
- {
- const UInt32 pair1 = pair[1];
- *ptr1 =
- #ifdef USE_SON_PREFETCH
- pair0;
- #else
- pair[0];
- #endif
- *ptr0 = pair1;
- _distances[-1] = (UInt32)(d - _distances);
- #ifdef USE_LONG_MATCH_OPT
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
- break;
- {
- for (;;)
- {
- hash++;
- pos++;
- cur++;
- lenLimit++;
- {
- CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
- #if 0
- *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
- #else
- const UInt32 p0 = ptr[0 + (diff * 2)];
- const UInt32 p1 = ptr[1 + (diff * 2)];
- ptr[0] = p0;
- ptr[1] = p1;
- // ptr[0] = ptr[0 + (diff * 2)];
- // ptr[1] = ptr[1 + (diff * 2)];
- #endif
- }
- // PrintSon(son + 2, pos - 1);
- // printf("\npos = %x delta = %x\n", pos, delta);
- len++;
- *d++ = 2;
- *d++ = (UInt32)(len - cur);
- *d++ = delta - 1;
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
- break;
- }
- }
- #endif
- break;
- }
- }
- }
- {
- const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
- if (len[diff] < len[0])
- {
- delta = pair[1];
- if (delta >= curMatch)
- return NULL;
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- delta = *pair;
- if (delta >= curMatch)
- return NULL;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- }
- delta = (UInt32)pos - delta;
-
- if (--cutValue == 0 || delta >= pos)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- _distances[-1] = (UInt32)(d - _distances);
- break;
- }
- }
- }
- } // for (tree iterations)
- }
- pos++;
- cur++;
- }
- while (d < limit);
- *posRes = (UInt32)pos;
- return d;
- }
- */
- /* define cbs if you use 2 functions.
- GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
- GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
- do not define cbs if you use 1 function:
- GetMatchesSpecN_2()
- */
- // #define cbs _cyclicBufferSize
- /*
- we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
- to eliminate "movsx" BUG in old MSVC x64 compiler.
- */
- UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
- UInt32 *posRes);
- Z7_NO_INLINE
- UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
- UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
- UInt32 *posRes)
- {
- do // while (hash != size)
- {
- UInt32 delta;
-
- #ifndef cbs
- UInt32 cbs;
- #endif
- if (hash == size)
- break;
- delta = *hash++;
- if (delta == 0)
- return NULL;
- lenLimit++;
- #ifndef cbs
- cbs = _cyclicBufferSize;
- if ((UInt32)pos < cbs)
- {
- if (delta > (UInt32)pos)
- return NULL;
- cbs = (UInt32)pos;
- }
- #endif
- if (delta >= cbs)
- {
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- *d++ = 0;
- ptr1[0] = kEmptyHashValue;
- ptr1[1] = kEmptyHashValue;
- }
- else
- {
- UInt32 *_distances = ++d;
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- UInt32 cutValue = _cutValue;
- const Byte *len0 = cur, *len1 = cur;
- const Byte *maxLen = cur + _maxLen;
- // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
- for (LOG_ITER(g_NumIters_Tree++);;)
- {
- LOG_ITER(g_NumIters_Loop++);
- {
- // SPEC code
- CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
- + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
- ) << 1);
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
- const Byte *len = (len0 < len1 ? len0 : len1);
- #ifdef USE_SON_PREFETCH
- const UInt32 pair0 = *pair;
- #endif
- if (len[diff] == len[0])
- {
- if (++len != lenLimit && len[diff] == len[0])
- while (++len != lenLimit)
- {
- LOG_ITER(g_NumIters_Bytes++);
- if (len[diff] != len[0])
- break;
- }
- if (maxLen < len)
- {
- maxLen = len;
- *d++ = (UInt32)(len - cur);
- *d++ = delta - 1;
-
- if (len == lenLimit)
- {
- const UInt32 pair1 = pair[1];
- *ptr1 =
- #ifdef USE_SON_PREFETCH
- pair0;
- #else
- pair[0];
- #endif
- *ptr0 = pair1;
- _distances[-1] = (UInt32)(d - _distances);
- #ifdef USE_LONG_MATCH_OPT
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
- break;
- {
- for (;;)
- {
- *d++ = 2;
- *d++ = (UInt32)(lenLimit - cur);
- *d++ = delta - 1;
- cur++;
- lenLimit++;
- // SPEC
- _cyclicBufferPos++;
- {
- // SPEC code
- CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
- const CLzRef *src = dest + ((diff
- + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
- // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
- #if 0
- *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
- #else
- const UInt32 p0 = src[0];
- const UInt32 p1 = src[1];
- dest[0] = p0;
- dest[1] = p1;
- #endif
- }
- pos++;
- hash++;
- if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
- break;
- } // for() end for long matches
- }
- #endif
- break; // break from TREE iterations
- }
- }
- }
- {
- const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
- if (len[diff] < len[0])
- {
- delta = pair[1];
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- if (delta >= curMatch)
- return NULL;
- }
- else
- {
- delta = *pair;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- if (delta >= curMatch)
- return NULL;
- }
- delta = (UInt32)pos - delta;
-
- if (--cutValue == 0 || delta >= cbs)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- _distances[-1] = (UInt32)(d - _distances);
- break;
- }
- }
- }
- } // for (tree iterations)
- }
- pos++;
- _cyclicBufferPos++;
- cur++;
- }
- while (d < limit);
- *posRes = (UInt32)pos;
- return d;
- }
- /*
- typedef UInt32 uint32plus; // size_t
- UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
- UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
- UInt32 *posRes)
- {
- do // while (hash != size)
- {
- UInt32 delta;
- #ifndef cbs
- UInt32 cbs;
- #endif
- if (hash == size)
- break;
- delta = *hash++;
- if (delta == 0)
- return NULL;
- #ifndef cbs
- cbs = _cyclicBufferSize;
- if ((UInt32)pos < cbs)
- {
- if (delta > (UInt32)pos)
- return NULL;
- cbs = (UInt32)pos;
- }
- #endif
-
- if (delta >= cbs)
- {
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- *d++ = 0;
- ptr1[0] = kEmptyHashValue;
- ptr1[1] = kEmptyHashValue;
- }
- else
- {
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- UInt32 *_distances = ++d;
- uint32plus len0 = 0, len1 = 0;
- UInt32 cutValue = _cutValue;
- uint32plus maxLen = _maxLen;
- // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
- for (LOG_ITER(g_NumIters_Tree++);;)
- {
- LOG_ITER(g_NumIters_Loop++);
- {
- // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
- CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
- + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
- ) << 1);
- const Byte *pb = cur - delta;
- uint32plus len = (len0 < len1 ? len0 : len1);
- #ifdef USE_SON_PREFETCH
- const UInt32 pair0 = *pair;
- #endif
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *d++ = (UInt32)len;
- *d++ = delta - 1;
- if (len == lenLimit)
- {
- {
- const UInt32 pair1 = pair[1];
- *ptr0 = pair1;
- *ptr1 =
- #ifdef USE_SON_PREFETCH
- pair0;
- #else
- pair[0];
- #endif
- }
- _distances[-1] = (UInt32)(d - _distances);
- #ifdef USE_LONG_MATCH_OPT
- if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
- break;
- {
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
- for (;;)
- {
- *d++ = 2;
- *d++ = (UInt32)lenLimit;
- *d++ = delta - 1;
- _cyclicBufferPos++;
- {
- CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
- const CLzRef *src = dest + ((diff +
- (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
- #if 0
- *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
- #else
- const UInt32 p0 = src[0];
- const UInt32 p1 = src[1];
- dest[0] = p0;
- dest[1] = p1;
- #endif
- }
- hash++;
- pos++;
- cur++;
- pb++;
- if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
- break;
- }
- }
- #endif
- break;
- }
- }
- }
- {
- const UInt32 curMatch = (UInt32)pos - delta;
- if (pb[len] < cur[len])
- {
- delta = pair[1];
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- delta = *pair;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- }
- {
- if (delta >= curMatch)
- return NULL;
- delta = (UInt32)pos - delta;
- if (delta >= cbs
- // delta >= _cyclicBufferSize || delta >= pos
- || --cutValue == 0)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- _distances[-1] = (UInt32)(d - _distances);
- break;
- }
- }
- }
- }
- } // for (tree iterations)
- }
- pos++;
- _cyclicBufferPos++;
- cur++;
- }
- while (d < limit);
- *posRes = (UInt32)pos;
- return d;
- }
- */
|