| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746 |
- /* LzFind.c -- Match finder for LZ algorithms
- : Igor Pavlov : Public domain */
- #include "Precomp.h"
- #include <string.h>
- // #include <stdio.h>
- #include "CpuArch.h"
- #include "LzFind.h"
- #include "LzHash.h"
- #define kBlockMoveAlign (1 << 7) // alignment for memmove()
- #define kBlockSizeAlign (1 << 16) // alignment for block allocation
- #define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
- #define kEmptyHashValue 0
- #define kMaxValForNormalize ((UInt32)0)
- // #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
- // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
- #define GET_AVAIL_BYTES(p) \
- Inline_MatchFinder_GetNumAvailableBytes(p)
- // #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
- #define kFix5HashSize kFix4HashSize
- /*
- HASH2_CALC:
- if (hv) match, then cur[0] and cur[1] also match
- */
- #define HASH2_CALC hv = GetUi16(cur);
- // (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
- /*
- HASH3_CALC:
- if (cur[0]) and (h2) match, then cur[1] also match
- if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
- */
- #define HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
- #define HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
- #define HASH5_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
- /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
- hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
- #define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
- static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
- {
- // if (!p->directInput)
- {
- ISzAlloc_Free(alloc, p->bufBase);
- p->bufBase = NULL;
- }
- }
- static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
- {
- if (blockSize == 0)
- return 0;
- if (!p->bufBase || p->blockSize != blockSize)
- {
- // size_t blockSizeT;
- LzInWindow_Free(p, alloc);
- p->blockSize = blockSize;
- // blockSizeT = blockSize;
-
- // printf("\nblockSize = 0x%x\n", blockSize);
- /*
- #if defined _WIN64
- // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
- // we use UInt32 type for (p->blockSize), because
- // we don't want to wrap over 4 GiB,
- // when we use (p->streamPos - p->pos) that is UInt32.
- if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
- {
- blockSizeT = ((size_t)1 << 32);
- printf("\nchanged to blockSizeT = 4GiB\n");
- }
- #endif
- */
-
- p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
- // printf("\nbufferBase = %p\n", p->bufBase);
- // return 0; // for debug
- }
- return (p->bufBase != NULL);
- }
- static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
- {
- return ((CMatchFinder *)p)->buffer;
- }
- static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
- {
- return GET_AVAIL_BYTES((CMatchFinder *)p);
- }
- Z7_NO_INLINE
- static void MatchFinder_ReadBlock(CMatchFinder *p)
- {
- if (p->streamEndWasReached || p->result != SZ_OK)
- return;
- /* We use (p->streamPos - p->pos) value.
- (p->streamPos < p->pos) is allowed. */
- if (p->directInput)
- {
- UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
- if (curSize > p->directInputRem)
- curSize = (UInt32)p->directInputRem;
- p->streamPos += curSize;
- p->directInputRem -= curSize;
- if (p->directInputRem == 0)
- p->streamEndWasReached = 1;
- return;
- }
-
- for (;;)
- {
- const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
- size_t size = (size_t)(p->bufBase + p->blockSize - dest);
- if (size == 0)
- {
- /* we call ReadBlock() after NeedMove() and MoveBlock().
- NeedMove() and MoveBlock() povide more than (keepSizeAfter)
- to the end of (blockSize).
- So we don't execute this branch in normal code flow.
- We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
- */
- // p->result = SZ_ERROR_FAIL; // we can show error here
- return;
- }
- // #define kRead 3
- // if (size > kRead) size = kRead; // for debug
- /*
- // we need cast (Byte *)dest.
- #ifdef __clang__
- #pragma GCC diagnostic ignored "-Wcast-qual"
- #endif
- */
- p->result = ISeqInStream_Read(p->stream,
- p->bufBase + (dest - p->bufBase), &size);
- if (p->result != SZ_OK)
- return;
- if (size == 0)
- {
- p->streamEndWasReached = 1;
- return;
- }
- p->streamPos += (UInt32)size;
- if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
- return;
- /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
- (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
- }
- // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
- }
- Z7_NO_INLINE
- void MatchFinder_MoveBlock(CMatchFinder *p)
- {
- const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
- const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
- p->buffer = p->bufBase + keepBefore;
- memmove(p->bufBase,
- p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
- keepBefore + (size_t)GET_AVAIL_BYTES(p));
- }
- /* We call MoveBlock() before ReadBlock().
- So MoveBlock() can be wasteful operation, if the whole input data
- can fit in current block even without calling MoveBlock().
- in important case where (dataSize <= historySize)
- condition (p->blockSize > dataSize + p->keepSizeAfter) is met
- So there is no MoveBlock() in that case case.
- */
- int MatchFinder_NeedMove(CMatchFinder *p)
- {
- if (p->directInput)
- return 0;
- if (p->streamEndWasReached || p->result != SZ_OK)
- return 0;
- return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
- }
- void MatchFinder_ReadIfRequired(CMatchFinder *p)
- {
- if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
- MatchFinder_ReadBlock(p);
- }
- static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
- {
- p->cutValue = 32;
- p->btMode = 1;
- p->numHashBytes = 4;
- p->numHashBytes_Min = 2;
- p->numHashOutBits = 0;
- p->bigHash = 0;
- }
- #define kCrcPoly 0xEDB88320
- void MatchFinder_Construct(CMatchFinder *p)
- {
- unsigned i;
- p->buffer = NULL;
- p->bufBase = NULL;
- p->directInput = 0;
- p->stream = NULL;
- p->hash = NULL;
- p->expectedDataSize = (UInt64)(Int64)-1;
- MatchFinder_SetDefaultSettings(p);
- for (i = 0; i < 256; i++)
- {
- UInt32 r = (UInt32)i;
- unsigned j;
- for (j = 0; j < 8; j++)
- r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
- p->crc[i] = r;
- }
- }
- #undef kCrcPoly
- static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
- {
- ISzAlloc_Free(alloc, p->hash);
- p->hash = NULL;
- }
- void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
- {
- MatchFinder_FreeThisClassMemory(p, alloc);
- LzInWindow_Free(p, alloc);
- }
- static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
- {
- const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
- if (sizeInBytes / sizeof(CLzRef) != num)
- return NULL;
- return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
- }
- #if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
- #error Stop_Compiling_Bad_Reserve
- #endif
- static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
- {
- UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
- /*
- if (historySize > kMaxHistorySize)
- return 0;
- */
- // printf("\nhistorySize == 0x%x\n", historySize);
-
- if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
- return 0;
-
- {
- const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
- const UInt32 rem = kBlockSizeMax - blockSize;
- const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
- + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
- if (blockSize >= kBlockSizeMax
- || rem < kBlockSizeReserveMin) // we reject settings that will be slow
- return 0;
- if (reserve >= rem)
- blockSize = kBlockSizeMax;
- else
- {
- blockSize += reserve;
- blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
- }
- }
- // printf("\n LzFind_blockSize = %x\n", blockSize);
- // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
- return blockSize;
- }
- // input is historySize
- static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
- {
- if (p->numHashBytes == 2)
- return (1 << 16) - 1;
- if (hs != 0)
- hs--;
- hs |= (hs >> 1);
- hs |= (hs >> 2);
- hs |= (hs >> 4);
- hs |= (hs >> 8);
- // we propagated 16 bits in (hs). Low 16 bits must be set later
- if (hs >= (1 << 24))
- {
- if (p->numHashBytes == 3)
- hs = (1 << 24) - 1;
- /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
- }
- // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
- hs |= (1 << 16) - 1; /* don't change it! */
- // bt5: we adjust the size with recommended minimum size
- if (p->numHashBytes >= 5)
- hs |= (256 << kLzHash_CrcShift_2) - 1;
- return hs;
- }
- // input is historySize
- static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
- {
- if (p->numHashBytes == 2)
- return (1 << 16) - 1;
- if (hs != 0)
- hs--;
- hs |= (hs >> 1);
- hs |= (hs >> 2);
- hs |= (hs >> 4);
- hs |= (hs >> 8);
- // we propagated 16 bits in (hs). Low 16 bits must be set later
- hs >>= 1;
- if (hs >= (1 << 24))
- {
- if (p->numHashBytes == 3)
- hs = (1 << 24) - 1;
- else
- hs >>= 1;
- /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
- }
- // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
- hs |= (1 << 16) - 1; /* don't change it! */
- // bt5: we adjust the size with recommended minimum size
- if (p->numHashBytes >= 5)
- hs |= (256 << kLzHash_CrcShift_2) - 1;
- return hs;
- }
- int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
- {
- /* we need one additional byte in (p->keepSizeBefore),
- since we use MoveBlock() after (p->pos++) and before dictionary using */
- // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
- p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- keepAddBufferAfter += matchMaxLen;
- /* we need (p->keepSizeAfter >= p->numHashBytes) */
- if (keepAddBufferAfter < p->numHashBytes)
- keepAddBufferAfter = p->numHashBytes;
- // keepAddBufferAfter -= 2; // for debug
- p->keepSizeAfter = keepAddBufferAfter;
- if (p->directInput)
- p->blockSize = 0;
- if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
- {
- size_t hashSizeSum;
- {
- UInt32 hs;
- UInt32 hsCur;
-
- if (p->numHashOutBits != 0)
- {
- unsigned numBits = p->numHashOutBits;
- const unsigned nbMax =
- (p->numHashBytes == 2 ? 16 :
- (p->numHashBytes == 3 ? 24 : 32));
- if (numBits >= nbMax)
- numBits = nbMax;
- if (numBits >= 32)
- hs = (UInt32)0 - 1;
- else
- hs = ((UInt32)1 << numBits) - 1;
- // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
- hs |= (1 << 16) - 1; /* don't change it! */
- if (p->numHashBytes >= 5)
- hs |= (256 << kLzHash_CrcShift_2) - 1;
- {
- const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
- if (hs >= hs2)
- hs = hs2;
- }
- hsCur = hs;
- if (p->expectedDataSize < historySize)
- {
- const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
- if (hsCur >= hs2)
- hsCur = hs2;
- }
- }
- else
- {
- hs = MatchFinder_GetHashMask(p, historySize);
- hsCur = hs;
- if (p->expectedDataSize < historySize)
- {
- hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
- if (hsCur >= hs) // is it possible?
- hsCur = hs;
- }
- }
- p->hashMask = hsCur;
- hashSizeSum = hs;
- hashSizeSum++;
- if (hashSizeSum < hs)
- return 0;
- {
- UInt32 fixedHashSize = 0;
- if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
- if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
- // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
- hashSizeSum += fixedHashSize;
- p->fixedHashSize = fixedHashSize;
- }
- }
- p->matchMaxLen = matchMaxLen;
- {
- size_t newSize;
- size_t numSons;
- const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
- p->historySize = historySize;
- p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
-
- numSons = newCyclicBufferSize;
- if (p->btMode)
- numSons <<= 1;
- newSize = hashSizeSum + numSons;
- if (numSons < newCyclicBufferSize || newSize < numSons)
- return 0;
- // aligned size is not required here, but it can be better for some loops
- #define NUM_REFS_ALIGN_MASK 0xF
- newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
- // 22.02: we don't reallocate buffer, if old size is enough
- if (p->hash && p->numRefs >= newSize)
- return 1;
-
- MatchFinder_FreeThisClassMemory(p, alloc);
- p->numRefs = newSize;
- p->hash = AllocRefs(newSize, alloc);
-
- if (p->hash)
- {
- p->son = p->hash + hashSizeSum;
- return 1;
- }
- }
- }
- MatchFinder_Free(p, alloc);
- return 0;
- }
- static void MatchFinder_SetLimits(CMatchFinder *p)
- {
- UInt32 k;
- UInt32 n = kMaxValForNormalize - p->pos;
- if (n == 0)
- n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
-
- k = p->cyclicBufferSize - p->cyclicBufferPos;
- if (k < n)
- n = k;
- k = GET_AVAIL_BYTES(p);
- {
- const UInt32 ksa = p->keepSizeAfter;
- UInt32 mm = p->matchMaxLen;
- if (k > ksa)
- k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
- else if (k >= mm)
- {
- // the limitation for (p->lenLimit) update
- k -= mm; // optimization : to reduce the number of checks
- k++;
- // k = 1; // non-optimized version : for debug
- }
- else
- {
- mm = k;
- if (k != 0)
- k = 1;
- }
- p->lenLimit = mm;
- }
- if (k < n)
- n = k;
-
- p->posLimit = p->pos + n;
- }
- void MatchFinder_Init_LowHash(CMatchFinder *p)
- {
- size_t i;
- CLzRef *items = p->hash;
- const size_t numItems = p->fixedHashSize;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
- }
- void MatchFinder_Init_HighHash(CMatchFinder *p)
- {
- size_t i;
- CLzRef *items = p->hash + p->fixedHashSize;
- const size_t numItems = (size_t)p->hashMask + 1;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
- }
- void MatchFinder_Init_4(CMatchFinder *p)
- {
- if (!p->directInput)
- p->buffer = p->bufBase;
- {
- /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
- the code in CMatchFinderMt expects (pos = 1) */
- p->pos =
- p->streamPos =
- 1; // it's smallest optimal value. do not change it
- // 0; // for debug
- }
- p->result = SZ_OK;
- p->streamEndWasReached = 0;
- }
- // (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
- #define CYC_TO_POS_OFFSET 0
- // #define CYC_TO_POS_OFFSET 1 // for debug
- void MatchFinder_Init(void *_p)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- MatchFinder_Init_HighHash(p);
- MatchFinder_Init_LowHash(p);
- MatchFinder_Init_4(p);
- // if (readData)
- MatchFinder_ReadBlock(p);
- /* if we init (cyclicBufferPos = pos), then we can use one variable
- instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
- p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
- // p->cyclicBufferPos = 0; // smallest value
- // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
- MatchFinder_SetLimits(p);
- }
- #ifdef MY_CPU_X86_OR_AMD64
- #if defined(__clang__) && (__clang_major__ >= 4) \
- || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
- // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
- #define USE_LZFIND_SATUR_SUB_128
- #define USE_LZFIND_SATUR_SUB_256
- #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
- #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
- #elif defined(_MSC_VER)
- #if (_MSC_VER >= 1600)
- #define USE_LZFIND_SATUR_SUB_128
- #endif
- #if (_MSC_VER >= 1900)
- #define USE_LZFIND_SATUR_SUB_256
- #endif
- #endif
- #elif defined(MY_CPU_ARM64) \
- /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
- #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
- || defined(__GNUC__) && (__GNUC__ >= 6)
- #define USE_LZFIND_SATUR_SUB_128
- #ifdef MY_CPU_ARM64
- // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
- #else
- #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
- #endif
- #elif defined(_MSC_VER)
- #if (_MSC_VER >= 1910)
- #define USE_LZFIND_SATUR_SUB_128
- #endif
- #endif
- #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
- #include <arm64_neon.h>
- #else
- #include <arm_neon.h>
- #endif
- #endif
- #ifdef USE_LZFIND_SATUR_SUB_128
- // #define Z7_SHOW_HW_STATUS
- #ifdef Z7_SHOW_HW_STATUS
- #include <stdio.h>
- #define PRF(x) x
- PRF(;)
- #else
- #define PRF(x)
- #endif
- #ifdef MY_CPU_ARM_OR_ARM64
- #ifdef MY_CPU_ARM64
- // #define FORCE_LZFIND_SATUR_SUB_128
- #endif
- typedef uint32x4_t LzFind_v128;
- #define SASUB_128_V(v, s) \
- vsubq_u32(vmaxq_u32(v, s), s)
- #else // MY_CPU_ARM_OR_ARM64
- #include <smmintrin.h> // sse4.1
- typedef __m128i LzFind_v128;
- // SSE 4.1
- #define SASUB_128_V(v, s) \
- _mm_sub_epi32(_mm_max_epu32(v, s), s)
- #endif // MY_CPU_ARM_OR_ARM64
- #define SASUB_128(i) \
- *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
- *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
- Z7_NO_INLINE
- static
- #ifdef LZFIND_ATTRIB_SSE41
- LZFIND_ATTRIB_SSE41
- #endif
- void
- Z7_FASTCALL
- LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
- {
- const LzFind_v128 sub2 =
- #ifdef MY_CPU_ARM_OR_ARM64
- vdupq_n_u32(subValue);
- #else
- _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
- #endif
- Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
- do
- {
- SASUB_128(0) SASUB_128(1) items += 2 * 4;
- SASUB_128(0) SASUB_128(1) items += 2 * 4;
- }
- while (items != lim);
- }
- #ifdef USE_LZFIND_SATUR_SUB_256
- #include <immintrin.h> // avx
- /*
- clang :immintrin.h uses
- #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
- defined(__AVX2__)
- #include <avx2intrin.h>
- #endif
- so we need <avxintrin.h> for clang-cl */
- #if defined(__clang__)
- #include <avxintrin.h>
- #include <avx2intrin.h>
- #endif
- // AVX2:
- #define SASUB_256(i) \
- *( __m256i *)( void *)(items + (i) * 8) = \
- _mm256_sub_epi32(_mm256_max_epu32( \
- *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
- Z7_NO_INLINE
- static
- #ifdef LZFIND_ATTRIB_AVX2
- LZFIND_ATTRIB_AVX2
- #endif
- void
- Z7_FASTCALL
- LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
- {
- const __m256i sub2 = _mm256_set_epi32(
- (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
- (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
- Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
- do
- {
- SASUB_256(0) SASUB_256(1) items += 2 * 8;
- SASUB_256(0) SASUB_256(1) items += 2 * 8;
- }
- while (items != lim);
- }
- #endif // USE_LZFIND_SATUR_SUB_256
- #ifndef FORCE_LZFIND_SATUR_SUB_128
- typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
- UInt32 subValue, CLzRef *items, const CLzRef *lim);
- static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
- #endif // FORCE_LZFIND_SATUR_SUB_128
- #endif // USE_LZFIND_SATUR_SUB_128
- // kEmptyHashValue must be zero
- // #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
- #define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
- #ifdef FORCE_LZFIND_SATUR_SUB_128
- #define DEFAULT_SaturSub LzFind_SaturSub_128
- #else
- #define DEFAULT_SaturSub LzFind_SaturSub_32
- Z7_NO_INLINE
- static
- void
- Z7_FASTCALL
- LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
- {
- Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
- do
- {
- SASUB_32(0) SASUB_32(1) items += 2;
- SASUB_32(0) SASUB_32(1) items += 2;
- SASUB_32(0) SASUB_32(1) items += 2;
- SASUB_32(0) SASUB_32(1) items += 2;
- }
- while (items != lim);
- }
- #endif
- Z7_NO_INLINE
- void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
- {
- #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
- Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
- for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
- {
- SASUB_32(0)
- items++;
- }
- {
- const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
- CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
- numItems &= k_Align_Mask;
- if (items != lim)
- {
- #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
- if (g_LzFind_SaturSub)
- g_LzFind_SaturSub(subValue, items, lim);
- else
- #endif
- DEFAULT_SaturSub(subValue, items, lim);
- }
- items = lim;
- }
- Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
- for (; numItems != 0; numItems--)
- {
- SASUB_32(0)
- items++;
- }
- }
- // call MatchFinder_CheckLimits() only after (p->pos++) update
- Z7_NO_INLINE
- static void MatchFinder_CheckLimits(CMatchFinder *p)
- {
- if (// !p->streamEndWasReached && p->result == SZ_OK &&
- p->keepSizeAfter == GET_AVAIL_BYTES(p))
- {
- // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
- }
- if (p->pos == kMaxValForNormalize)
- if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
- /*
- if we disable normalization for last bytes of data, and
- if (data_size == 4 GiB), we don't call wastfull normalization,
- but (pos) will be wrapped over Zero (0) in that case.
- And we cannot resume later to normal operation
- */
- {
- // MatchFinder_Normalize(p);
- /* after normalization we need (p->pos >= p->historySize + 1); */
- /* we can reduce subValue to aligned value, if want to keep alignment
- of (p->pos) and (p->buffer) for speculated accesses. */
- const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
- // const UInt32 subValue = (1 << 15); // for debug
- // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
- MatchFinder_REDUCE_OFFSETS(p, subValue)
- MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
- {
- size_t numSonRefs = p->cyclicBufferSize;
- if (p->btMode)
- numSonRefs <<= 1;
- MatchFinder_Normalize3(subValue, p->son, numSonRefs);
- }
- }
- if (p->cyclicBufferPos == p->cyclicBufferSize)
- p->cyclicBufferPos = 0;
-
- MatchFinder_SetLimits(p);
- }
- /*
- (lenLimit > maxLen)
- */
- Z7_FORCE_INLINE
- static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *d, unsigned maxLen)
- {
- /*
- son[_cyclicBufferPos] = curMatch;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return d;
- {
- const Byte *pb = cur - delta;
- curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
- if (pb[maxLen] == cur[maxLen] && *pb == *cur)
- {
- UInt32 len = 0;
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *d++ = len;
- *d++ = delta - 1;
- if (len == lenLimit)
- return d;
- }
- }
- }
- }
- */
- const Byte *lim = cur + lenLimit;
- son[_cyclicBufferPos] = curMatch;
- do
- {
- UInt32 delta;
- if (curMatch == 0)
- break;
- // if (curMatch2 >= curMatch) return NULL;
- delta = pos - curMatch;
- if (delta >= _cyclicBufferSize)
- break;
- {
- ptrdiff_t diff;
- curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
- diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
- if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
- {
- const Byte *c = cur;
- while (*c == c[diff])
- {
- if (++c == lim)
- {
- d[0] = (UInt32)(lim - cur);
- d[1] = delta - 1;
- return d + 2;
- }
- }
- {
- const unsigned len = (unsigned)(c - cur);
- if (maxLen < len)
- {
- maxLen = len;
- d[0] = (UInt32)len;
- d[1] = delta - 1;
- d += 2;
- }
- }
- }
- }
- }
- while (--cutValue);
-
- return d;
- }
- Z7_FORCE_INLINE
- UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *d, UInt32 maxLen)
- {
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- UInt32 cmCheck;
- // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
- cmCheck = (UInt32)(pos - _cyclicBufferSize);
- if ((UInt32)pos < _cyclicBufferSize)
- cmCheck = 0;
- if (cmCheck < curMatch)
- do
- {
- const UInt32 delta = pos - curMatch;
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- const UInt32 pair0 = pair[0];
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = (UInt32)len;
- *d++ = (UInt32)len;
- *d++ = delta - 1;
- if (len == lenLimit)
- {
- *ptr1 = pair0;
- *ptr0 = pair[1];
- return d;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- // const UInt32 curMatch2 = pair[1];
- // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
- // curMatch = curMatch2;
- curMatch = pair[1];
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- curMatch = pair[0];
- ptr0 = pair;
- len0 = len;
- }
- }
- }
- while(--cutValue && cmCheck < curMatch);
- *ptr0 = *ptr1 = kEmptyHashValue;
- return d;
- }
- static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
- {
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- UInt32 cmCheck;
- cmCheck = (UInt32)(pos - _cyclicBufferSize);
- if ((UInt32)pos < _cyclicBufferSize)
- cmCheck = 0;
- if (// curMatch >= pos || // failure
- cmCheck < curMatch)
- do
- {
- const UInt32 delta = pos - curMatch;
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- if (pb[len] == cur[len])
- {
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- {
- if (len == lenLimit)
- {
- *ptr1 = pair[0];
- *ptr0 = pair[1];
- return;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- curMatch = pair[1];
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- curMatch = pair[0];
- ptr0 = pair;
- len0 = len;
- }
- }
- }
- while(--cutValue && cmCheck < curMatch);
-
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
- #define MOVE_POS \
- p->cyclicBufferPos++; \
- p->buffer++; \
- { const UInt32 pos1 = p->pos + 1; \
- p->pos = pos1; \
- if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
- #define MOVE_POS_RET MOVE_POS return distances;
- Z7_NO_INLINE
- static void MatchFinder_MovePos(CMatchFinder *p)
- {
- /* we go here at the end of stream data, when (avail < num_hash_bytes)
- We don't update sons[cyclicBufferPos << btMode].
- So (sons) record will contain junk. And we cannot resume match searching
- to normal operation, even if we will provide more input data in buffer.
- p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
- if (p->btMode)
- p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
- */
- MOVE_POS
- }
- #define GET_MATCHES_HEADER2(minLen, ret_op) \
- UInt32 hv; const Byte *cur; UInt32 curMatch; \
- UInt32 lenLimit = p->lenLimit; \
- if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
- cur = p->buffer;
- #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
- #define SKIP_HEADER(minLen) \
- do { GET_MATCHES_HEADER2(minLen, continue)
- #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \
- p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
- #define SKIP_FOOTER \
- SkipMatchesSpec(MF_PARAMS(p)); \
- MOVE_POS \
- } while (--num);
- #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
- distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
- MOVE_POS_RET
- #define GET_MATCHES_FOOTER_BT(_maxLen_) \
- GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
- #define GET_MATCHES_FOOTER_HC(_maxLen_) \
- GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
- #define UPDATE_maxLen { \
- const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
- const Byte *c = cur + maxLen; \
- const Byte *lim = cur + lenLimit; \
- for (; c != lim; c++) if (*(c + diff) != *c) break; \
- maxLen = (unsigned)(c - cur); }
- static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- GET_MATCHES_HEADER(2)
- HASH2_CALC
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- GET_MATCHES_FOOTER_BT(1)
- }
- UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
- {
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- GET_MATCHES_FOOTER_BT(2)
- }
- #define SET_mmm \
- mmm = p->cyclicBufferSize; \
- if (pos < mmm) \
- mmm = pos;
- static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- UInt32 mmm;
- UInt32 h2, d2, pos;
- unsigned maxLen;
- UInt32 *hash;
- GET_MATCHES_HEADER(3)
- HASH3_CALC
- hash = p->hash;
- pos = p->pos;
- d2 = pos - hash[h2];
- curMatch = (hash + kFix3HashSize)[hv];
-
- hash[h2] = pos;
- (hash + kFix3HashSize)[hv] = pos;
- SET_mmm
- maxLen = 2;
- if (d2 < mmm && *(cur - d2) == *cur)
- {
- UPDATE_maxLen
- distances[0] = (UInt32)maxLen;
- distances[1] = d2 - 1;
- distances += 2;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec(MF_PARAMS(p));
- MOVE_POS_RET
- }
- }
-
- GET_MATCHES_FOOTER_BT(maxLen)
- }
- static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- UInt32 mmm;
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
- HASH4_CALC
- hash = p->hash;
- pos = p->pos;
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
- SET_mmm
- maxLen = 3;
-
- for (;;)
- {
- if (d2 < mmm && *(cur - d2) == *cur)
- {
- distances[0] = 2;
- distances[1] = d2 - 1;
- distances += 2;
- if (*(cur - d2 + 2) == cur[2])
- {
- // distances[-2] = 3;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- d2 = d3;
- distances[1] = d3 - 1;
- distances += 2;
- }
- else
- break;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- d2 = d3;
- distances[1] = d3 - 1;
- distances += 2;
- }
- else
- break;
-
- UPDATE_maxLen
- distances[-2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec(MF_PARAMS(p));
- MOVE_POS_RET
- }
- break;
- }
-
- GET_MATCHES_FOOTER_BT(maxLen)
- }
- static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- UInt32 mmm;
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen;
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
- HASH5_CALC
- hash = p->hash;
- pos = p->pos;
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- // d4 = pos - (hash + kFix4HashSize)[h4];
- curMatch = (hash + kFix5HashSize)[hv];
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- // (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
- SET_mmm
- maxLen = 4;
- for (;;)
- {
- if (d2 < mmm && *(cur - d2) == *cur)
- {
- distances[0] = 2;
- distances[1] = d2 - 1;
- distances += 2;
- if (*(cur - d2 + 2) == cur[2])
- {
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- distances[1] = d3 - 1;
- distances += 2;
- d2 = d3;
- }
- else
- break;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- distances[1] = d3 - 1;
- distances += 2;
- d2 = d3;
- }
- else
- break;
- distances[-2] = 3;
- if (*(cur - d2 + 3) != cur[3])
- break;
- UPDATE_maxLen
- distances[-2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec(MF_PARAMS(p));
- MOVE_POS_RET
- }
- break;
- }
-
- GET_MATCHES_FOOTER_BT(maxLen)
- }
- static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- UInt32 mmm;
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
- HASH4_CALC
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
- SET_mmm
- maxLen = 3;
- for (;;)
- {
- if (d2 < mmm && *(cur - d2) == *cur)
- {
- distances[0] = 2;
- distances[1] = d2 - 1;
- distances += 2;
- if (*(cur - d2 + 2) == cur[2])
- {
- // distances[-2] = 3;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- d2 = d3;
- distances[1] = d3 - 1;
- distances += 2;
- }
- else
- break;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- d2 = d3;
- distances[1] = d3 - 1;
- distances += 2;
- }
- else
- break;
- UPDATE_maxLen
- distances[-2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET
- }
- break;
- }
-
- GET_MATCHES_FOOTER_HC(maxLen)
- }
- static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- UInt32 mmm;
- UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen;
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
- HASH5_CALC
- hash = p->hash;
- pos = p->pos;
- d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- // d4 = pos - (hash + kFix4HashSize)[h4];
- curMatch = (hash + kFix5HashSize)[hv];
- hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- // (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
- SET_mmm
-
- maxLen = 4;
- for (;;)
- {
- if (d2 < mmm && *(cur - d2) == *cur)
- {
- distances[0] = 2;
- distances[1] = d2 - 1;
- distances += 2;
- if (*(cur - d2 + 2) == cur[2])
- {
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- distances[1] = d3 - 1;
- distances += 2;
- d2 = d3;
- }
- else
- break;
- }
- else if (d3 < mmm && *(cur - d3) == *cur)
- {
- distances[1] = d3 - 1;
- distances += 2;
- d2 = d3;
- }
- else
- break;
- distances[-2] = 3;
- if (*(cur - d2 + 3) != cur[3])
- break;
- UPDATE_maxLen
- distances[-2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET
- }
- break;
- }
-
- GET_MATCHES_FOOTER_HC(maxLen)
- }
- UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
- {
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- GET_MATCHES_FOOTER_HC(2)
- }
- static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- SKIP_HEADER(2)
- {
- HASH2_CALC
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- }
- SKIP_FOOTER
- }
- void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
- {
- SKIP_HEADER(3)
- {
- HASH_ZIP_CALC
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- }
- SKIP_FOOTER
- }
- static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- SKIP_HEADER(3)
- {
- UInt32 h2;
- UInt32 *hash;
- HASH3_CALC
- hash = p->hash;
- curMatch = (hash + kFix3HashSize)[hv];
- hash[h2] =
- (hash + kFix3HashSize)[hv] = p->pos;
- }
- SKIP_FOOTER
- }
- static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- SKIP_HEADER(4)
- {
- UInt32 h2, h3;
- UInt32 *hash;
- HASH4_CALC
- hash = p->hash;
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- }
- SKIP_FOOTER
- }
- static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- SKIP_HEADER(5)
- {
- UInt32 h2, h3;
- UInt32 *hash;
- HASH5_CALC
- hash = p->hash;
- curMatch = (hash + kFix5HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- // (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- }
- SKIP_FOOTER
- }
- #define HC_SKIP_HEADER(minLen) \
- do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
- const Byte *cur; \
- UInt32 *hash; \
- UInt32 *son; \
- UInt32 pos = p->pos; \
- UInt32 num2 = num; \
- /* (p->pos == p->posLimit) is not allowed here !!! */ \
- { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \
- num -= num2; \
- { const UInt32 cycPos = p->cyclicBufferPos; \
- son = p->son + cycPos; \
- p->cyclicBufferPos = cycPos + num2; } \
- cur = p->buffer; \
- hash = p->hash; \
- do { \
- UInt32 curMatch; \
- UInt32 hv;
- #define HC_SKIP_FOOTER \
- cur++; pos++; *son++ = curMatch; \
- } while (--num2); \
- p->buffer = cur; \
- p->pos = pos; \
- if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
- }} while(num); \
- static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- HC_SKIP_HEADER(4)
- UInt32 h2, h3;
- HASH4_CALC
- curMatch = (hash + kFix4HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = pos;
-
- HC_SKIP_FOOTER
- }
- static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
- {
- CMatchFinder *p = (CMatchFinder *)_p;
- HC_SKIP_HEADER(5)
-
- UInt32 h2, h3;
- HASH5_CALC
- curMatch = (hash + kFix5HashSize)[hv];
- hash [h2] =
- (hash + kFix3HashSize)[h3] =
- // (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = pos;
-
- HC_SKIP_FOOTER
- }
- void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
- {
- HC_SKIP_HEADER(3)
- HASH_ZIP_CALC
- curMatch = hash[hv];
- hash[hv] = pos;
- HC_SKIP_FOOTER
- }
- void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
- {
- vTable->Init = MatchFinder_Init;
- vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
- vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
- if (!p->btMode)
- {
- if (p->numHashBytes <= 4)
- {
- vTable->GetMatches = Hc4_MatchFinder_GetMatches;
- vTable->Skip = Hc4_MatchFinder_Skip;
- }
- else
- {
- vTable->GetMatches = Hc5_MatchFinder_GetMatches;
- vTable->Skip = Hc5_MatchFinder_Skip;
- }
- }
- else if (p->numHashBytes == 2)
- {
- vTable->GetMatches = Bt2_MatchFinder_GetMatches;
- vTable->Skip = Bt2_MatchFinder_Skip;
- }
- else if (p->numHashBytes == 3)
- {
- vTable->GetMatches = Bt3_MatchFinder_GetMatches;
- vTable->Skip = Bt3_MatchFinder_Skip;
- }
- else if (p->numHashBytes == 4)
- {
- vTable->GetMatches = Bt4_MatchFinder_GetMatches;
- vTable->Skip = Bt4_MatchFinder_Skip;
- }
- else
- {
- vTable->GetMatches = Bt5_MatchFinder_GetMatches;
- vTable->Skip = Bt5_MatchFinder_Skip;
- }
- }
- void LzFindPrepare(void)
- {
- #ifndef FORCE_LZFIND_SATUR_SUB_128
- #ifdef USE_LZFIND_SATUR_SUB_128
- LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
- #ifdef MY_CPU_ARM_OR_ARM64
- {
- if (CPU_IsSupported_NEON())
- {
- // #pragma message ("=== LzFind NEON")
- PRF(printf("\n=== LzFind NEON\n"));
- f = LzFind_SaturSub_128;
- }
- // f = 0; // for debug
- }
- #else // MY_CPU_ARM_OR_ARM64
- if (CPU_IsSupported_SSE41())
- {
- // #pragma message ("=== LzFind SSE41")
- PRF(printf("\n=== LzFind SSE41\n"));
- f = LzFind_SaturSub_128;
- #ifdef USE_LZFIND_SATUR_SUB_256
- if (CPU_IsSupported_AVX2())
- {
- // #pragma message ("=== LzFind AVX2")
- PRF(printf("\n=== LzFind AVX2\n"));
- f = LzFind_SaturSub_256;
- }
- #endif
- }
- #endif // MY_CPU_ARM_OR_ARM64
- g_LzFind_SaturSub = f;
- #endif // USE_LZFIND_SATUR_SUB_128
- #endif // FORCE_LZFIND_SATUR_SUB_128
- }
- #undef MOVE_POS
- #undef MOVE_POS_RET
- #undef PRF
|