/* NOTE: web-viewer residue (file-size banner and concatenated line-number gutter)
   removed from the top of this scraped copy of LzFind.c. */
  1. /* LzFind.c -- Match finder for LZ algorithms
  2. : Igor Pavlov : Public domain */
  3. #include "Precomp.h"
  4. #include <string.h>
  5. // #include <stdio.h>
  6. #include "CpuArch.h"
  7. #include "LzFind.h"
  8. #include "LzHash.h"
  9. #define kBlockMoveAlign (1 << 7) // alignment for memmove()
  10. #define kBlockSizeAlign (1 << 16) // alignment for block allocation
  11. #define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
  12. #define kEmptyHashValue 0
  13. #define kMaxValForNormalize ((UInt32)0)
  14. // #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
  15. // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
  16. #define GET_AVAIL_BYTES(p) \
  17. Inline_MatchFinder_GetNumAvailableBytes(p)
  18. // #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
  19. #define kFix5HashSize kFix4HashSize
  20. /*
  21. HASH2_CALC:
  22. if (hv) match, then cur[0] and cur[1] also match
  23. */
  24. #define HASH2_CALC hv = GetUi16(cur);
  25. // (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
  26. /*
  27. HASH3_CALC:
  28. if (cur[0]) and (h2) match, then cur[1] also match
  29. if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
  30. */
  31. #define HASH3_CALC { \
  32. UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  33. h2 = temp & (kHash2Size - 1); \
  34. hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
  35. #define HASH4_CALC { \
  36. UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  37. h2 = temp & (kHash2Size - 1); \
  38. temp ^= ((UInt32)cur[2] << 8); \
  39. h3 = temp & (kHash3Size - 1); \
  40. hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
  41. #define HASH5_CALC { \
  42. UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  43. h2 = temp & (kHash2Size - 1); \
  44. temp ^= ((UInt32)cur[2] << 8); \
  45. h3 = temp & (kHash3Size - 1); \
  46. temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
  47. /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
  48. hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
  49. #define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
  50. static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
  51. {
  52. // if (!p->directInput)
  53. {
  54. ISzAlloc_Free(alloc, p->bufBase);
  55. p->bufBase = NULL;
  56. }
  57. }
  58. static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
  59. {
  60. if (blockSize == 0)
  61. return 0;
  62. if (!p->bufBase || p->blockSize != blockSize)
  63. {
  64. // size_t blockSizeT;
  65. LzInWindow_Free(p, alloc);
  66. p->blockSize = blockSize;
  67. // blockSizeT = blockSize;
  68. // printf("\nblockSize = 0x%x\n", blockSize);
  69. /*
  70. #if defined _WIN64
  71. // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
  72. // we use UInt32 type for (p->blockSize), because
  73. // we don't want to wrap over 4 GiB,
  74. // when we use (p->streamPos - p->pos) that is UInt32.
  75. if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
  76. {
  77. blockSizeT = ((size_t)1 << 32);
  78. printf("\nchanged to blockSizeT = 4GiB\n");
  79. }
  80. #endif
  81. */
  82. p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
  83. // printf("\nbufferBase = %p\n", p->bufBase);
  84. // return 0; // for debug
  85. }
  86. return (p->bufBase != NULL);
  87. }
  88. static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
  89. {
  90. return ((CMatchFinder *)p)->buffer;
  91. }
  92. static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
  93. {
  94. return GET_AVAIL_BYTES((CMatchFinder *)p);
  95. }
  96. Z7_NO_INLINE
  97. static void MatchFinder_ReadBlock(CMatchFinder *p)
  98. {
  99. if (p->streamEndWasReached || p->result != SZ_OK)
  100. return;
  101. /* We use (p->streamPos - p->pos) value.
  102. (p->streamPos < p->pos) is allowed. */
  103. if (p->directInput)
  104. {
  105. UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
  106. if (curSize > p->directInputRem)
  107. curSize = (UInt32)p->directInputRem;
  108. p->streamPos += curSize;
  109. p->directInputRem -= curSize;
  110. if (p->directInputRem == 0)
  111. p->streamEndWasReached = 1;
  112. return;
  113. }
  114. for (;;)
  115. {
  116. const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
  117. size_t size = (size_t)(p->bufBase + p->blockSize - dest);
  118. if (size == 0)
  119. {
  120. /* we call ReadBlock() after NeedMove() and MoveBlock().
  121. NeedMove() and MoveBlock() povide more than (keepSizeAfter)
  122. to the end of (blockSize).
  123. So we don't execute this branch in normal code flow.
  124. We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
  125. */
  126. // p->result = SZ_ERROR_FAIL; // we can show error here
  127. return;
  128. }
  129. // #define kRead 3
  130. // if (size > kRead) size = kRead; // for debug
  131. /*
  132. // we need cast (Byte *)dest.
  133. #ifdef __clang__
  134. #pragma GCC diagnostic ignored "-Wcast-qual"
  135. #endif
  136. */
  137. p->result = ISeqInStream_Read(p->stream,
  138. p->bufBase + (dest - p->bufBase), &size);
  139. if (p->result != SZ_OK)
  140. return;
  141. if (size == 0)
  142. {
  143. p->streamEndWasReached = 1;
  144. return;
  145. }
  146. p->streamPos += (UInt32)size;
  147. if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
  148. return;
  149. /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
  150. (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
  151. }
  152. // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
  153. }
  154. Z7_NO_INLINE
  155. void MatchFinder_MoveBlock(CMatchFinder *p)
  156. {
  157. const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
  158. const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
  159. p->buffer = p->bufBase + keepBefore;
  160. memmove(p->bufBase,
  161. p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
  162. keepBefore + (size_t)GET_AVAIL_BYTES(p));
  163. }
  164. /* We call MoveBlock() before ReadBlock().
  165. So MoveBlock() can be wasteful operation, if the whole input data
  166. can fit in current block even without calling MoveBlock().
  167. in important case where (dataSize <= historySize)
  168. condition (p->blockSize > dataSize + p->keepSizeAfter) is met
  169. So there is no MoveBlock() in that case case.
  170. */
  171. int MatchFinder_NeedMove(CMatchFinder *p)
  172. {
  173. if (p->directInput)
  174. return 0;
  175. if (p->streamEndWasReached || p->result != SZ_OK)
  176. return 0;
  177. return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
  178. }
  179. void MatchFinder_ReadIfRequired(CMatchFinder *p)
  180. {
  181. if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
  182. MatchFinder_ReadBlock(p);
  183. }
  184. static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
  185. {
  186. p->cutValue = 32;
  187. p->btMode = 1;
  188. p->numHashBytes = 4;
  189. p->numHashBytes_Min = 2;
  190. p->numHashOutBits = 0;
  191. p->bigHash = 0;
  192. }
  193. #define kCrcPoly 0xEDB88320
  194. void MatchFinder_Construct(CMatchFinder *p)
  195. {
  196. unsigned i;
  197. p->buffer = NULL;
  198. p->bufBase = NULL;
  199. p->directInput = 0;
  200. p->stream = NULL;
  201. p->hash = NULL;
  202. p->expectedDataSize = (UInt64)(Int64)-1;
  203. MatchFinder_SetDefaultSettings(p);
  204. for (i = 0; i < 256; i++)
  205. {
  206. UInt32 r = (UInt32)i;
  207. unsigned j;
  208. for (j = 0; j < 8; j++)
  209. r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
  210. p->crc[i] = r;
  211. }
  212. }
  213. #undef kCrcPoly
  214. static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
  215. {
  216. ISzAlloc_Free(alloc, p->hash);
  217. p->hash = NULL;
  218. }
  219. void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
  220. {
  221. MatchFinder_FreeThisClassMemory(p, alloc);
  222. LzInWindow_Free(p, alloc);
  223. }
  224. static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
  225. {
  226. const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
  227. if (sizeInBytes / sizeof(CLzRef) != num)
  228. return NULL;
  229. return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
  230. }
  231. #if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
  232. #error Stop_Compiling_Bad_Reserve
  233. #endif
  234. static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
  235. {
  236. UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
  237. /*
  238. if (historySize > kMaxHistorySize)
  239. return 0;
  240. */
  241. // printf("\nhistorySize == 0x%x\n", historySize);
  242. if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
  243. return 0;
  244. {
  245. const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
  246. const UInt32 rem = kBlockSizeMax - blockSize;
  247. const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
  248. + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
  249. if (blockSize >= kBlockSizeMax
  250. || rem < kBlockSizeReserveMin) // we reject settings that will be slow
  251. return 0;
  252. if (reserve >= rem)
  253. blockSize = kBlockSizeMax;
  254. else
  255. {
  256. blockSize += reserve;
  257. blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
  258. }
  259. }
  260. // printf("\n LzFind_blockSize = %x\n", blockSize);
  261. // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
  262. return blockSize;
  263. }
  264. // input is historySize
  265. static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
  266. {
  267. if (p->numHashBytes == 2)
  268. return (1 << 16) - 1;
  269. if (hs != 0)
  270. hs--;
  271. hs |= (hs >> 1);
  272. hs |= (hs >> 2);
  273. hs |= (hs >> 4);
  274. hs |= (hs >> 8);
  275. // we propagated 16 bits in (hs). Low 16 bits must be set later
  276. if (hs >= (1 << 24))
  277. {
  278. if (p->numHashBytes == 3)
  279. hs = (1 << 24) - 1;
  280. /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  281. }
  282. // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  283. hs |= (1 << 16) - 1; /* don't change it! */
  284. // bt5: we adjust the size with recommended minimum size
  285. if (p->numHashBytes >= 5)
  286. hs |= (256 << kLzHash_CrcShift_2) - 1;
  287. return hs;
  288. }
  289. // input is historySize
  290. static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
  291. {
  292. if (p->numHashBytes == 2)
  293. return (1 << 16) - 1;
  294. if (hs != 0)
  295. hs--;
  296. hs |= (hs >> 1);
  297. hs |= (hs >> 2);
  298. hs |= (hs >> 4);
  299. hs |= (hs >> 8);
  300. // we propagated 16 bits in (hs). Low 16 bits must be set later
  301. hs >>= 1;
  302. if (hs >= (1 << 24))
  303. {
  304. if (p->numHashBytes == 3)
  305. hs = (1 << 24) - 1;
  306. else
  307. hs >>= 1;
  308. /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  309. }
  310. // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  311. hs |= (1 << 16) - 1; /* don't change it! */
  312. // bt5: we adjust the size with recommended minimum size
  313. if (p->numHashBytes >= 5)
  314. hs |= (256 << kLzHash_CrcShift_2) - 1;
  315. return hs;
  316. }
  317. int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
  318. UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
  319. ISzAllocPtr alloc)
  320. {
  321. /* we need one additional byte in (p->keepSizeBefore),
  322. since we use MoveBlock() after (p->pos++) and before dictionary using */
  323. // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
  324. p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
  325. keepAddBufferAfter += matchMaxLen;
  326. /* we need (p->keepSizeAfter >= p->numHashBytes) */
  327. if (keepAddBufferAfter < p->numHashBytes)
  328. keepAddBufferAfter = p->numHashBytes;
  329. // keepAddBufferAfter -= 2; // for debug
  330. p->keepSizeAfter = keepAddBufferAfter;
  331. if (p->directInput)
  332. p->blockSize = 0;
  333. if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
  334. {
  335. size_t hashSizeSum;
  336. {
  337. UInt32 hs;
  338. UInt32 hsCur;
  339. if (p->numHashOutBits != 0)
  340. {
  341. unsigned numBits = p->numHashOutBits;
  342. const unsigned nbMax =
  343. (p->numHashBytes == 2 ? 16 :
  344. (p->numHashBytes == 3 ? 24 : 32));
  345. if (numBits >= nbMax)
  346. numBits = nbMax;
  347. if (numBits >= 32)
  348. hs = (UInt32)0 - 1;
  349. else
  350. hs = ((UInt32)1 << numBits) - 1;
  351. // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  352. hs |= (1 << 16) - 1; /* don't change it! */
  353. if (p->numHashBytes >= 5)
  354. hs |= (256 << kLzHash_CrcShift_2) - 1;
  355. {
  356. const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
  357. if (hs >= hs2)
  358. hs = hs2;
  359. }
  360. hsCur = hs;
  361. if (p->expectedDataSize < historySize)
  362. {
  363. const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
  364. if (hsCur >= hs2)
  365. hsCur = hs2;
  366. }
  367. }
  368. else
  369. {
  370. hs = MatchFinder_GetHashMask(p, historySize);
  371. hsCur = hs;
  372. if (p->expectedDataSize < historySize)
  373. {
  374. hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
  375. if (hsCur >= hs) // is it possible?
  376. hsCur = hs;
  377. }
  378. }
  379. p->hashMask = hsCur;
  380. hashSizeSum = hs;
  381. hashSizeSum++;
  382. if (hashSizeSum < hs)
  383. return 0;
  384. {
  385. UInt32 fixedHashSize = 0;
  386. if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
  387. if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
  388. // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
  389. hashSizeSum += fixedHashSize;
  390. p->fixedHashSize = fixedHashSize;
  391. }
  392. }
  393. p->matchMaxLen = matchMaxLen;
  394. {
  395. size_t newSize;
  396. size_t numSons;
  397. const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
  398. p->historySize = historySize;
  399. p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
  400. numSons = newCyclicBufferSize;
  401. if (p->btMode)
  402. numSons <<= 1;
  403. newSize = hashSizeSum + numSons;
  404. if (numSons < newCyclicBufferSize || newSize < numSons)
  405. return 0;
  406. // aligned size is not required here, but it can be better for some loops
  407. #define NUM_REFS_ALIGN_MASK 0xF
  408. newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
  409. // 22.02: we don't reallocate buffer, if old size is enough
  410. if (p->hash && p->numRefs >= newSize)
  411. return 1;
  412. MatchFinder_FreeThisClassMemory(p, alloc);
  413. p->numRefs = newSize;
  414. p->hash = AllocRefs(newSize, alloc);
  415. if (p->hash)
  416. {
  417. p->son = p->hash + hashSizeSum;
  418. return 1;
  419. }
  420. }
  421. }
  422. MatchFinder_Free(p, alloc);
  423. return 0;
  424. }
  425. static void MatchFinder_SetLimits(CMatchFinder *p)
  426. {
  427. UInt32 k;
  428. UInt32 n = kMaxValForNormalize - p->pos;
  429. if (n == 0)
  430. n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
  431. k = p->cyclicBufferSize - p->cyclicBufferPos;
  432. if (k < n)
  433. n = k;
  434. k = GET_AVAIL_BYTES(p);
  435. {
  436. const UInt32 ksa = p->keepSizeAfter;
  437. UInt32 mm = p->matchMaxLen;
  438. if (k > ksa)
  439. k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
  440. else if (k >= mm)
  441. {
  442. // the limitation for (p->lenLimit) update
  443. k -= mm; // optimization : to reduce the number of checks
  444. k++;
  445. // k = 1; // non-optimized version : for debug
  446. }
  447. else
  448. {
  449. mm = k;
  450. if (k != 0)
  451. k = 1;
  452. }
  453. p->lenLimit = mm;
  454. }
  455. if (k < n)
  456. n = k;
  457. p->posLimit = p->pos + n;
  458. }
  459. void MatchFinder_Init_LowHash(CMatchFinder *p)
  460. {
  461. size_t i;
  462. CLzRef *items = p->hash;
  463. const size_t numItems = p->fixedHashSize;
  464. for (i = 0; i < numItems; i++)
  465. items[i] = kEmptyHashValue;
  466. }
  467. void MatchFinder_Init_HighHash(CMatchFinder *p)
  468. {
  469. size_t i;
  470. CLzRef *items = p->hash + p->fixedHashSize;
  471. const size_t numItems = (size_t)p->hashMask + 1;
  472. for (i = 0; i < numItems; i++)
  473. items[i] = kEmptyHashValue;
  474. }
  475. void MatchFinder_Init_4(CMatchFinder *p)
  476. {
  477. if (!p->directInput)
  478. p->buffer = p->bufBase;
  479. {
  480. /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
  481. the code in CMatchFinderMt expects (pos = 1) */
  482. p->pos =
  483. p->streamPos =
  484. 1; // it's smallest optimal value. do not change it
  485. // 0; // for debug
  486. }
  487. p->result = SZ_OK;
  488. p->streamEndWasReached = 0;
  489. }
  490. // (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
  491. #define CYC_TO_POS_OFFSET 0
  492. // #define CYC_TO_POS_OFFSET 1 // for debug
  493. void MatchFinder_Init(void *_p)
  494. {
  495. CMatchFinder *p = (CMatchFinder *)_p;
  496. MatchFinder_Init_HighHash(p);
  497. MatchFinder_Init_LowHash(p);
  498. MatchFinder_Init_4(p);
  499. // if (readData)
  500. MatchFinder_ReadBlock(p);
  501. /* if we init (cyclicBufferPos = pos), then we can use one variable
  502. instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
  503. p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
  504. // p->cyclicBufferPos = 0; // smallest value
  505. // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
  506. MatchFinder_SetLimits(p);
  507. }
  508. #ifdef MY_CPU_X86_OR_AMD64
  509. #if defined(__clang__) && (__clang_major__ >= 4) \
  510. || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
  511. // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
  512. #define USE_LZFIND_SATUR_SUB_128
  513. #define USE_LZFIND_SATUR_SUB_256
  514. #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
  515. #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
  516. #elif defined(_MSC_VER)
  517. #if (_MSC_VER >= 1600)
  518. #define USE_LZFIND_SATUR_SUB_128
  519. #endif
  520. #if (_MSC_VER >= 1900)
  521. #define USE_LZFIND_SATUR_SUB_256
  522. #endif
  523. #endif
  524. #elif defined(MY_CPU_ARM64) \
  525. /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
  526. #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
  527. || defined(__GNUC__) && (__GNUC__ >= 6)
  528. #define USE_LZFIND_SATUR_SUB_128
  529. #ifdef MY_CPU_ARM64
  530. // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
  531. #else
  532. #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
  533. #endif
  534. #elif defined(_MSC_VER)
  535. #if (_MSC_VER >= 1910)
  536. #define USE_LZFIND_SATUR_SUB_128
  537. #endif
  538. #endif
  539. #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
  540. #include <arm64_neon.h>
  541. #else
  542. #include <arm_neon.h>
  543. #endif
  544. #endif
  545. #ifdef USE_LZFIND_SATUR_SUB_128
  546. // #define Z7_SHOW_HW_STATUS
  547. #ifdef Z7_SHOW_HW_STATUS
  548. #include <stdio.h>
  549. #define PRF(x) x
  550. PRF(;)
  551. #else
  552. #define PRF(x)
  553. #endif
  554. #ifdef MY_CPU_ARM_OR_ARM64
  555. #ifdef MY_CPU_ARM64
  556. // #define FORCE_LZFIND_SATUR_SUB_128
  557. #endif
  558. typedef uint32x4_t LzFind_v128;
  559. #define SASUB_128_V(v, s) \
  560. vsubq_u32(vmaxq_u32(v, s), s)
  561. #else // MY_CPU_ARM_OR_ARM64
  562. #include <smmintrin.h> // sse4.1
  563. typedef __m128i LzFind_v128;
  564. // SSE 4.1
  565. #define SASUB_128_V(v, s) \
  566. _mm_sub_epi32(_mm_max_epu32(v, s), s)
  567. #endif // MY_CPU_ARM_OR_ARM64
  568. #define SASUB_128(i) \
  569. *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
  570. *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
  571. Z7_NO_INLINE
  572. static
  573. #ifdef LZFIND_ATTRIB_SSE41
  574. LZFIND_ATTRIB_SSE41
  575. #endif
  576. void
  577. Z7_FASTCALL
  578. LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
  579. {
  580. const LzFind_v128 sub2 =
  581. #ifdef MY_CPU_ARM_OR_ARM64
  582. vdupq_n_u32(subValue);
  583. #else
  584. _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
  585. #endif
  586. Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  587. do
  588. {
  589. SASUB_128(0) SASUB_128(1) items += 2 * 4;
  590. SASUB_128(0) SASUB_128(1) items += 2 * 4;
  591. }
  592. while (items != lim);
  593. }
  594. #ifdef USE_LZFIND_SATUR_SUB_256
  595. #include <immintrin.h> // avx
  596. /*
  597. clang :immintrin.h uses
  598. #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
  599. defined(__AVX2__)
  600. #include <avx2intrin.h>
  601. #endif
  602. so we need <avxintrin.h> for clang-cl */
  603. #if defined(__clang__)
  604. #include <avxintrin.h>
  605. #include <avx2intrin.h>
  606. #endif
  607. // AVX2:
  608. #define SASUB_256(i) \
  609. *( __m256i *)( void *)(items + (i) * 8) = \
  610. _mm256_sub_epi32(_mm256_max_epu32( \
  611. *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
  612. Z7_NO_INLINE
  613. static
  614. #ifdef LZFIND_ATTRIB_AVX2
  615. LZFIND_ATTRIB_AVX2
  616. #endif
  617. void
  618. Z7_FASTCALL
  619. LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
  620. {
  621. const __m256i sub2 = _mm256_set_epi32(
  622. (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
  623. (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
  624. Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  625. do
  626. {
  627. SASUB_256(0) SASUB_256(1) items += 2 * 8;
  628. SASUB_256(0) SASUB_256(1) items += 2 * 8;
  629. }
  630. while (items != lim);
  631. }
  632. #endif // USE_LZFIND_SATUR_SUB_256
  633. #ifndef FORCE_LZFIND_SATUR_SUB_128
  634. typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
  635. UInt32 subValue, CLzRef *items, const CLzRef *lim);
  636. static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
  637. #endif // FORCE_LZFIND_SATUR_SUB_128
  638. #endif // USE_LZFIND_SATUR_SUB_128
  639. // kEmptyHashValue must be zero
  640. // #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
  641. #define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
  642. #ifdef FORCE_LZFIND_SATUR_SUB_128
  643. #define DEFAULT_SaturSub LzFind_SaturSub_128
  644. #else
  645. #define DEFAULT_SaturSub LzFind_SaturSub_32
  646. Z7_NO_INLINE
  647. static
  648. void
  649. Z7_FASTCALL
  650. LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
  651. {
  652. Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  653. do
  654. {
  655. SASUB_32(0) SASUB_32(1) items += 2;
  656. SASUB_32(0) SASUB_32(1) items += 2;
  657. SASUB_32(0) SASUB_32(1) items += 2;
  658. SASUB_32(0) SASUB_32(1) items += 2;
  659. }
  660. while (items != lim);
  661. }
  662. #endif
  663. Z7_NO_INLINE
  664. void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
  665. {
  666. #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
  667. Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  668. for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
  669. {
  670. SASUB_32(0)
  671. items++;
  672. }
  673. {
  674. const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
  675. CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
  676. numItems &= k_Align_Mask;
  677. if (items != lim)
  678. {
  679. #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
  680. if (g_LzFind_SaturSub)
  681. g_LzFind_SaturSub(subValue, items, lim);
  682. else
  683. #endif
  684. DEFAULT_SaturSub(subValue, items, lim);
  685. }
  686. items = lim;
  687. }
  688. Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  689. for (; numItems != 0; numItems--)
  690. {
  691. SASUB_32(0)
  692. items++;
  693. }
  694. }
// call MatchFinder_CheckLimits() only after (p->pos++) update
Z7_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
  // Refill the input buffer only in the exact state where the bytes
  // available after the current position equal keepSizeAfter.
  if (// !p->streamEndWasReached && p->result == SZ_OK &&
      p->keepSizeAfter == GET_AVAIL_BYTES(p))
  {
    // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
    if (MatchFinder_NeedMove(p))
      MatchFinder_MoveBlock(p);
    MatchFinder_ReadBlock(p);
  }

  // Normalize 32-bit position counters before they can wrap.
  if (p->pos == kMaxValForNormalize)
    if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
      /*
        if we disable normalization for last bytes of data, and
        if (data_size == 4 GiB), we don't call wasteful normalization,
        but (pos) will be wrapped over Zero (0) in that case.
        And we cannot resume later to normal operation
      */
      {
        // MatchFinder_Normalize(p);
        /* after normalization we need (p->pos >= p->historySize + 1); */
        /* we can reduce subValue to aligned value, if want to keep alignment
           of (p->pos) and (p->buffer) for speculated accesses. */
        const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
        // const UInt32 subValue = (1 << 15); // for debug
        // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
        MatchFinder_REDUCE_OFFSETS(p, subValue)
        // normalize the hash heads (main hash + fixed small-hash tables) ...
        MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
        // ... and the son links (two refs per position in bt mode)
        {
          size_t numSonRefs = p->cyclicBufferSize;
          if (p->btMode)
            numSonRefs <<= 1;
          MatchFinder_Normalize3(subValue, p->son, numSonRefs);
        }
      }

  // wrap the cyclic-buffer position and recompute posLimit / lenLimit
  if (p->cyclicBufferPos == p->cyclicBufferSize)
    p->cyclicBufferPos = 0;

  MatchFinder_SetLimits(p);
}
/*
  (lenLimit > maxLen)
*/
/* Hash-chain match search.
   Walks the chain starting at (curMatch), writing (length, distance - 1)
   pairs of strictly increasing length into (d).
   Returns the advanced write pointer.
   The walk stops after (cutValue) links, when the chain ends
   (curMatch == 0), when the link leaves the cyclic window
   (delta >= _cyclicBufferSize), or as soon as a match of (lenLimit)
   bytes is found.
   Side effect: stores (curMatch) as the new chain head for the current
   position in son[_cyclicBufferPos]. */
Z7_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *d, unsigned maxLen)
{
  /*
  son[_cyclicBufferPos] = curMatch;
  for (;;)
  {
    UInt32 delta = pos - curMatch;
    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
      return d;
    {
      const Byte *pb = cur - delta;
      curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
      {
        UInt32 len = 0;
        while (++len != lenLimit)
          if (pb[len] != cur[len])
            break;
        if (maxLen < len)
        {
          maxLen = len;
          *d++ = len;
          *d++ = delta - 1;
          if (len == lenLimit)
            return d;
        }
      }
    }
  }
  */

  const Byte *lim = cur + lenLimit;
  son[_cyclicBufferPos] = curMatch;

  do
  {
    UInt32 delta;

    if (curMatch == 0)
      break;
    // if (curMatch2 >= curMatch) return NULL;
    delta = pos - curMatch;
    if (delta >= _cyclicBufferSize)
      break;
    {
      ptrdiff_t diff;
      // advance along the chain before comparing bytes
      curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
      diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
      // cheap filter: a candidate can only beat maxLen if byte [maxLen] matches
      if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
      {
        const Byte *c = cur;
        while (*c == c[diff])
        {
          if (++c == lim)
          {
            // full lenLimit match: record it and stop searching
            d[0] = (UInt32)(lim - cur);
            d[1] = delta - 1;
            return d + 2;
          }
        }
        {
          const unsigned len = (unsigned)(c - cur);
          if (maxLen < len)
          {
            maxLen = len;
            d[0] = (UInt32)len;
            d[1] = delta - 1;
            d += 2;
          }
        }
      }
    }
  }
  while (--cutValue);

  return d;
}
/* Binary-tree match search.
   Searches the tree rooted at (curMatch) for matches against (cur),
   writing (length, distance - 1) pairs of strictly increasing length
   into (d), and simultaneously re-links the tree so that the current
   position becomes the new root (ptr0/ptr1 are its right/left child
   slots in son[]). Returns the advanced write pointer.
   Stops after (cutValue) nodes, when a node falls outside the cyclic
   window (curMatch <= cmCheck), or when a (lenLimit) match is found.
   len0/len1 track the guaranteed common-prefix length along the two
   tree branches, so comparisons restart at min(len0, len1). */
Z7_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *d, UInt32 maxLen)
{
  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  unsigned len0 = 0, len1 = 0;

  UInt32 cmCheck;

  // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }

  // cmCheck: smallest position still inside the cyclic window
  cmCheck = (UInt32)(pos - _cyclicBufferSize);
  if ((UInt32)pos < _cyclicBufferSize)
    cmCheck = 0;

  if (cmCheck < curMatch)
  do
  {
    const UInt32 delta = pos - curMatch;
    {
      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
      const Byte *pb = cur - delta;
      unsigned len = (len0 < len1 ? len0 : len1);
      const UInt32 pair0 = pair[0];
      if (pb[len] == cur[len])
      {
        if (++len != lenLimit && pb[len] == cur[len])
          while (++len != lenLimit)
            if (pb[len] != cur[len])
              break;
        if (maxLen < len)
        {
          maxLen = (UInt32)len;
          *d++ = (UInt32)len;
          *d++ = delta - 1;
          if (len == lenLimit)
          {
            // exact lenLimit match: splice the node out and finish
            *ptr1 = pair0;
            *ptr0 = pair[1];
            return d;
          }
        }
      }
      // descend: smaller string goes to the "1" branch, otherwise "0"
      if (pb[len] < cur[len])
      {
        *ptr1 = curMatch;
        // const UInt32 curMatch2 = pair[1];
        // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue;  return NULL; }
        // curMatch = curMatch2;
        curMatch = pair[1];
        ptr1 = pair + 1;
        len1 = len;
      }
      else
      {
        *ptr0 = curMatch;
        curMatch = pair[0];
        ptr0 = pair;
        len0 = len;
      }
    }
  }
  while(--cutValue && cmCheck < curMatch);

  // chain exhausted: terminate both branches
  *ptr0 = *ptr1 = kEmptyHashValue;
  return d;
}
/* Binary-tree update without match collection.
   Same tree traversal and re-linking as GetMatchesSpec1(), but no
   (length, distance) pairs are emitted: used when a position is
   skipped but the search structures must stay consistent. */
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  unsigned len0 = 0, len1 = 0;

  UInt32 cmCheck;

  // cmCheck: smallest position still inside the cyclic window
  cmCheck = (UInt32)(pos - _cyclicBufferSize);
  if ((UInt32)pos < _cyclicBufferSize)
    cmCheck = 0;

  if (// curMatch >= pos ||  // failure
      cmCheck < curMatch)
  do
  {
    const UInt32 delta = pos - curMatch;
    {
      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
      const Byte *pb = cur - delta;
      unsigned len = (len0 < len1 ? len0 : len1);
      if (pb[len] == cur[len])
      {
        while (++len != lenLimit)
          if (pb[len] != cur[len])
            break;
        {
          if (len == lenLimit)
          {
            // full-length match: splice the node out and finish
            *ptr1 = pair[0];
            *ptr0 = pair[1];
            return;
          }
        }
      }
      // descend: smaller string goes to the "1" branch, otherwise "0"
      if (pb[len] < cur[len])
      {
        *ptr1 = curMatch;
        curMatch = pair[1];
        ptr1 = pair + 1;
        len1 = len;
      }
      else
      {
        *ptr0 = curMatch;
        curMatch = pair[0];
        ptr0 = pair;
        len0 = len;
      }
    }
  }
  while(--cutValue && cmCheck < curMatch);

  // chain exhausted: terminate both branches
  *ptr0 = *ptr1 = kEmptyHashValue;
  return;
}
/* Advance the match finder by one byte: bump the cyclic-buffer position
   and the window pointer, then increment p->pos; when pos reaches
   posLimit, call MatchFinder_CheckLimits() (which must run only after
   the pos update - see its header comment). */
#define MOVE_POS \
  p->cyclicBufferPos++; \
  p->buffer++; \
  { const UInt32 pos1 = p->pos + 1; \
    p->pos = pos1; \
    if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }

/* Variant for GetMatches functions: advance, then return the distances
   write pointer. */
#define MOVE_POS_RET MOVE_POS return distances;
/* Advance one position without updating the hash/son structures.
   Used when fewer than numHashBytes bytes remain. */
Z7_NO_INLINE
static void MatchFinder_MovePos(CMatchFinder *p)
{
  /* we go here at the end of stream data, when (avail < num_hash_bytes)
     We don't update sons[cyclicBufferPos << btMode].
     So (sons) record will contain junk. And we cannot resume match searching
     to normal operation, even if we will provide more input data in buffer.
     p->sons[p->cyclicBufferPos << p->btMode] = 0;  // kEmptyHashValue
     if (p->btMode)
        p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0;  // kEmptyHashValue
  */
  MOVE_POS
}
/* Common prologue for GetMatches/Skip functions: declares the hash value
   (hv), window pointer (cur) and chain head (curMatch); if fewer than
   (minLen) bytes are available, advances one position and bails out via
   (ret_op). */
#define GET_MATCHES_HEADER2(minLen, ret_op) \
  UInt32 hv; const Byte *cur; UInt32 curMatch; \
  UInt32 lenLimit = p->lenLimit; \
  if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
  cur = p->buffer;

#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)

/* Skip-loop prologue: short positions just continue the num loop. */
#define SKIP_HEADER(minLen) \
  do { GET_MATCHES_HEADER2(minLen, continue)

/* Standard argument pack for Hc_GetMatchesSpec / GetMatchesSpec1 /
   SkipMatchesSpec. */
#define MF_PARAMS(p)  lenLimit, curMatch, p->pos, p->buffer, p->son, \
    p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue

/* Skip-loop epilogue: update the tree, advance, repeat (num) times. */
#define SKIP_FOOTER \
  SkipMatchesSpec(MF_PARAMS(p)); \
  MOVE_POS \
  } while (--num);

#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
  distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
  MOVE_POS_RET

/* Footers selecting the binary-tree or hash-chain searcher. */
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
  GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)

#define GET_MATCHES_FOOTER_HC(_maxLen_) \
  GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)

/* Extend maxLen by comparing cur[] against the candidate at distance d2,
   up to lenLimit. */
#define UPDATE_maxLen { \
    const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
    const Byte *c = cur + maxLen; \
    const Byte *lim = cur + lenLimit; \
    for (; c != lim; c++) if (*(c + diff) != *c) break; \
    maxLen = (unsigned)(c - cur); }
/* Binary-tree match finder, 2-byte hash: look up and update the hash
   head, then run the tree search with minimum reported length 2
   (maxLen floor 1). */
static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  GET_MATCHES_HEADER(2)
  HASH2_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_BT(1)
}
/* Binary-tree match finder using the zip-style 3-byte hash (used by the
   Deflate-compatible coder): minimum reported length 3 (maxLen floor 2). */
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_BT(2)
}
/* mmm = max usable match distance: the cyclic-buffer size, clamped to
   (pos) so candidates before the start of data are rejected. */
#define SET_mmm  \
  mmm = p->cyclicBufferSize; \
  if (pos < mmm) \
    mmm = pos;
/* Binary-tree match finder, 3-byte main hash plus a 2-byte sub-hash:
   first tries a length-2+ match from the 2-byte hash head (d2), then
   runs the tree search for length >= 3. */
static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, d2, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(3)

  HASH3_CALC

  hash = p->hash;
  pos = p->pos;

  // distance to the previous position with the same 2-byte hash
  d2 = pos - hash[h2];

  curMatch = (hash + kFix3HashSize)[hv];

  hash[h2] = pos;
  (hash + kFix3HashSize)[hv] = pos;

  SET_mmm

  maxLen = 2;

  if (d2 < mmm && *(cur - d2) == *cur)
  {
    UPDATE_maxLen
    distances[0] = (UInt32)maxLen;
    distances[1] = d2 - 1;
    distances += 2;
    if (maxLen == lenLimit)
    {
      // already at the maximum length: just update the tree and return
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
/* Binary-tree match finder, 4-byte main hash with 2- and 3-byte
   sub-hashes: emits short length-2/3 matches from the small hash heads
   (d2/d3), then runs the tree search for length >= 4. */
static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(4)

  HASH4_CALC

  hash = p->hash;
  pos = p->pos;

  // distances to previous positions with the same 2-/3-byte hashes
  d2 = pos - hash                  [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  curMatch = (hash + kFix4HashSize)[hv];

  hash                  [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  (hash + kFix4HashSize)[hv] = pos;

  SET_mmm

  maxLen = 3;

  // single-pass "loop": break exits to the tree search below
  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      // record a length-2 candidate from the 2-byte hash
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        // d2 also matches at byte 2: extend it below
        // distances[-2] = 3;
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        // switch to the 3-byte-hash candidate for extension
        d2 = d3;
        distances[1] = d3 - 1;
        distances += 2;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      d2 = d3;
      distances[1] = d3 - 1;
      distances += 2;
    }
    else
      break;

    // extend the chosen candidate and patch its recorded length
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      // maximum length reached: update the tree and return
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
/* Binary-tree match finder, 5-byte main hash with 2- and 3-byte
   sub-hashes: like Bt4, but the candidate must also match at byte 3
   before the full extension, and the tree search covers length >= 5. */
static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(5)

  HASH5_CALC

  hash = p->hash;
  pos = p->pos;

  // distances to previous positions with the same 2-/3-byte hashes
  d2 = pos - hash                  [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  // d4 = pos - (hash + kFix4HashSize)[h4];

  curMatch = (hash + kFix5HashSize)[hv];

  hash                  [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  // (hash + kFix4HashSize)[h4] = pos;
  (hash + kFix5HashSize)[hv] = pos;

  SET_mmm

  maxLen = 4;

  // single-pass "loop": break exits to the tree search below
  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      // record a length-2 candidate from the 2-byte hash
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        // d2 also matches at byte 2: try to extend it below
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        // switch to the 3-byte-hash candidate for extension
        distances[1] = d3 - 1;
        distances += 2;
        d2 = d3;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      distances[1] = d3 - 1;
      distances += 2;
      d2 = d3;
    }
    else
      break;

    // candidate matches 3 bytes; only extend further if byte 3 matches
    distances[-2] = 3;
    if (*(cur - d2 + 3) != cur[3])
      break;
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      // maximum length reached: update the tree and return
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
/* Hash-chain match finder, 4-byte main hash with 2- and 3-byte
   sub-hashes: same short-match logic as Bt4, but the length >= 4 search
   uses the single-link chain (Hc_GetMatchesSpec). */
static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(4)

  HASH4_CALC

  hash = p->hash;
  pos = p->pos;

  // distances to previous positions with the same 2-/3-byte hashes
  d2 = pos - hash                  [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  curMatch = (hash + kFix4HashSize)[hv];

  hash                  [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  (hash + kFix4HashSize)[hv] = pos;

  SET_mmm

  maxLen = 3;

  // single-pass "loop": break exits to the chain search below
  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        // d2 also matches at byte 2: extend it below
        // distances[-2] = 3;
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        d2 = d3;
        distances[1] = d3 - 1;
        distances += 2;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      d2 = d3;
      distances[1] = d3 - 1;
      distances += 2;
    }
    else
      break;

    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      // maximum length reached: just store the chain head and return
      p->son[p->cyclicBufferPos] = curMatch;
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_HC(maxLen)
}
/* Hash-chain match finder, 5-byte main hash with 2- and 3-byte
   sub-hashes: same short-match logic as Bt5, but the length >= 5 search
   uses the single-link chain (Hc_GetMatchesSpec). */
static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(5)

  HASH5_CALC

  hash = p->hash;
  pos = p->pos;

  // distances to previous positions with the same 2-/3-byte hashes
  d2 = pos - hash                  [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  // d4 = pos - (hash + kFix4HashSize)[h4];

  curMatch = (hash + kFix5HashSize)[hv];

  hash                  [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  // (hash + kFix4HashSize)[h4] = pos;
  (hash + kFix5HashSize)[hv] = pos;

  SET_mmm

  maxLen = 4;

  // single-pass "loop": break exits to the chain search below
  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        // d2 also matches at byte 2: try to extend it below
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        distances[1] = d3 - 1;
        distances += 2;
        d2 = d3;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      distances[1] = d3 - 1;
      distances += 2;
      d2 = d3;
    }
    else
      break;

    // candidate matches 3 bytes; only extend further if byte 3 matches
    distances[-2] = 3;
    if (*(cur - d2 + 3) != cur[3])
      break;
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      // maximum length reached: just store the chain head and return
      p->son[p->cyclicBufferPos] = curMatch;
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_HC(maxLen)
}
/* Hash-chain match finder using the zip-style 3-byte hash: minimum
   reported length 3 (maxLen floor 2). */
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_HC(2)
}
/* Skip (num) positions in bt2 mode: update the 2-byte hash head and the
   binary tree at each position without collecting matches. */
static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(2)
  {
    HASH2_CALC
    curMatch = p->hash[hv];
    p->hash[hv] = p->pos;
  }
  SKIP_FOOTER
}
/* Skip (num) positions in the zip-style bt3 mode. */
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  SKIP_HEADER(3)
  {
    HASH_ZIP_CALC
    curMatch = p->hash[hv];
    p->hash[hv] = p->pos;
  }
  SKIP_FOOTER
}
/* Skip (num) positions in bt3 mode: refresh both the 2-byte sub-hash
   and the 3-byte main hash, then update the tree. */
static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(3)
  {
    UInt32 h2;
    UInt32 *hash;
    HASH3_CALC
    hash = p->hash;
    curMatch = (hash + kFix3HashSize)[hv];
    hash[h2] =
    (hash + kFix3HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
/* Skip (num) positions in bt4 mode: refresh the 2-/3-byte sub-hashes
   and the 4-byte main hash, then update the tree. */
static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(4)
  {
    UInt32 h2, h3;
    UInt32 *hash;
    HASH4_CALC
    hash = p->hash;
    curMatch = (hash + kFix4HashSize)[hv];
    hash                  [h2] =
    (hash + kFix3HashSize)[h3] =
    (hash + kFix4HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
/* Skip (num) positions in bt5 mode: refresh the 2-/3-byte sub-hashes
   and the 5-byte main hash, then update the tree. */
static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(5)
  {
    UInt32 h2, h3;
    UInt32 *hash;
    HASH5_CALC
    hash = p->hash;
    curMatch = (hash + kFix5HashSize)[hv];
    hash                  [h2] =
    (hash + kFix3HashSize)[h3] =
    // (hash + kFix4HashSize)[h4] =
    (hash + kFix5HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
/* Batched skip for hash-chain mode. Unlike SKIP_FOOTER, this processes
   up to (posLimit - pos) positions per outer iteration with locally
   cached (cur, pos, hash, son) pointers, writing the chain head for
   each position directly into son[], and only checks limits once per
   batch. The body between header and footer computes (hv) and
   (curMatch) and refreshes the hash heads for one position. */
#define HC_SKIP_HEADER(minLen) \
    do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
    const Byte *cur; \
    UInt32 *hash; \
    UInt32 *son; \
    UInt32 pos = p->pos; \
    UInt32 num2 = num; \
    /* (p->pos == p->posLimit) is not allowed here !!! */ \
    { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \
    num -= num2; \
    { const UInt32 cycPos = p->cyclicBufferPos; \
      son = p->son + cycPos; \
      p->cyclicBufferPos = cycPos + num2; } \
    cur = p->buffer; \
    hash = p->hash; \
    do { \
      UInt32 curMatch; \
      UInt32 hv;

/* Batch epilogue: store the chain link, advance the cached pointers,
   then write them back and check limits once. */
#define HC_SKIP_FOOTER \
      cur++;  pos++;  *son++ = curMatch; \
    } while (--num2); \
    p->buffer = cur; \
    p->pos = pos; \
    if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
    }} while(num); \
/* Skip (num) positions in hc4 mode (batched; see HC_SKIP_HEADER). */
static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  HC_SKIP_HEADER(4)

    UInt32 h2, h3;
    HASH4_CALC
    curMatch = (hash + kFix4HashSize)[hv];
    hash                  [h2] =
    (hash + kFix3HashSize)[h3] =
    (hash + kFix4HashSize)[hv] = pos;

  HC_SKIP_FOOTER
}
/* Skip (num) positions in hc5 mode (batched; see HC_SKIP_HEADER). */
static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  HC_SKIP_HEADER(5)

    UInt32 h2, h3;
    HASH5_CALC
    curMatch = (hash + kFix5HashSize)[hv];
    hash                  [h2] =
    (hash + kFix3HashSize)[h3] =
    // (hash + kFix4HashSize)[h4] =
    (hash + kFix5HashSize)[hv] = pos;

  HC_SKIP_FOOTER
}
/* Skip (num) positions in the zip-style hc3 mode (batched). */
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  HC_SKIP_HEADER(3)

    HASH_ZIP_CALC
    curMatch = hash[hv];
    hash[hv] = pos;

  HC_SKIP_FOOTER
}
  1406. void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
  1407. {
  1408. vTable->Init = MatchFinder_Init;
  1409. vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
  1410. vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
  1411. if (!p->btMode)
  1412. {
  1413. if (p->numHashBytes <= 4)
  1414. {
  1415. vTable->GetMatches = Hc4_MatchFinder_GetMatches;
  1416. vTable->Skip = Hc4_MatchFinder_Skip;
  1417. }
  1418. else
  1419. {
  1420. vTable->GetMatches = Hc5_MatchFinder_GetMatches;
  1421. vTable->Skip = Hc5_MatchFinder_Skip;
  1422. }
  1423. }
  1424. else if (p->numHashBytes == 2)
  1425. {
  1426. vTable->GetMatches = Bt2_MatchFinder_GetMatches;
  1427. vTable->Skip = Bt2_MatchFinder_Skip;
  1428. }
  1429. else if (p->numHashBytes == 3)
  1430. {
  1431. vTable->GetMatches = Bt3_MatchFinder_GetMatches;
  1432. vTable->Skip = Bt3_MatchFinder_Skip;
  1433. }
  1434. else if (p->numHashBytes == 4)
  1435. {
  1436. vTable->GetMatches = Bt4_MatchFinder_GetMatches;
  1437. vTable->Skip = Bt4_MatchFinder_Skip;
  1438. }
  1439. else
  1440. {
  1441. vTable->GetMatches = Bt5_MatchFinder_GetMatches;
  1442. vTable->Skip = Bt5_MatchFinder_Skip;
  1443. }
  1444. }
/* One-time runtime CPU dispatch: selects the vector saturating-subtract
   used by hash normalization - 128-bit (NEON or SSE4.1) or 256-bit
   (AVX2) when the CPU supports it - and stores it in g_LzFind_SaturSub.
   Leaves the pointer NULL (scalar fallback) when no SIMD path applies.
   No-op when the build forces or disables the SIMD path. */
void LzFindPrepare(void)
{
#ifndef FORCE_LZFIND_SATUR_SUB_128
#ifdef USE_LZFIND_SATUR_SUB_128
  LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
#ifdef MY_CPU_ARM_OR_ARM64
  {
    if (CPU_IsSupported_NEON())
    {
      // #pragma message ("=== LzFind NEON")
      PRF(printf("\n=== LzFind NEON\n"));
      f = LzFind_SaturSub_128;
    }
    // f = 0; // for debug
  }
#else // MY_CPU_ARM_OR_ARM64
  if (CPU_IsSupported_SSE41())
  {
    // #pragma message ("=== LzFind SSE41")
    PRF(printf("\n=== LzFind SSE41\n"));
    f = LzFind_SaturSub_128;

#ifdef USE_LZFIND_SATUR_SUB_256
    if (CPU_IsSupported_AVX2())
    {
      // #pragma message ("=== LzFind AVX2")
      PRF(printf("\n=== LzFind AVX2\n"));
      f = LzFind_SaturSub_256;
    }
#endif
  }
#endif // MY_CPU_ARM_OR_ARM64
  g_LzFind_SaturSub = f;
#endif // USE_LZFIND_SATUR_SUB_128
#endif // FORCE_LZFIND_SATUR_SUB_128
}
// remove file-local helper macros so they don't leak past this file
#undef MOVE_POS
#undef MOVE_POS_RET
#undef PRF