LzFindOpt.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
  2. 2023-04-02 : Igor Pavlov : Public domain */
  3. #include "Precomp.h"
  4. #include "CpuArch.h"
  5. #include "LzFind.h"
  6. // #include "LzFindMt.h"
  7. // #define LOG_ITERS
  8. // #define LOG_THREAD
  9. #ifdef LOG_THREAD
  10. #include <stdio.h>
  11. #define PRF(x) x
  12. #else
  13. // #define PRF(x)
  14. #endif
  15. #ifdef LOG_ITERS
  16. #include <stdio.h>
  17. UInt64 g_NumIters_Tree;
  18. UInt64 g_NumIters_Loop;
  19. UInt64 g_NumIters_Bytes;
  20. #define LOG_ITER(x) x
  21. #else
  22. #define LOG_ITER(x)
  23. #endif
  24. // ---------- BT THREAD ----------
  25. #define USE_SON_PREFETCH
  26. #define USE_LONG_MATCH_OPT
  27. #define kEmptyHashValue 0
  28. // #define CYC_TO_POS_OFFSET 0
  29. // #define CYC_TO_POS_OFFSET 1 // for debug
  30. /*
  31. Z7_NO_INLINE
  32. UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
  33. UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
  34. {
  35. do
  36. {
  37. UInt32 delta;
  38. if (hash == size)
  39. break;
  40. delta = *hash++;
  41. if (delta == 0 || delta > (UInt32)pos)
  42. return NULL;
  43. lenLimit++;
  44. if (delta == (UInt32)pos)
  45. {
  46. CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
  47. *d++ = 0;
  48. ptr1[0] = kEmptyHashValue;
  49. ptr1[1] = kEmptyHashValue;
  50. }
  51. else
  52. {
  53. UInt32 *_distances = ++d;
  54. CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
  55. CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
  56. const Byte *len0 = cur, *len1 = cur;
  57. UInt32 cutValue = _cutValue;
  58. const Byte *maxLen = cur + _maxLen;
  59. for (LOG_ITER(g_NumIters_Tree++);;)
  60. {
  61. LOG_ITER(g_NumIters_Loop++);
  62. {
  63. const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
  64. CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
  65. const Byte *len = (len0 < len1 ? len0 : len1);
  66. #ifdef USE_SON_PREFETCH
  67. const UInt32 pair0 = *pair;
  68. #endif
  69. if (len[diff] == len[0])
  70. {
  71. if (++len != lenLimit && len[diff] == len[0])
  72. while (++len != lenLimit)
  73. {
  74. LOG_ITER(g_NumIters_Bytes++);
  75. if (len[diff] != len[0])
  76. break;
  77. }
  78. if (maxLen < len)
  79. {
  80. maxLen = len;
  81. *d++ = (UInt32)(len - cur);
  82. *d++ = delta - 1;
  83. if (len == lenLimit)
  84. {
  85. const UInt32 pair1 = pair[1];
  86. *ptr1 =
  87. #ifdef USE_SON_PREFETCH
  88. pair0;
  89. #else
  90. pair[0];
  91. #endif
  92. *ptr0 = pair1;
  93. _distances[-1] = (UInt32)(d - _distances);
  94. #ifdef USE_LONG_MATCH_OPT
  95. if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
  96. break;
  97. {
  98. for (;;)
  99. {
  100. hash++;
  101. pos++;
  102. cur++;
  103. lenLimit++;
  104. {
  105. CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
  106. #if 0
  107. *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
  108. #else
  109. const UInt32 p0 = ptr[0 + (diff * 2)];
  110. const UInt32 p1 = ptr[1 + (diff * 2)];
  111. ptr[0] = p0;
  112. ptr[1] = p1;
  113. // ptr[0] = ptr[0 + (diff * 2)];
  114. // ptr[1] = ptr[1 + (diff * 2)];
  115. #endif
  116. }
  117. // PrintSon(son + 2, pos - 1);
  118. // printf("\npos = %x delta = %x\n", pos, delta);
  119. len++;
  120. *d++ = 2;
  121. *d++ = (UInt32)(len - cur);
  122. *d++ = delta - 1;
  123. if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
  124. break;
  125. }
  126. }
  127. #endif
  128. break;
  129. }
  130. }
  131. }
  132. {
  133. const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
  134. if (len[diff] < len[0])
  135. {
  136. delta = pair[1];
  137. if (delta >= curMatch)
  138. return NULL;
  139. *ptr1 = curMatch;
  140. ptr1 = pair + 1;
  141. len1 = len;
  142. }
  143. else
  144. {
  145. delta = *pair;
  146. if (delta >= curMatch)
  147. return NULL;
  148. *ptr0 = curMatch;
  149. ptr0 = pair;
  150. len0 = len;
  151. }
  152. delta = (UInt32)pos - delta;
  153. if (--cutValue == 0 || delta >= pos)
  154. {
  155. *ptr0 = *ptr1 = kEmptyHashValue;
  156. _distances[-1] = (UInt32)(d - _distances);
  157. break;
  158. }
  159. }
  160. }
  161. } // for (tree iterations)
  162. }
  163. pos++;
  164. cur++;
  165. }
  166. while (d < limit);
  167. *posRes = (UInt32)pos;
  168. return d;
  169. }
  170. */
  171. /* define cbs if you use 2 functions.
  172. GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
  173. GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
  174. do not define cbs if you use 1 function:
  175. GetMatchesSpecN_2()
  176. */
  177. // #define cbs _cyclicBufferSize
  178. /*
  179. we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
  180. to eliminate "movsx" BUG in old MSVC x64 compiler.
  181. */
  182. UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
  183. UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
  184. size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
  185. UInt32 *posRes);
  186. Z7_NO_INLINE
  187. UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
  188. UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
  189. size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
  190. UInt32 *posRes)
  191. {
  192. do // while (hash != size)
  193. {
  194. UInt32 delta;
  195. #ifndef cbs
  196. UInt32 cbs;
  197. #endif
  198. if (hash == size)
  199. break;
  200. delta = *hash++;
  201. if (delta == 0)
  202. return NULL;
  203. lenLimit++;
  204. #ifndef cbs
  205. cbs = _cyclicBufferSize;
  206. if ((UInt32)pos < cbs)
  207. {
  208. if (delta > (UInt32)pos)
  209. return NULL;
  210. cbs = (UInt32)pos;
  211. }
  212. #endif
  213. if (delta >= cbs)
  214. {
  215. CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  216. *d++ = 0;
  217. ptr1[0] = kEmptyHashValue;
  218. ptr1[1] = kEmptyHashValue;
  219. }
  220. else
  221. {
  222. UInt32 *_distances = ++d;
  223. CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
  224. CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  225. UInt32 cutValue = _cutValue;
  226. const Byte *len0 = cur, *len1 = cur;
  227. const Byte *maxLen = cur + _maxLen;
  228. // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
  229. for (LOG_ITER(g_NumIters_Tree++);;)
  230. {
  231. LOG_ITER(g_NumIters_Loop++);
  232. {
  233. // SPEC code
  234. CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
  235. + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
  236. ) << 1);
  237. const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
  238. const Byte *len = (len0 < len1 ? len0 : len1);
  239. #ifdef USE_SON_PREFETCH
  240. const UInt32 pair0 = *pair;
  241. #endif
  242. if (len[diff] == len[0])
  243. {
  244. if (++len != lenLimit && len[diff] == len[0])
  245. while (++len != lenLimit)
  246. {
  247. LOG_ITER(g_NumIters_Bytes++);
  248. if (len[diff] != len[0])
  249. break;
  250. }
  251. if (maxLen < len)
  252. {
  253. maxLen = len;
  254. *d++ = (UInt32)(len - cur);
  255. *d++ = delta - 1;
  256. if (len == lenLimit)
  257. {
  258. const UInt32 pair1 = pair[1];
  259. *ptr1 =
  260. #ifdef USE_SON_PREFETCH
  261. pair0;
  262. #else
  263. pair[0];
  264. #endif
  265. *ptr0 = pair1;
  266. _distances[-1] = (UInt32)(d - _distances);
  267. #ifdef USE_LONG_MATCH_OPT
  268. if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
  269. break;
  270. {
  271. for (;;)
  272. {
  273. *d++ = 2;
  274. *d++ = (UInt32)(lenLimit - cur);
  275. *d++ = delta - 1;
  276. cur++;
  277. lenLimit++;
  278. // SPEC
  279. _cyclicBufferPos++;
  280. {
  281. // SPEC code
  282. CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
  283. const CLzRef *src = dest + ((diff
  284. + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
  285. // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
  286. #if 0
  287. *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
  288. #else
  289. const UInt32 p0 = src[0];
  290. const UInt32 p1 = src[1];
  291. dest[0] = p0;
  292. dest[1] = p1;
  293. #endif
  294. }
  295. pos++;
  296. hash++;
  297. if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
  298. break;
  299. } // for() end for long matches
  300. }
  301. #endif
  302. break; // break from TREE iterations
  303. }
  304. }
  305. }
  306. {
  307. const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
  308. if (len[diff] < len[0])
  309. {
  310. delta = pair[1];
  311. *ptr1 = curMatch;
  312. ptr1 = pair + 1;
  313. len1 = len;
  314. if (delta >= curMatch)
  315. return NULL;
  316. }
  317. else
  318. {
  319. delta = *pair;
  320. *ptr0 = curMatch;
  321. ptr0 = pair;
  322. len0 = len;
  323. if (delta >= curMatch)
  324. return NULL;
  325. }
  326. delta = (UInt32)pos - delta;
  327. if (--cutValue == 0 || delta >= cbs)
  328. {
  329. *ptr0 = *ptr1 = kEmptyHashValue;
  330. _distances[-1] = (UInt32)(d - _distances);
  331. break;
  332. }
  333. }
  334. }
  335. } // for (tree iterations)
  336. }
  337. pos++;
  338. _cyclicBufferPos++;
  339. cur++;
  340. }
  341. while (d < limit);
  342. *posRes = (UInt32)pos;
  343. return d;
  344. }
  345. /*
  346. typedef UInt32 uint32plus; // size_t
  347. UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
  348. UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
  349. size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
  350. UInt32 *posRes)
  351. {
  352. do // while (hash != size)
  353. {
  354. UInt32 delta;
  355. #ifndef cbs
  356. UInt32 cbs;
  357. #endif
  358. if (hash == size)
  359. break;
  360. delta = *hash++;
  361. if (delta == 0)
  362. return NULL;
  363. #ifndef cbs
  364. cbs = _cyclicBufferSize;
  365. if ((UInt32)pos < cbs)
  366. {
  367. if (delta > (UInt32)pos)
  368. return NULL;
  369. cbs = (UInt32)pos;
  370. }
  371. #endif
  372. if (delta >= cbs)
  373. {
  374. CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  375. *d++ = 0;
  376. ptr1[0] = kEmptyHashValue;
  377. ptr1[1] = kEmptyHashValue;
  378. }
  379. else
  380. {
  381. CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
  382. CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
  383. UInt32 *_distances = ++d;
  384. uint32plus len0 = 0, len1 = 0;
  385. UInt32 cutValue = _cutValue;
  386. uint32plus maxLen = _maxLen;
  387. // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
  388. for (LOG_ITER(g_NumIters_Tree++);;)
  389. {
  390. LOG_ITER(g_NumIters_Loop++);
  391. {
  392. // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
  393. CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
  394. + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
  395. ) << 1);
  396. const Byte *pb = cur - delta;
  397. uint32plus len = (len0 < len1 ? len0 : len1);
  398. #ifdef USE_SON_PREFETCH
  399. const UInt32 pair0 = *pair;
  400. #endif
  401. if (pb[len] == cur[len])
  402. {
  403. if (++len != lenLimit && pb[len] == cur[len])
  404. while (++len != lenLimit)
  405. if (pb[len] != cur[len])
  406. break;
  407. if (maxLen < len)
  408. {
  409. maxLen = len;
  410. *d++ = (UInt32)len;
  411. *d++ = delta - 1;
  412. if (len == lenLimit)
  413. {
  414. {
  415. const UInt32 pair1 = pair[1];
  416. *ptr0 = pair1;
  417. *ptr1 =
  418. #ifdef USE_SON_PREFETCH
  419. pair0;
  420. #else
  421. pair[0];
  422. #endif
  423. }
  424. _distances[-1] = (UInt32)(d - _distances);
  425. #ifdef USE_LONG_MATCH_OPT
  426. if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
  427. break;
  428. {
  429. const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
  430. for (;;)
  431. {
  432. *d++ = 2;
  433. *d++ = (UInt32)lenLimit;
  434. *d++ = delta - 1;
  435. _cyclicBufferPos++;
  436. {
  437. CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
  438. const CLzRef *src = dest + ((diff +
  439. (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
  440. #if 0
  441. *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
  442. #else
  443. const UInt32 p0 = src[0];
  444. const UInt32 p1 = src[1];
  445. dest[0] = p0;
  446. dest[1] = p1;
  447. #endif
  448. }
  449. hash++;
  450. pos++;
  451. cur++;
  452. pb++;
  453. if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
  454. break;
  455. }
  456. }
  457. #endif
  458. break;
  459. }
  460. }
  461. }
  462. {
  463. const UInt32 curMatch = (UInt32)pos - delta;
  464. if (pb[len] < cur[len])
  465. {
  466. delta = pair[1];
  467. *ptr1 = curMatch;
  468. ptr1 = pair + 1;
  469. len1 = len;
  470. }
  471. else
  472. {
  473. delta = *pair;
  474. *ptr0 = curMatch;
  475. ptr0 = pair;
  476. len0 = len;
  477. }
  478. {
  479. if (delta >= curMatch)
  480. return NULL;
  481. delta = (UInt32)pos - delta;
  482. if (delta >= cbs
  483. // delta >= _cyclicBufferSize || delta >= pos
  484. || --cutValue == 0)
  485. {
  486. *ptr0 = *ptr1 = kEmptyHashValue;
  487. _distances[-1] = (UInt32)(d - _distances);
  488. break;
  489. }
  490. }
  491. }
  492. }
  493. } // for (tree iterations)
  494. }
  495. pos++;
  496. _cyclicBufferPos++;
  497. cur++;
  498. }
  499. while (d < limit);
  500. *posRes = (UInt32)pos;
  501. return d;
  502. }
  503. */