{ dcxxhash.pas — xxHash (XXH3) streaming implementation, Pascal translation. }
  1. {
  2. * xxHash - Extremely Fast Hash algorithm
  3. * Copyright (C) 2012-2023 Yann Collet
  4. *
  5. * The Pascal translation by Alexander Koblov, 2024
  6. *
  7. * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are
  11. * met:
  12. *
  13. * * Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * * Redistributions in binary form must reproduce the above
  16. * copyright notice, this list of conditions and the following disclaimer
  17. * in the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * You can contact the author at:
  33. * - xxHash homepage: https://www.xxhash.com
  34. * - xxHash source repository: https://github.com/Cyan4973/xxHash
  35. }
  36. unit DCxxhash;
  37. {$mode objfpc}{$H+}
  38. {$inline on}{$Q-}
  39. {$macro on}{$R-}
  40. interface
uses
  SysUtils;

const
  { Size in bytes of the built-in default secret. }
  XXH3_SECRET_DEFAULT_SIZE = 192;
  { Size in bytes of the internal streaming buffer. }
  XXH3_INTERNALBUFFER_SIZE = 256;

type
  XXH64_hash_t = UInt64;   // 64-bit hash value
  XXH32_hash_t = UInt32;   // 32-bit hash value

  { A 128-bit hash result, returned as two 64-bit halves. }
  XXH128_hash_t = record
    low64: XXH64_hash_t;
    high64: XXH64_hash_t;
  end;

{$CODEALIGN RECORDMIN=64}
  PXXH3_state_t = ^XXH3_state_t;
  { Streaming state for XXH3. Allocate via XXH3_createState so the record
    (and therefore acc) gets the 64-byte alignment the SIMD kernels assume. }
  XXH3_state_t = record
    acc: array[0..7] of XXH64_hash_t;                               // eight 64-bit accumulator lanes
    customSecret: array[0..Pred(XXH3_SECRET_DEFAULT_SIZE)] of Byte; // built-in secret (used when extSecret = nil)
    buffer: array[0..Pred(XXH3_INTERNALBUFFER_SIZE)] of Byte;       // staging buffer for partial input
    bufferedSize: XXH32_hash_t;    // number of valid bytes currently in buffer
    useSeed: XXH32_hash_t;         // non-zero when a non-zero seed was supplied
    nbStripesSoFar: UIntPtr;       // stripes consumed within the current secret block
    totalLen: XXH64_hash_t;        // total number of bytes ingested so far
    nbStripesPerBlock: UIntPtr;    // stripes per full secret block (secretLimit div consume rate)
    secretLimit: UIntPtr;          // secretSize - XXH_STRIPE_LEN
    seed: XXH64_hash_t;            // user seed (0 = unseeded)
    reserved64: XXH64_hash_t;      // reserved; not used in this unit
    extSecret: PByte;              // external secret, or nil to use customSecret
  end;

function XXH3_createState: PXXH3_state_t;
procedure XXH3_freeState(statePtr: PXXH3_state_t);
procedure XXH3_128bits_reset(statePtr: PXXH3_state_t);
procedure XXH3_128bits_update(state: PXXH3_state_t; const input: PByte; len: UIntPtr);
function XXH3_128bits_digest (const state: PXXH3_state_t): XXH128_hash_t;
  74. implementation
  75. {$IF DEFINED(CPUX86_64)}
  76. uses
  77. CPU;
  78. {$ENDIF}
{$CODEALIGN CONSTMIN=64}
const
  //* 32- and 64-bit primes from the xxHash specification */
  XXH_PRIME32_1 = $9E3779B1;
  XXH_PRIME32_2 = $85EBCA77;
  XXH_PRIME32_3 = $C2B2AE3D;
  XXH_PRIME64_1 = UInt64($9E3779B185EBCA87);
  XXH_PRIME64_2 = UInt64($C2B2AE3D27D4EB4F);
  XXH_PRIME64_3 = UInt64($165667B19E3779F9);
  XXH_PRIME64_4 = UInt64($85EBCA77C2B2AE63);
  XXH_PRIME64_5 = UInt64($27D4EB2F165667C5);
  XXH3_MIDSIZE_MAX = 240;
  //* byte offsets into the secret used by the finalisation steps */
  XXH_SECRET_LASTACC_START = 7;
  XXH_SECRET_MERGEACCS_START = 11;
  XXH3_MIDSIZE_STARTOFFSET = 3;
  XXH3_MIDSIZE_LASTOFFSET = 17;
  //* each 64-byte stripe advances 8 bytes further into the secret */
  XXH_SECRET_CONSUME_RATE = 8;
  XXH_STRIPE_LEN = 64;
  XXH_ACC_SIZE = 64;
  //* minimum legal secret length */
  XXH3_SECRET_SIZE_MIN = 136;
  //* implementation-side twin of the interface's XXH3_SECRET_DEFAULT_SIZE */
  XXH_SECRET_DEFAULT_SIZE = 192;
  PRIME_MX1 = UInt64($165667919E3779F9);
  PRIME_MX2 = UInt64($9FB21C651E98DF25);
  XXH_ACC_ALIGN = 64; //* for compatibility with avx512 */
  //* whole stripes held by a full internal buffer (256 div 64 = 4) */
  XXH3_INTERNALBUFFER_STRIPES = (XXH3_INTERNALBUFFER_SIZE div XXH_STRIPE_LEN);

//*! Pseudorandom secret taken directly from FARSH. */
const XXH3_kSecret: array[0..Pred(XXH_SECRET_DEFAULT_SIZE)] of Byte = (
  $b8, $fe, $6c, $39, $23, $a4, $4b, $be, $7c, $01, $81, $2c, $f7, $21, $ad, $1c,
  $de, $d4, $6d, $e9, $83, $90, $97, $db, $72, $40, $a4, $a4, $b7, $b3, $67, $1f,
  $cb, $79, $e6, $4e, $cc, $c0, $e5, $78, $82, $5a, $d0, $7d, $cc, $ff, $72, $21,
  $b8, $08, $46, $74, $f7, $43, $24, $8e, $e0, $35, $90, $e6, $81, $3a, $26, $4c,
  $3c, $28, $52, $bb, $91, $c3, $00, $cb, $88, $d0, $65, $8b, $1b, $53, $2e, $a3,
  $71, $64, $48, $97, $a2, $0d, $f9, $4e, $38, $19, $ef, $46, $a9, $de, $ac, $d8,
  $a8, $fa, $76, $3f, $e3, $9c, $34, $3f, $f9, $dc, $bb, $c7, $c7, $0b, $4f, $1d,
  $8a, $51, $e0, $4b, $cd, $b4, $59, $31, $c8, $9f, $7e, $c9, $d9, $78, $73, $64,
  $ea, $c5, $ac, $83, $34, $d3, $eb, $c3, $c5, $81, $a0, $ff, $fa, $13, $63, $eb,
  $17, $0d, $dd, $51, $b7, $f0, $da, $49, $d3, $16, $55, $26, $29, $d4, $68, $9e,
  $2b, $16, $be, $58, $7d, $47, $a1, $fc, $8f, $f8, $b8, $d1, $7a, $d0, $31, $ce,
  $45, $cb, $3a, $8f, $95, $16, $04, $28, $af, $d7, $fb, $ca, $bb, $4b, $40, $7e
);
type
  //* Signatures of the pluggable accumulate/scramble kernels
  //  (SSE2/AVX2 on x86-64, scalar otherwise). */
  TXXH3_scrambleAcc_f = procedure(acc: PByte; const secret: PByte);
  TXXH3_accumulate_512_f = procedure(acc: PByte; const input: PByte; const secret: PByte);
  TXXH3_accumulate_f = procedure(acc: PByte; const input: PByte; const secret: PByte; nbStripes: UIntPtr);

var
  //* Active kernel implementations. NOTE(review): assignment is not in this
  //  chunk — presumably done in unit initialisation from CPU-feature
  //  detection (the CPU unit is imported on x86-64); confirm. */
  XXH3_accumulate: TXXH3_accumulate_f;
  XXH3_scrambleAcc: TXXH3_scrambleAcc_f;
  XXH3_accumulate_512: TXXH3_accumulate_512_f;
  126. function XXH_readLE32(const ptr: Pointer): UInt32; inline;
  127. begin
  128. Result:= PUInt32(ptr)^;
  129. end;
  130. function XXH_readLE64(const ptr: Pointer): UInt64; inline;
  131. begin
  132. Result:= PUInt64(ptr)^;
  133. end;
  134. function XXH_mult32to64(x, y: UInt64): UInt64; inline;
  135. begin
  136. Result:= (x and $FFFFFFFF) * (y and $FFFFFFFF);
  137. end;
  138. function XXH_xorshift64(v64: UInt64; shift: Integer): UInt64; inline;
  139. begin
  140. // XXH_ASSERT(0 <= shift && shift < 64);
  141. Result:= v64 xor (v64 shr shift);
  142. end;
  143. function XXH64_avalanche(hash: UInt64): UInt64;
  144. begin
  145. hash := hash xor hash shr 33;
  146. hash *= XXH_PRIME64_2;
  147. hash := hash xor hash shr 29;
  148. hash *= XXH_PRIME64_3;
  149. hash := hash xor hash shr 32;
  150. Result := hash;
  151. end;
  152. function XXH_alignedMalloc(s: UIntPtr; align: UIntPtr): Pointer;
  153. var
  154. offset: UIntPtr;
  155. base, ptr: PByte;
  156. begin
  157. Assert((align <= 128) and (align >= 8)); //* range check */
  158. Assert((align and (align-1)) = 0); //* power of 2 */
  159. Assert((s <> 0) and (s < (s + align))); //* empty/overflow */
  160. //* Overallocate to make room for manual realignment and an offset byte */
  161. base := GetMem(s + align);
  162. if (base <> nil) then
  163. begin
  164. {*
  165. * Get the offset needed to align this pointer.
  166. *
  167. * Even if the returned pointer is aligned, there will always be
  168. * at least one byte to store the offset to the original pointer.
  169. *}
  170. offset := align - (UIntPtr(base) and (align - 1)); //* base % align */
  171. //* Add the offset for the now-aligned pointer */
  172. ptr := base + offset;
  173. Assert(UIntPtr(ptr) mod align = 0);
  174. //* Store the offset immediately before the returned pointer. */
  175. ptr[-1] := Byte(offset);
  176. Exit(ptr);
  177. end;
  178. Result:= nil;
  179. end;
  180. procedure XXH_alignedFree(p: Pointer);
  181. var
  182. offset: Byte;
  183. base, ptr: PByte;
  184. begin
  185. if (p <> nil) then
  186. begin
  187. ptr:= PByte(p);
  188. //* Get the offset byte we added in XXH_malloc. */
  189. offset:= ptr[-1];
  190. //* Free the original malloc'd pointer */
  191. base:= ptr - offset;
  192. FreeMem(base);
  193. end;
  194. end;
  195. function XXH3_createState: PXXH3_state_t;
  196. begin
  197. Result:= XXH_alignedMalloc(SizeOf(XXH3_state_t), XXH_ACC_ALIGN);
  198. if (Result = nil) then Exit(nil);
  199. Result^.seed:= 0;
  200. Result^.extSecret:= nil;
  201. end;
{ Frees a state allocated by XXH3_createState.
  Safe with nil, since XXH_alignedFree ignores nil. }
procedure XXH3_freeState(statePtr: PXXH3_state_t);
begin
  XXH_alignedFree(statePtr);
end;
{ (Re)initialises statePtr for a fresh streaming hash.
  seed       - user seed (0 = unseeded);
  secret     - secret key; the pointer is retained in extSecret, so it
               must stay valid for the lifetime of the state;
  secretSize - length of secret, at least XXH3_SECRET_SIZE_MIN. }
procedure XXH3_reset_internal(statePtr: PXXH3_state_t; seed: XXH64_hash_t;
  const secret: PByte; secretSize: UIntPtr);
var
  initStart: PByte;
  initLength: UIntPtr;
begin
  Assert(statePtr <> nil);
  //* NOTE: relies on the declaration order of XXH3_state_t fields:
  //  everything from bufferedSize up to (excluding) nbStripesPerBlock
  //  is cleared in a single FillChar. */
  initStart:= @statePtr^.bufferedSize;
  initLength:= @statePtr^.nbStripesPerBlock - initStart;
  //* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
  FillChar(initStart^, initLength, 0);
  //* standard XXH3 accumulator start values (alternating 32/64-bit primes) */
  statePtr^.acc[0]:= XXH_PRIME32_3;
  statePtr^.acc[1]:= XXH_PRIME64_1;
  statePtr^.acc[2]:= XXH_PRIME64_2;
  statePtr^.acc[3]:= XXH_PRIME64_3;
  statePtr^.acc[4]:= XXH_PRIME64_4;
  statePtr^.acc[5]:= XXH_PRIME32_2;
  statePtr^.acc[6]:= XXH_PRIME64_5;
  statePtr^.acc[7]:= XXH_PRIME32_1;
  statePtr^.seed:= seed;
  //* boolean-to-integer cast: 1 when a non-zero seed is in use */
  statePtr^.useSeed:= XXH32_hash_t(seed <> 0);
  statePtr^.extSecret:= secret;
  Assert(secretSize >= XXH3_SECRET_SIZE_MIN);
  //* last usable stripe offset within the secret */
  statePtr^.secretLimit:= secretSize - XXH_STRIPE_LEN;
  statePtr^.nbStripesPerBlock:= statePtr^.secretLimit div XXH_SECRET_CONSUME_RATE;
end;
{ Resets the state for an unseeded hash using the built-in default secret. }
procedure XXH3_64bits_reset(statePtr: PXXH3_state_t);
begin
  XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
end;
{ Resets the state for 128-bit hashing. The 64- and 128-bit variants share
  the same state initialisation, so this simply delegates. }
procedure XXH3_128bits_reset(statePtr: PXXH3_state_t);
begin
  XXH3_64bits_reset(statePtr);
end;
  240. {$IF DEFINED(CPUX86_64)}
const
  { XXH_PRIME32_1 broadcast across 4 lanes, used by the SSE2 scramble kernel. }
  SSE_PRIME32_1: array[0..3] of UInt32 = (XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1);

{ SSE2 kernel: folds one 64-byte input stripe into the eight 64-bit lanes
  at acc, keyed by 64 bytes of secret. Per 16-byte vector it mirrors
  XXH3_scalarRound: key = input xor secret; acc += lo32(key)*hi32(key)
  (pshufd $49 pairs the halves for pmuludq) + swap64(input)
  (pshufd $78 swaps adjacent lanes).
  After the prologue: RCX = acc, RDX = input, R8 = secret. }
procedure XXH3_accumulate_512_sse2(acc: PByte; const input: PByte; const secret: PByte); assembler; nostackframe;
// UNIX RDI, RSI, RDX
// WIN64: RCX, RDX, R8
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdx, %r8
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  // lanes 0-1
  movdqu (%rdx), %xmm3
  movdqu (%r8), %xmm0
  movdqu (%rdx), %xmm4
  movdqu 16(%rdx), %xmm5
  pxor %xmm3, %xmm0
  movdqu 16(%rdx), %xmm2
  movdqu 32(%rdx), %xmm3
  pshufd $49, %xmm0, %xmm1
  pmuludq %xmm1, %xmm0
  pshufd $78, %xmm4, %xmm1
  movdqu 32(%rdx), %xmm4
  paddq %xmm1, %xmm0
  paddq (%rcx), %xmm0
  movups %xmm0, (%rcx)
  // lanes 2-3
  movdqu 16(%r8), %xmm0
  pxor %xmm5, %xmm0
  pshufd $49, %xmm0, %xmm1
  pmuludq %xmm1, %xmm0
  pshufd $78, %xmm2, %xmm1
  paddq %xmm1, %xmm0
  paddq 16(%rcx), %xmm0
  movups %xmm0, 16(%rcx)
  // lanes 4-5
  movdqu 32(%r8), %xmm0
  pxor %xmm3, %xmm0
  pshufd $49, %xmm0, %xmm1
  pmuludq %xmm1, %xmm0
  pshufd $78, %xmm4, %xmm1
  paddq %xmm1, %xmm0
  paddq 32(%rcx), %xmm0
  movdqu 48(%rdx), %xmm1
  movups %xmm0, 32(%rcx)
  // lanes 6-7
  movdqu 48(%r8), %xmm0
  pxor %xmm1, %xmm0
  pshufd $78, %xmm1, %xmm1
  pshufd $49, %xmm0, %xmm2
  pmuludq %xmm2, %xmm0
  paddq %xmm1, %xmm0
  paddq 48(%rcx), %xmm0
  movups %xmm0, 48(%rcx)
end;
{ SSE2 loop kernel: accumulates nbStripes consecutive 64-byte stripes.
  Stripe n reads input + 64*n (addressed via RAX, which starts at
  input+448 so -448(%rax) is the current stripe, giving a 384-byte
  prefetch distance) and secret + 8*n (the XXH_SECRET_CONSUME_RATE,
  addressed as (%r8,%rdx,8)). The four accumulator vectors stay in
  XMM1-XMM4 for the whole loop and are stored back once at the end.
  Returns immediately when nbStripes = 0.
  After the prologue: RCX = acc, RDX = input, R8 = secret, R9 = nbStripes. }
procedure XXH3_accumulate_sse2(acc: PByte; const input: PByte; const secret: PByte; nbStripes: UIntPtr); assembler; nostackframe;
// UNIX RDI, RSI, RDX, RCX
// WIN64: RCX, RDX, R8, R9
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdx, %r8
  movq %rcx, %r9
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  testq %r9, %r9
  je .L271                 // nothing to do for nbStripes = 0
  leaq 448(%rdx), %rax
  prefetcht0 384(%rdx)
  // load the 8 accumulator lanes into registers
  movdqu (%rcx), %xmm4
  movdqu 16(%rcx), %xmm3
  movdqu 32(%rcx), %xmm2
  movdqu 48(%rcx), %xmm1
  xorl %edx, %edx          // RDX now re-used as the stripe counter
  jmp .L276
.L274:
  prefetcht0 (%rax)
  addq $64, %rax           // next input stripe
.L276:
  movdqu (%r8,%rdx,8), %xmm0
  movdqu -448(%rax), %xmm5
  pxor %xmm5, %xmm0
  pshufd $49, %xmm0, %xmm5
  pmuludq %xmm5, %xmm0
  movdqu -448(%rax), %xmm5
  pshufd $78, %xmm5, %xmm5
  paddq %xmm5, %xmm0
  movdqu -432(%rax), %xmm5
  paddq %xmm0, %xmm4
  movdqu 16(%r8,%rdx,8), %xmm0
  pxor %xmm5, %xmm0
  pshufd $49, %xmm0, %xmm5
  pmuludq %xmm5, %xmm0
  movdqu -432(%rax), %xmm5
  pshufd $78, %xmm5, %xmm5
  paddq %xmm5, %xmm0
  movdqu -416(%rax), %xmm5
  paddq %xmm0, %xmm3
  movdqu 32(%r8,%rdx,8), %xmm0
  pxor %xmm5, %xmm0
  pshufd $49, %xmm0, %xmm5
  pmuludq %xmm5, %xmm0
  movdqu -416(%rax), %xmm5
  pshufd $78, %xmm5, %xmm5
  paddq %xmm5, %xmm0
  movdqu -400(%rax), %xmm5
  paddq %xmm0, %xmm2
  movdqu 48(%r8,%rdx,8), %xmm0
  addq $1, %rdx
  pxor %xmm5, %xmm0
  pshufd $49, %xmm0, %xmm5
  pmuludq %xmm5, %xmm0
  movdqu -400(%rax), %xmm5
  pshufd $78, %xmm5, %xmm5
  paddq %xmm5, %xmm0
  paddq %xmm0, %xmm1
  cmpq %rdx, %r9
  jne .L274
  // write the accumulator lanes back once
  movups %xmm4, (%rcx)
  movups %xmm3, 16(%rcx)
  movups %xmm2, 32(%rcx)
  movups %xmm1, 48(%rcx)
.L271:
  ret
end;
{ AVX2 kernel: same per-stripe mix as the SSE2 version, but processes the
  64-byte stripe as two 32-byte YMM vectors (4 lanes each).
  After the prologue: RCX = acc, RDX = input, R8 = secret.
  vzeroupper avoids the AVX/SSE transition penalty for callers. }
procedure XXH3_accumulate_512_avx2(acc: PByte; const input: PByte; const secret: PByte); assembler; nostackframe;
// UNIX RDI, RSI, RDX
// WIN64: RCX, RDX, R8
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdx, %r8
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  // lanes 0-3
  vmovdqu (%r8), %ymm3
  vpxor (%rdx), %ymm3, %ymm0
  vpsrlq $32, %ymm0, %ymm1
  vpmuludq %ymm1, %ymm0, %ymm0
  vpshufd $78, (%rdx), %ymm1
  vpaddq %ymm1, %ymm0, %ymm0
  vpaddq (%rcx), %ymm0, %ymm0
  vmovdqu 32(%rdx), %ymm1
  vmovdqu %ymm0, (%rcx)
  // lanes 4-7
  vpxor 32(%r8), %ymm1, %ymm0
  vpshufd $78, %ymm1, %ymm1
  vpsrlq $32, %ymm0, %ymm2
  vpmuludq %ymm2, %ymm0, %ymm0
  vpaddq %ymm1, %ymm0, %ymm0
  vpaddq 32(%rcx), %ymm0, %ymm0
  vmovdqu %ymm0, 32(%rcx)
  vzeroupper
end;
{ AVX2 loop kernel: accumulates nbStripes stripes, input advancing 64 bytes
  and secret 8 bytes per stripe, with the same prefetch scheme as the SSE2
  loop (RAX = input + 448). Accumulators are kept in YMM2/YMM3 across the
  loop and stored once. Returns immediately when nbStripes = 0.
  After the prologue: RCX = acc, RDX = input, R8 = secret, R9 = nbStripes. }
procedure XXH3_accumulate_avx2(acc: PByte; const input: PByte; const secret: PByte; nbStripes: UIntPtr); assembler; nostackframe;
// UNIX RDI, RSI, RDX, RCX
// WIN64: RCX, RDX, R8, R9
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdx, %r8
  movq %rcx, %r9
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  testq %r9, %r9
  je .L290                 // nothing to do for nbStripes = 0
  leaq 448(%rdx), %rax
  prefetcht0 384(%rdx)
  vmovdqu (%rcx), %ymm3
  xorl %edx, %edx          // RDX re-used as stripe counter
  vmovdqu 32(%rcx), %ymm2
  jmp .L288
.L286:
  prefetcht0 (%rax)
  addq $64, %rax
.L288:
  vmovdqu (%r8,%rdx,8), %ymm4
  vpxor -448(%rax), %ymm4, %ymm0
  vmovdqu 32(%r8,%rdx,8), %ymm5
  addq $1, %rdx
  vpsrlq $32, %ymm0, %ymm1
  vpmuludq %ymm1, %ymm0, %ymm0
  vpshufd $78, -448(%rax), %ymm1
  vpaddq %ymm1, %ymm0, %ymm0
  vpaddq %ymm3, %ymm0, %ymm3
  vpxor -416(%rax), %ymm5, %ymm0
  vpsrlq $32, %ymm0, %ymm1
  vpmuludq %ymm1, %ymm0, %ymm0
  vpshufd $78, -416(%rax), %ymm1
  vpaddq %ymm1, %ymm0, %ymm0
  vpaddq %ymm2, %ymm0, %ymm2
  cmpq %rdx, %r9
  jne .L286
  vmovdqu %ymm3, (%rcx)
  vmovdqu %ymm2, 32(%rcx)
  vzeroupper
.L290:
  ret
end;
{ SSE2 scramble kernel, applied once per secret block. Per 64-bit lane it
  mirrors XXH3_scalarScrambleRound:
    acc = (acc xor (acc shr 47) xor secret) * XXH_PRIME32_1
  The 64x32 multiply is synthesized from two pmuludq plus shift/add,
  using the 4-lane SSE_PRIME32_1 constant.
  After the prologue: RCX = acc, RDX = secret. }
procedure XXH3_scrambleAcc_sse2(acc: PByte; const secret: PByte); assembler; nostackframe;
// UNIX RDI, RSI
// WIN64: RCX, RDX
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  // lanes 0-1
  movdqu (%rcx), %xmm1
  movdqu (%rdx), %xmm0
  pxor (%rcx), %xmm0
  psrlq $47, %xmm1
  pxor %xmm1, %xmm0
  movdqu SSE_PRIME32_1(%rip), %xmm1
  pshufd $49, %xmm0, %xmm2
  pmuludq %xmm1, %xmm2
  pmuludq %xmm1, %xmm0
  psllq $32, %xmm2
  paddq %xmm2, %xmm0
  movdqu 16(%rcx), %xmm2
  movups %xmm0, (%rcx)
  // lanes 2-3
  movdqu 16(%rdx), %xmm0
  pxor 16(%rcx), %xmm0
  psrlq $47, %xmm2
  pxor %xmm2, %xmm0
  pshufd $49, %xmm0, %xmm2
  pmuludq %xmm1, %xmm0
  pmuludq %xmm1, %xmm2
  psllq $32, %xmm2
  paddq %xmm2, %xmm0
  movdqu 32(%rcx), %xmm2
  movups %xmm0, 16(%rcx)
  // lanes 4-5
  movdqu 32(%rdx), %xmm0
  pxor 32(%rcx), %xmm0
  psrlq $47, %xmm2
  pxor %xmm2, %xmm0
  pshufd $49, %xmm0, %xmm2
  pmuludq %xmm1, %xmm0
  pmuludq %xmm1, %xmm2
  psllq $32, %xmm2
  paddq %xmm2, %xmm0
  movdqu 48(%rcx), %xmm2
  movups %xmm0, 32(%rcx)
  // lanes 6-7
  movdqu 48(%rdx), %xmm0
  pxor 48(%rcx), %xmm0
  psrlq $47, %xmm2
  pxor %xmm2, %xmm0
  pshufd $49, %xmm0, %xmm2
  pmuludq %xmm1, %xmm0
  pmuludq %xmm2, %xmm1
  psllq $32, %xmm1
  paddq %xmm1, %xmm0
  movups %xmm0, 48(%rcx)
end;
{ AVX2 scramble kernel: same per-lane transform as the SSE2 version but on
  two 32-byte YMM vectors. $-1640531535 is XXH_PRIME32_1 ($9E3779B1) as a
  signed 32-bit immediate, broadcast to all lanes via vpbroadcastd.
  After the prologue: RCX = acc, RDX = secret. }
procedure XXH3_scrambleAcc_avx2(acc: PByte; const secret: PByte); assembler; nostackframe;
// UNIX RDI, RSI
// WIN64: RCX, RDX
asm
{$IF DEFINED(UNIX)}
  // remap System V args onto the Win64 register layout used below
  movq %rdi, %rcx
  movq %rsi, %rdx
{$ENDIF}
  movl $-1640531535, %eax
  vmovdqu (%rcx), %ymm3
  vmovdqu (%rdx), %ymm4
  vmovdqu 32(%rcx), %ymm5
  // lanes 0-3
  vpxor %ymm3, %ymm4, %ymm0
  vpsrlq $47, %ymm3, %ymm1
  vmovdqu 32(%rdx), %ymm3
  vpxor %ymm1, %ymm0, %ymm0
  vmovd %eax, %xmm1
  vpbroadcastd %xmm1, %ymm1
  vpsrlq $32, %ymm0, %ymm2
  vpmuludq %ymm1, %ymm2, %ymm2
  vpmuludq %ymm1, %ymm0, %ymm0
  vpsllq $32, %ymm2, %ymm2
  vpaddq %ymm2, %ymm0, %ymm0
  vpsrlq $47, %ymm5, %ymm2
  vmovdqu %ymm0, (%rcx)
  // lanes 4-7
  vpxor %ymm5, %ymm3, %ymm0
  vpxor %ymm2, %ymm0, %ymm0
  vpsrlq $32, %ymm0, %ymm2
  vpmuludq %ymm1, %ymm0, %ymm0
  vpmuludq %ymm1, %ymm2, %ymm1
  vpsllq $32, %ymm1, %ymm1
  vpaddq %ymm1, %ymm0, %ymm0
  vmovdqu %ymm0, 32(%rcx)
  vzeroupper
end;
  523. {$ELSE}
  524. const
  525. XXH_ACC_NB = 8;
  526. function XXH_mult32to64_add64(lhs, rhs, acc: UInt64): UInt64; inline;
  527. begin
  528. Result:= XXH_mult32to64(UInt32(lhs), UInt32(rhs)) + acc;
  529. end;
  530. procedure XXH3_scalarRound(acc: PByte; const input: PByte; const secret: PByte; lane: UIntPtr); inline;
  531. var
  532. xinput, xsecret: PByte;
  533. data_val, data_key: UInt64;
  534. xacc: PUInt64 absolute acc;
  535. begin
  536. xinput:= input;
  537. xsecret:= secret;
  538. Assert(lane < XXH_ACC_NB);
  539. // XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
  540. data_val:= XXH_readLE64(xinput + lane * 8);
  541. data_key:= data_val xor XXH_readLE64(xsecret + lane * 8);
  542. xacc[lane xor 1] += data_val; //* swap adjacent lanes */
  543. xacc[lane]:= XXH_mult32to64_add64(data_key, data_key shr 32, xacc[lane]);
  544. end;
  545. procedure XXH3_accumulate_512_scalar(acc: PByte; const input: PByte; const secret: PByte);
  546. begin
  547. XXH3_scalarRound(acc, input, secret, 0);
  548. XXH3_scalarRound(acc, input, secret, 1);
  549. XXH3_scalarRound(acc, input, secret, 2);
  550. XXH3_scalarRound(acc, input, secret, 3);
  551. XXH3_scalarRound(acc, input, secret, 4);
  552. XXH3_scalarRound(acc, input, secret, 5);
  553. XXH3_scalarRound(acc, input, secret, 6);
  554. XXH3_scalarRound(acc, input, secret, 7);
  555. end;
  556. procedure XXH3_accumulate_scalar(acc: PByte; const input: PByte; const secret: PByte; nbStripes: UIntPtr);
  557. var
  558. n: UIntPtr;
  559. in_: PByte;
  560. begin
  561. for n:= 0 to nbStripes - 1 do
  562. begin
  563. in_:= input + n * XXH_STRIPE_LEN;
  564. XXH3_accumulate_512_scalar(acc, in_, secret + n * XXH_SECRET_CONSUME_RATE);
  565. end;
  566. end;
  567. procedure XXH3_scalarScrambleRound(acc: PByte; const secret: PByte; lane: UIntPtr); inline;
  568. var
  569. acc64: UInt64;
  570. key64: UInt64;
  571. xacc: PUInt64;
  572. xsecret: PByte;
  573. begin
  574. xacc:= PUInt64(acc); //* presumed aligned */
  575. xsecret:= secret; //* no alignment restriction */
  576. // XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
  577. Assert(lane < XXH_ACC_NB);
  578. key64:= XXH_readLE64(xsecret + lane * 8);
  579. acc64:= xacc[lane];
  580. acc64:= XXH_xorshift64(acc64, 47);
  581. acc64:= acc64 xor key64;
  582. acc64 *= XXH_PRIME32_1;
  583. xacc[lane]:= acc64;
  584. end;
  585. procedure XXH3_scrambleAcc_scalar(acc: PByte; const secret: PByte); inline;
  586. begin
  587. XXH3_scalarScrambleRound(acc, secret, 0);
  588. XXH3_scalarScrambleRound(acc, secret, 1);
  589. XXH3_scalarScrambleRound(acc, secret, 2);
  590. XXH3_scalarScrambleRound(acc, secret, 3);
  591. XXH3_scalarScrambleRound(acc, secret, 4);
  592. XXH3_scalarScrambleRound(acc, secret, 5);
  593. XXH3_scalarScrambleRound(acc, secret, 6);
  594. XXH3_scalarScrambleRound(acc, secret, 7);
  595. end;
  596. {$ENDIF}
{ Feeds nbStripes 64-byte stripes from input into acc, walking through the
  secret at XXH_SECRET_CONSUME_RATE bytes per stripe.
  nbStripesSoFarPtr tracks the position within the current secret block;
  every time a block of nbStripesPerBlock stripes completes, the
  accumulator is scrambled (keyed by the secret's tail at secretLimit)
  and the secret position rewinds to the start.
  Returns the input pointer advanced past all consumed stripes. }
function XXH3_consumeStripes(acc: PByte; nbStripesSoFarPtr: PUIntPtr; nbStripesPerBlock: UIntPtr;
  input: PByte; nbStripes: UIntPtr;
  const secret: PByte; secretLimit: UIntPtr;
  f_acc: TXXH3_accumulate_f;
  f_scramble: TXXH3_scrambleAcc_f): PByte; inline;
var
  initialSecret: PByte;
  nbStripesThisIter: UIntPtr;
begin
  //* resume where the previous call left off inside the secret */
  initialSecret:= secret + nbStripesSoFarPtr^ * XXH_SECRET_CONSUME_RATE;
  //* Process full blocks */
  if (nbStripes >= (nbStripesPerBlock - nbStripesSoFarPtr^)) then
  begin
    //* Process the initial partial block... */
    nbStripesThisIter:= nbStripesPerBlock - nbStripesSoFarPtr^;
    repeat
      //* Accumulate and scramble */
      f_acc(acc, input, initialSecret, nbStripesThisIter);
      f_scramble(acc, secret + secretLimit);
      input += nbStripesThisIter * XXH_STRIPE_LEN;
      nbStripes -= nbStripesThisIter;
      //* Then continue the loop with the full block size */
      nbStripesThisIter:= nbStripesPerBlock;
      initialSecret:= secret;
    until not (nbStripes >= nbStripesPerBlock);
    nbStripesSoFarPtr^:= 0;
  end;
  //* Process a partial block */
  if (nbStripes > 0) then
  begin
    f_acc(acc, input, initialSecret, nbStripes);
    input += nbStripes * XXH_STRIPE_LEN;
    nbStripesSoFarPtr^ += nbStripes;
  end;
  //* Return end pointer */
  Result:= input;
end;
{ Core streaming ingestion. Small inputs are appended to the internal
  buffer; otherwise the buffer is completed and flushed, then as many
  whole stripes as possible are hashed directly from input, and the
  remainder (always at least one byte) is buffered for the digest. }
procedure XXH3_update(const state: PXXH3_state_t; input: PByte; len: UIntPtr;
  f_acc: TXXH3_accumulate_f; f_scramble: TXXH3_scrambleAcc_f); // inline;
var
  bEnd: PByte;
  acc: PUInt64;
  secret: PByte;
  loadSize: UIntPtr;
  nbStripes: UIntPtr;
begin
  bEnd:= input + len;
  //* select the active secret: built-in (seeded) or external */
  if (state^.extSecret = nil) then
    secret:= state^.customSecret
  else begin
    secret:= state^.extSecret;
  end;
  acc:= state^.acc;
  state^.totalLen += len;
  Assert(state^.bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
  //* small input : just fill in tmp buffer */
  if (len <= XXH3_INTERNALBUFFER_SIZE - state^.bufferedSize) then
  begin
    Move(input^, state^.buffer[state^.bufferedSize], len);
    state^.bufferedSize += XXH32_hash_t(len);
    Exit;
  end;
  //* total input is now > XXH3_INTERNALBUFFER_SIZE */
  Assert(XXH3_INTERNALBUFFER_SIZE mod XXH_STRIPE_LEN = 0); //* clean multiple */
  (*
  * Internal buffer is partially filled (always, except at beginning)
  * Complete it, then consume it.
  *)
  if (state^.bufferedSize > 0) then
  begin
    loadSize:= XXH3_INTERNALBUFFER_SIZE - state^.bufferedSize;
    Move(input^, state^.buffer[state^.bufferedSize], loadSize);
    input += loadSize;
    XXH3_consumeStripes(PByte(acc),
      @state^.nbStripesSoFar, state^.nbStripesPerBlock,
      state^.buffer, XXH3_INTERNALBUFFER_STRIPES,
      secret, state^.secretLimit,
      f_acc, f_scramble);
    state^.bufferedSize:= 0;
  end;
  Assert(input < bEnd);
  if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) then
  begin
    //* counting over len-1 bytes guarantees at least one byte is left
    //  over to be buffered below (the digest needs a non-empty buffer) */
    nbStripes:= UIntPtr(bEnd - 1 - input) div XXH_STRIPE_LEN;
    input:= XXH3_consumeStripes(PByte(acc),
      @state^.nbStripesSoFar, state^.nbStripesPerBlock,
      input, nbStripes,
      secret, state^.secretLimit,
      f_acc, f_scramble);
    //* stash the last consumed stripe at the end of the buffer; the digest
    //  reads it back when reconstructing the final stripe. (The leading
    //  '+' inside the index is a harmless unary plus.) */
    Move((input - XXH_STRIPE_LEN)^, state^.buffer[ + sizeof(state^.buffer) - XXH_STRIPE_LEN], XXH_STRIPE_LEN);
  end;
  //* Some remaining input (always) : buffer it */
  Assert(input < bEnd);
  Assert(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
  Assert(state^.bufferedSize = 0);
  Move(input^, state^.buffer[0], UIntPtr(bEnd - input));
  state^.bufferedSize:= XXH32_hash_t(bEnd - input);
end;
{ Ingests len bytes from input, dispatching through the globally selected
  accumulate/scramble kernels. }
procedure XXH3_64bits_update(state: PXXH3_state_t; const input: Pointer; len: UIntPtr); inline;
begin
  XXH3_update(state, input, len, XXH3_accumulate, XXH3_scrambleAcc);
end;
{ 128-bit update. Ingestion is identical for the 64- and 128-bit variants
  (only the digest differs), so this delegates. }
procedure XXH3_128bits_update(state: PXXH3_state_t; const input: PByte; len: UIntPtr);
begin
  XXH3_64bits_update(state, input, len);
end;
{ Produces the long-input accumulator snapshot into acc without mutating
  state (so streaming can continue afterwards): consumes any whole stripes
  still buffered, then processes the final stripe — reconstructed from the
  buffered tail plus the stashed previous stripe when fewer than 64 bytes
  remain — keyed at XXH_SECRET_LASTACC_START from the secret's end. }
procedure XXH3_digest_long(acc: PUInt64; const state: PXXH3_state_t;
  const secret: PByte); inline;
var
  lastStripePtr: PByte;
  nbStripes, nbStripesSoFar, catchupSize: UIntPtr;
  lastStripe: array[0..Pred(XXH_STRIPE_LEN)] of Byte;
begin
  (*
  * Digest on a local copy. This way, the state remains unaltered, and it can
  * continue ingesting more input afterwards.
  *)
  Move(state^.acc[0], acc^, sizeof(state^.acc));
  if (state^.bufferedSize >= XXH_STRIPE_LEN) then
  begin
    //* Consume remaining stripes then point to remaining data in buffer */
    nbStripes:= (state^.bufferedSize - 1) div XXH_STRIPE_LEN;
    //* local copy: the state's own stripe counter must not move */
    nbStripesSoFar:= state^.nbStripesSoFar;
    XXH3_consumeStripes(PByte(acc),
      @nbStripesSoFar, state^.nbStripesPerBlock,
      state^.buffer, nbStripes,
      secret, state^.secretLimit,
      XXH3_accumulate, XXH3_scrambleAcc);
    lastStripePtr:= @state^.buffer[state^.bufferedSize - XXH_STRIPE_LEN];
  end else begin //* bufferedSize < XXH_STRIPE_LEN */
    //* Copy to temp buffer: prepend the tail of the previous stripe
    //  (stashed at the end of buffer by XXH3_update) to the fresh bytes */
    catchupSize:= XXH_STRIPE_LEN - state^.bufferedSize;
    Assert(state^.bufferedSize > 0); //* there is always some input buffered */
    Move(state^.buffer[sizeof(state^.buffer) - catchupSize], lastStripe[0], catchupSize);
    Move(state^.buffer[0], lastStripe[catchupSize], state^.bufferedSize);
    lastStripePtr:= lastStripe;
  end;
  //* Last stripe */
  XXH3_accumulate_512(PByte(acc),
    lastStripePtr,
    secret + state^.secretLimit - XXH_SECRET_LASTACC_START);
end;
  739. function XXH_mult64to128(lhs, rhs: UInt64): XXH128_hash_t;
  740. var
  741. cross, upper, lower: UInt64;
  742. lo_lo, hi_lo, lo_hi, hi_hi: UInt64;
  743. begin
  744. //* First calculate all of the cross products. */
  745. lo_lo:= XXH_mult32to64(lhs and $FFFFFFFF, rhs and $FFFFFFFF);
  746. hi_lo:= XXH_mult32to64(lhs shr 32, rhs and $FFFFFFFF);
  747. lo_hi:= XXH_mult32to64(lhs and $FFFFFFFF, rhs shr 32);
  748. hi_hi:= XXH_mult32to64(lhs shr 32, rhs shr 32);
  749. //* Now add the products together. These will never overflow. */
  750. cross:= (lo_lo shr 32) + (hi_lo and $FFFFFFFF) + lo_hi;
  751. upper:= (hi_lo shr 32) + (cross shr 32) + hi_hi;
  752. lower:= (cross shl 32) or (lo_lo and $FFFFFFFF);
  753. Result.low64 := lower;
  754. Result.high64 := upper;
  755. end;
  756. function XXH3_mul128_fold64(lhs, rhs: UInt64): UInt64;
  757. var
  758. product: XXH128_hash_t;
  759. begin
  760. product:= XXH_mult64to128(lhs, rhs);
  761. Result:= product.low64 xor product.high64;
  762. end;
  763. function XXH3_mix2Accs(const acc: PUInt64; const secret: PByte): Uint64; inline;
  764. begin
  765. Result:= XXH3_mul128_fold64(
  766. acc[0] xor XXH_readLE64(secret),
  767. acc[1] xor XXH_readLE64(secret + 8) );
  768. end;
  769. function XXH3_avalanche(h64: UInt64): XXH64_hash_t;
  770. begin
  771. h64:= XXH_xorshift64(h64, 37);
  772. h64 *= PRIME_MX1;
  773. h64:= XXH_xorshift64(h64, 32);
  774. Result:= h64;
  775. end;
  776. function XXH3_mergeAccs(const acc: PUInt64; const secret: PByte; start: UInt64): XXH64_hash_t;
  777. var
  778. i: UIntPtr;
  779. begin
  780. Result:= start;
  781. for i:= 0 to 3 do
  782. begin
  783. result += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i);
  784. end;
  785. Result:= XXH3_avalanche(Result);
  786. end;
//* 128-bit hash for inputs of 9..16 bytes: a doubled variant of the 64-bit
//* 9-to-16 routine. The two 8-byte reads overlap when len < 16. */
function XXH3_len_9to16_128b(const input: PByte; len: UIntPtr; const secret: PByte; seed: XXH64_hash_t): XXH128_hash_t; inline;
var
m128: XXH128_hash_t;
bitflipl, bitfliph, input_lo, input_hi: UInt64;
begin
Assert(input <> nil);
Assert(secret <> nil);
Assert((9 <= len) and (len <= 16));
//* Seed-dependent masks derived from secret bytes 32..63. */
bitflipl := (XXH_readLE64(secret+32) xor XXH_readLE64(secret+40)) - seed;
bitfliph := (XXH_readLE64(secret+48) xor XXH_readLE64(secret+56)) + seed;
//* First and last 8 bytes of the input (overlapping for len < 16). */
input_lo := XXH_readLE64(input);
input_hi := XXH_readLE64(input + len - 8);
m128:= XXH_mult64to128(input_lo xor input_hi xor bitflipl, XXH_PRIME64_1);
{*
* Put len in the middle of m128 to ensure that the length gets mixed to
* both the low and high bits in the 128x64 multiply below.
*}
m128.low64 += UInt64(len - 1) << 54;
input_hi := input_hi xor bitfliph;
{*
* Add the high 32 bits of input_hi to the high 32 bits of m128, then
* add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
* the high 64 bits of m128.
*
* The best approach to this operation is different on 32-bit and 64-bit.
*}
{$IF DEFINED(CPU32)}
{*
* 32-bit optimized version, which is more readable.
*
* On 32-bit, it removes an ADC and delays a dependency between the two
* halves of m128.high64, but it generates an extra mask on 64-bit.
*}
m128.high64 += (input_hi and UInt64($FFFFFFFF00000000)) + XXH_mult32to64(UInt32(input_hi), XXH_PRIME32_2);
{$ELSE}
{*
* 64-bit optimized (albeit more confusing) version.
*
* Uses some properties of addition and multiplication to remove the mask:
*
* Let:
* a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
* b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
* c = XXH_PRIME32_2
*
* a + (b * c)
* Inverse Property: x + y - x == y
* a + (b * (1 + c - 1))
* Distributive Property: x * (y + z) == (x * y) + (x * z)
* a + (b * 1) + (b * (c - 1))
* Identity Property: x * 1 == x
* a + b + (b * (c - 1))
*
* Substitute a, b, and c:
* input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
*
* Since input_hi.hi + input_hi.lo == input_hi, we get this:
* input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
*}
m128.high64 += input_hi + XXH_mult32to64(UInt32(input_hi), XXH_PRIME32_2 - 1);
{$ENDIF}
//* m128 ^= XXH_swap64(m128 >> 64); */
m128.low64 := m128.low64 xor SwapEndian(m128.high64);
//* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
Result:= XXH_mult64to128(m128.low64, XXH_PRIME64_2);
Result.high64 += m128.high64 * XXH_PRIME64_2;
//* Avalanche each half independently for the final result. */
Result.low64 := XXH3_avalanche(Result.low64);
Result.high64 := XXH3_avalanche(Result.high64);
end;
  856. function XXH3_len_4to8_128b(const input: PByte; len: UIntPtr; const secret: PByte; seed: XXH64_hash_t): XXH128_hash_t; inline;
  857. var
  858. input_lo, input_hi: UInt32;
  859. input_64, bitflip, keyed: UInt64;
  860. begin
  861. Assert(input <> nil);
  862. Assert(secret <> nil);
  863. Assert((4 <= len) and (len <= 8));
  864. seed := seed xor (UInt64(SwapEndian(UInt32(seed))) shl 32);
  865. input_lo := XXH_readLE32(input);
  866. input_hi := XXH_readLE32(input + len - 4);
  867. input_64 := input_lo + (UInt64(input_hi) shl 32);
  868. bitflip := (XXH_readLE64(secret+16) xor XXH_readLE64(secret+24)) + seed;
  869. keyed := input_64 xor bitflip;
  870. ///* Shift len to the left to ensure it is even, this avoids even multiplies. */
  871. Result:= XXH_mult64to128(keyed, XXH_PRIME64_1 + (len shl 2));
  872. Result.high64 += (Result.low64 shl 1);
  873. Result.low64 := Result.low64 xor (Result.high64 shr 3);
  874. Result.low64 := XXH_xorshift64(Result.low64, 35);
  875. Result.low64 *= PRIME_MX2;
  876. Result.low64 := XXH_xorshift64(Result.low64, 28);
  877. Result.high64 := XXH3_avalanche(Result.high64);
  878. end;
  879. function XXH3_len_1to3_128b(const input: PByte; len: UIntPtr; const secret: PByte; seed: XXH64_hash_t): XXH128_hash_t; inline;
  880. var
  881. c1, c2, c3: Byte;
  882. combinedl, combinedh: UInt32;
  883. bitflipl, bitfliph, keyed_lo, keyed_hi: UInt64;
  884. begin
  885. //* A doubled version of 1to3_64b with different constants. */
  886. Assert(input <> nil);
  887. Assert((1 <= len) and (len <= 3));
  888. Assert(secret <> nil);
  889. (*
  890. * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
  891. * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
  892. * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
  893. *)
  894. c1 := input[0];
  895. c2 := input[len >> 1];
  896. c3 := input[len - 1];
  897. combinedl := (UInt32(c1) shl 16) or (UInt32(c2) shl 24) or
  898. (UInt32(c3) shl 0) or (UInt32(len) shl 8);
  899. combinedh := RolDWord(SwapEndian(combinedl), 13);
  900. bitflipl := (XXH_readLE32(secret) xor XXH_readLE32(secret + 4)) + seed;
  901. bitfliph := (XXH_readLE32(secret+8) xor XXH_readLE32(secret + 12)) - seed;
  902. keyed_lo := UInt64(combinedl) xor bitflipl;
  903. keyed_hi := UInt64(combinedh) xor bitfliph;
  904. Result.low64 := XXH64_avalanche(keyed_lo);
  905. Result.high64 := XXH64_avalanche(keyed_hi);
  906. end;
  907. function XXH3_len_0to16_128b(const input: PByte; len: UIntPtr; const secret: PByte; seed: XXH64_hash_t): XXH128_hash_t; inline;
  908. var
  909. bitflipl, bitfliph: UInt64;
  910. begin
  911. Assert(len <= 16);
  912. if (len > 8) then
  913. Result:= XXH3_len_9to16_128b(input, len, secret, seed)
  914. else if (len >= 4) then
  915. Result:= XXH3_len_4to8_128b(input, len, secret, seed)
  916. else if (len > 0) then
  917. Result:= XXH3_len_1to3_128b(input, len, secret, seed)
  918. else begin
  919. bitflipl:= XXH_readLE64(secret+64) xor XXH_readLE64(secret+72);
  920. bitfliph:= XXH_readLE64(secret+80) xor XXH_readLE64(secret+88);
  921. Result.low64:= XXH64_avalanche(seed xor bitflipl);
  922. Result.high64:= XXH64_avalanche( seed xor bitfliph);
  923. end;
  924. end;
  925. function XXH3_mix16B(const input: PByte;
  926. const secret: PByte; seed64: UInt64): UInt64; inline;
  927. var
  928. input_lo, input_hi: UInt64;
  929. begin
  930. input_lo := XXH_readLE64(input);
  931. input_hi := XXH_readLE64(input+8);
  932. Result:= XXH3_mul128_fold64(
  933. input_lo xor (XXH_readLE64(secret) + seed64),
  934. input_hi xor (XXH_readLE64(secret+8) - seed64)
  935. );
  936. end;
  937. function XXH128_mix32B(var acc: XXH128_hash_t; const input_1: PByte; const input_2: PByte;
  938. const secret: PByte; seed: XXH64_hash_t): XXH128_hash_t; inline;
  939. begin
  940. acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
  941. acc.low64 := acc.low64 xor (XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8));
  942. acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
  943. acc.high64 := acc.high64 xor (XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8));
  944. Result:= acc;
  945. end;
  946. function XXH3_len_17to128_128b(const input: PByte; len: UIntPtr;
  947. const secret: PByte; secretSize: UIntPtr;
  948. seed: XXH64_hash_t): XXH128_hash_t; inline;
  949. var
  950. acc: XXH128_hash_t;
  951. begin
  952. Assert(secretSize >= XXH3_SECRET_SIZE_MIN);
  953. Assert((16 < len) and (len <= 128));
  954. acc.low64 := len * XXH_PRIME64_1;
  955. acc.high64 := 0;
  956. if (len > 32) then
  957. begin
  958. if (len > 64) then
  959. begin
  960. if (len > 96) then
  961. begin
  962. acc := XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
  963. end;
  964. acc := XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
  965. end;
  966. acc := XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
  967. end;
  968. acc := XXH128_mix32B(acc, input, input+len-16, secret, seed);
  969. Result.low64 := acc.low64 + acc.high64;
  970. Result.high64 := (acc.low64 * XXH_PRIME64_1)
  971. + (acc.high64 * XXH_PRIME64_4)
  972. + ((len - seed) * XXH_PRIME64_2);
  973. Result.low64 := XXH3_avalanche(Result.low64);
  974. Result.high64 := XXH64_hash_t(0) - XXH3_avalanche(Result.high64);
  975. end;
  976. function XXH3_len_129to240_128b(const input: PBYte; len: UIntPtr;
  977. const secret: PByte; secretSize: UIntPtr;
  978. seed: XXH64_hash_t): XXH128_hash_t; inline;
  979. var
  980. i: UInt32;
  981. acc: XXH128_hash_t;
  982. begin
  983. Assert(secretSize >= XXH3_SECRET_SIZE_MIN);
  984. Assert((128 < len) and (len <= XXH3_MIDSIZE_MAX));
  985. acc.low64 := len * XXH_PRIME64_1;
  986. acc.high64 := 0;
  987. {*
  988. * We set as `i` as offset + 32. We do this so that unchanged
  989. * `len` can be used as upper bound. This reaches a sweet spot
  990. * where both x86 and aarch64 get simple agen and good codegen
  991. * for the loop.
  992. *}
  993. i:= 32;
  994. while (i < 160) do
  995. begin
  996. acc := XXH128_mix32B(acc,
  997. input + i - 32,
  998. input + i - 16,
  999. secret + i - 32,
  1000. seed);
  1001. Inc(i, 32);
  1002. end;
  1003. acc.low64 := XXH3_avalanche(acc.low64);
  1004. acc.high64 := XXH3_avalanche(acc.high64);
  1005. {*
  1006. * NB: `i <= len` will duplicate the last 32-bytes if
  1007. * len % 32 was zero. This is an unfortunate necessity to keep
  1008. * the hash result stable.
  1009. *}
  1010. i:= 160;
  1011. while i <= len do
  1012. begin
  1013. acc := XXH128_mix32B(acc,
  1014. input + i - 32,
  1015. input + i - 16,
  1016. secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
  1017. seed);
  1018. Inc(i, 32);
  1019. end;
  1020. //* last bytes */
  1021. acc := XXH128_mix32B(acc,
  1022. input + len - 16,
  1023. input + len - 32,
  1024. secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
  1025. XXH64_hash_t(0) - seed);
  1026. Result.low64 := acc.low64 + acc.high64;
  1027. Result.high64 := (acc.low64 * XXH_PRIME64_1)
  1028. + (acc.high64 * XXH_PRIME64_4)
  1029. + ((len - seed) * XXH_PRIME64_2);
  1030. Result.low64 := XXH3_avalanche(Result.low64);
  1031. Result.high64 := XXH64_hash_t(0) - XXH3_avalanche(Result.high64);
  1032. end;
  1033. function XXH3_128bits_internal(const input: PByte; len: UIntPtr; seed64: XXH64_hash_t;
  1034. const secret: PByte; secretLen: UIntPtr): XXH128_hash_t; inline;
  1035. begin
  1036. Assert(len <= XXH3_MIDSIZE_MAX);
  1037. Assert(secretLen >= XXH3_SECRET_SIZE_MIN);
  1038. (*
  1039. * If an action is to be taken if `secret` conditions are not respected,
  1040. * it should be done here.
  1041. * For now, it's a contract pre-condition.
  1042. * Adding a check and a branch here would cost performance at every hash.
  1043. *)
  1044. if (len <= 16) then
  1045. Result:= XXH3_len_0to16_128b(input, len, secret, seed64)
  1046. else if (len <= 128) then
  1047. Result:= XXH3_len_17to128_128b(input, len, secret, secretLen, seed64)
  1048. else begin
  1049. Result:= XXH3_len_129to240_128b(input, len, secret, secretLen, seed64);
  1050. end;
  1051. end;
  1052. function XXH3_128bits_digest(const state: PXXH3_state_t): XXH128_hash_t;
  1053. var
  1054. acc: PUInt64;
  1055. secret: PByte;
  1056. buffer: array[0..Pred(XXH_ACC_SIZE + XXH_ACC_ALIGN)] of Byte;
  1057. begin
  1058. if (state^.extSecret = nil) then
  1059. secret:= state^.customSecret
  1060. else begin
  1061. secret:= state^.extSecret;
  1062. end;
  1063. if (state^.totalLen > XXH3_MIDSIZE_MAX) then
  1064. begin
  1065. acc:= System.Align(@buffer[0], XXH_ACC_ALIGN);
  1066. Assert(UIntPtr(acc) mod XXH_ACC_ALIGN = 0);
  1067. XXH3_digest_long(acc, state, secret);
  1068. Assert(state^.secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
  1069. Result.low64 := XXH3_mergeAccs(acc,
  1070. secret + XXH_SECRET_MERGEACCS_START,
  1071. UInt64(state^.totalLen) * XXH_PRIME64_1);
  1072. Result.high64 := XXH3_mergeAccs(acc,
  1073. secret + state^.secretLimit + XXH_STRIPE_LEN -
  1074. XXH_ACC_SIZE - XXH_SECRET_MERGEACCS_START,
  1075. not (UInt64(state^.totalLen) * XXH_PRIME64_2));
  1076. end
  1077. else begin
  1078. //* len <= XXH3_MIDSIZE_MAX : short code */
  1079. {
  1080. if (state^.useSeed)
  1081. Result:= XXH3_128bits_withSeed(state^.buffer, UIntPtr(state^.totalLen), state^.seed);
  1082. else
  1083. }
  1084. Result:= XXH3_128bits_internal(state^.buffer, UIntPtr(state^.totalLen), 0,
  1085. secret, state^.secretLimit + XXH_STRIPE_LEN);
  1086. end;
  1087. end;
initialization
{$IF DEFINED(CPUX86_64)}
//* Select the accumulate/scramble backends once at unit startup. */
if AVX2Support then
begin
//* AVX2 path, chosen when the runtime CPU check reports support. */
XXH3_accumulate:= @XXH3_accumulate_avx2;
XXH3_scrambleAcc:= @XXH3_scrambleAcc_avx2;
XXH3_accumulate_512:= @XXH3_accumulate_512_avx2;
end
else begin
//* SSE2 fallback when AVX2 is not available. */
XXH3_accumulate:= @XXH3_accumulate_sse2;
XXH3_scrambleAcc:= @XXH3_scrambleAcc_sse2;
XXH3_accumulate_512:= @XXH3_accumulate_512_sse2;
end;
{$ELSE}
//* Portable scalar implementations on all other targets. */
XXH3_accumulate:= @XXH3_accumulate_scalar;
XXH3_scrambleAcc:= @XXH3_scrambleAcc_scalar;
XXH3_accumulate_512:= @XXH3_accumulate_512_scalar;
{$ENDIF}
end.