Bläddra i källkod

Merge pull request #1170 from Kelimion/xxhash

xxhash: Add tests for streaming input.
Jeroen van Rijn 4 år sedan
förälder
incheckning
6d7df6f335

+ 1 - 1
core/hash/xxhash/common.odin

@@ -41,7 +41,7 @@ Alignment :: enum {
 }
 
 Error :: enum {
-	Okay = 0,
+	None = 0,
 	Error,
 }
 

+ 372 - 0
core/hash/xxhash/streaming.odin

@@ -0,0 +1,372 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:mem"
+import "core:intrinsics"
+
+/*
+	===   XXH3 128-bit streaming   ===
+
+	All the functions are actually the same as for 64-bit streaming variant.
+	The only difference is the finalization routine.
+*/
+/*
+	Reset the streaming state to use the default built-in secret with no seed.
+	Shared between the 64-bit and 128-bit variants.
+*/
+XXH3_128_reset :: proc(state: ^XXH3_state) -> (err: Error) {
+	if state != nil {
+		XXH3_reset_internal(state, 0, XXH3_kSecret[:], len(XXH3_kSecret))
+		return .None
+	}
+	return .Error
+}
+XXH3_64_reset :: XXH3_128_reset
+
+/*
+	Reset the streaming state to hash with a caller-supplied secret.
+	The secret must be at least `XXH3_SECRET_SIZE_MIN` bytes long.
+*/
+XXH3_128_reset_with_secret :: proc(state: ^XXH3_state, secret: []u8) -> (err: Error) {
+	/* A nil secret slice has length 0 and is rejected by the same length test. */
+	if state == nil || len(secret) < XXH3_SECRET_SIZE_MIN {
+		return .Error
+	}
+	XXH3_reset_internal(state, 0, secret, len(secret))
+	return .None
+}
+XXH3_64_reset_with_secret :: XXH3_128_reset_with_secret
+
+/*
+	Reset the streaming state to hash with the given seed.
+	A seed of 0 is equivalent to `XXH3_128_reset`.
+
+	Fix: guard against a nil `state` before dereferencing `state.seed`,
+	matching the other reset procedures (and upstream C
+	`XXH3_128bits_reset_withSeed`, which checks `statePtr == NULL` first).
+*/
+XXH3_128_reset_with_seed :: proc(state: ^XXH3_state, seed: XXH64_hash) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	if seed == 0 {
+		return XXH3_128_reset(state)
+	}
+	if seed != state.seed {
+		/* Only regenerate the custom secret when the seed actually changed. */
+		XXH3_init_custom_secret(state.custom_secret[:], seed)
+	}
+	XXH3_reset_internal(state, seed, nil, XXH_SECRET_DEFAULT_SIZE)
+	return .None
+}
+XXH3_64_reset_with_seed :: XXH3_128_reset_with_seed
+
+/*
+	Feed `input` into the streaming state.
+
+	Fix: the previous guard returned `.Error` for any input shorter than
+	`XXH3_MIDSIZE_MAX` (240 bytes), which made streaming small chunks
+	impossible. A streaming update accepts any length; an empty slice is a
+	no-op because `XXH3_update` requires non-empty input (it asserts on it).
+*/
+XXH3_128_update :: proc(state: ^XXH3_state, input: []u8) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	if len(input) == 0 {
+		return .None
+	}
+	return XXH3_update(state, input, XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+XXH3_64_update :: XXH3_128_update
+
+/*
+	Finalize the 128-bit streaming hash without altering `state`, so more
+	input may still be ingested afterwards.
+*/
+XXH3_128_digest :: proc(state: ^XXH3_state) -> (hash: XXH3_128_hash) {
+	/* Prefer the external secret when one was supplied at reset time. */
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		/* Long input: digest from a local copy of the accumulators. */
+		acc: [XXH_ACC_NB]XXH64_hash
+		XXH3_digest_long(acc[:], state, secret)
+
+		assert(state.secret_limit + XXH_STRIPE_LEN >= XXH_ACC_NB + XXH_SECRET_MERGEACCS_START)
+		{
+			h128 := XXH128_hash_t{}
+
+			/* Low and high halves merge the same accumulators against different secret offsets. */
+			h128.low  = XXH3_mergeAccs(
+				acc[:],
+				secret[XXH_SECRET_MERGEACCS_START:],
+				state.total_length * XXH_PRIME64_1)
+
+			h128.high = XXH3_mergeAccs(
+				acc[:],
+				secret[state.secret_limit + XXH_STRIPE_LEN - size_of(acc) - XXH_SECRET_MERGEACCS_START:],
+				~(u64(state.total_length) * XXH_PRIME64_2))
+
+			return h128.h
+		}
+	}
+	/* len <= XXH3_MIDSIZE_MAX : short code. The entire input is still buffered. */
+	if state.seed != 0 {
+		return XXH3_128_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_128_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+/*======   Canonical representation   ======*/
+
+/*
+	Convert a 128-bit hash to its canonical byte representation:
+	big-endian, high 64 bits first, then the low 64 bits.
+*/
+XXH3_128_canonical_from_hash :: proc(hash: XXH128_hash_t) -> (canonical: XXH128_canonical) {
+	#assert(size_of(XXH128_canonical) == size_of(XXH128_hash_t))
+
+	t := hash
+	when ODIN_ENDIAN == "little" {
+		/* Canonical form is big-endian, so swap on little-endian hosts. */
+		t.high = byte_swap(t.high)
+		t.low  = byte_swap(t.low)
+	}
+	mem_copy(&canonical.digest,    &t.high, size_of(u64))
+	mem_copy(&canonical.digest[8], &t.low,  size_of(u64))
+	return
+}
+
+/*
+	Reconstruct a 128-bit hash value from its canonical (big-endian) byte form.
+	Inverse of `XXH3_128_canonical_from_hash`.
+*/
+XXH3_128_hash_from_canonical :: proc(src: ^XXH128_canonical) -> (hash: u128) {
+	result: XXH128_hash_t
+	result.high = u64((^u64be)(&src.digest[0])^)
+	result.low  = u64((^u64be)(&src.digest[8])^)
+	return result.h
+}
+
+/* ===   XXH3 streaming   === */
+
+/*
+	Minimal initialization of a freshly allocated state: clear the seed.
+	NOTE(review): callers appear expected to invoke one of the reset
+	procedures before hashing — confirm against the test suite.
+*/
+XXH3_init_state :: proc(state: ^XXH3_state) {
+	state.seed = 0
+}
+
+/*
+	Allocate a 64-byte aligned streaming state with the given allocator.
+
+	Fix: the allocation error was previously computed into `err` but then
+	discarded (`return state, nil`), and a failed allocation would still have
+	been passed to `XXH3_init_state` and dereferenced. The error is now
+	propagated and the state is only initialized on success.
+*/
+XXH3_create_state :: proc(allocator := context.allocator) -> (res: ^XXH3_state, err: Error) {
+	state, mem_error := mem.new_aligned(XXH3_state, 64, allocator)
+	if mem_error != nil {
+		return nil, .Error
+	}
+	XXH3_init_state(state)
+	return state, .None
+}
+
+/*
+	Free a state created with `XXH3_create_state`.
+
+	Fix: the `allocator` parameter was previously ignored — `free(state)`
+	released through `context.allocator`, which is wrong when the state was
+	created with a custom allocator. The state is now returned to the
+	allocator that was passed in.
+*/
+XXH3_destroy_state :: proc(state: ^XXH3_state, allocator := context.allocator) -> (err: Error) {
+	free(state, allocator)
+	return .None
+}
+
+/*
+	Duplicate the full streaming state from `src` into `dest`.
+	Both pointers must be non-nil.
+*/
+XXH3_copy_state :: proc(dest, src: ^XXH3_state) {
+	assert(dest != nil)
+	assert(src  != nil)
+	dest^ = src^
+}
+
+/*
+	Common reset logic shared by the public reset procedures.
+
+	Zeroes the members between `buffered_size` and `stripes_per_block`
+	(exclusive), primes the accumulators with xxHash constants, and records
+	the seed/secret configuration. `secret` may be nil on the seed path, in
+	which case the update/digest procedures fall back to `state.custom_secret`.
+*/
+XXH3_reset_internal :: proc(state: ^XXH3_state, seed: XXH64_hash, secret: []u8, secret_size: uint) {
+	assert(state != nil)
+
+	init_start  := offset_of(XXH3_state, buffered_size)
+	init_length := offset_of(XXH3_state, stripes_per_block) - init_start
+
+	assert(offset_of(XXH3_state, stripes_per_block) > init_start)
+
+	/*
+		Set members from buffered_size to stripes_per_block (excluded) to 0
+	*/
+	offset  := rawptr(uintptr(state) + uintptr(init_start))
+	intrinsics.mem_zero(offset, init_length)
+
+	/* Accumulators start from alternating 32-/64-bit xxHash primes. */
+	state.acc[0] = XXH_PRIME32_3
+	state.acc[1] = XXH_PRIME64_1
+	state.acc[2] = XXH_PRIME64_2
+	state.acc[3] = XXH_PRIME64_3
+	state.acc[4] = XXH_PRIME64_4
+	state.acc[5] = XXH_PRIME32_2
+	state.acc[6] = XXH_PRIME64_5
+	state.acc[7] = XXH_PRIME32_1
+	state.seed = seed
+	state.external_secret = secret
+
+	assert(secret_size >= XXH3_SECRET_SIZE_MIN)
+
+	/* The last XXH_STRIPE_LEN bytes of the secret are reserved for finalization. */
+	state.secret_limit = secret_size - XXH_STRIPE_LEN
+	state.stripes_per_block = state.secret_limit / XXH_SECRET_CONSUME_RATE
+}
+
+/*
+	Note: when XXH3_consumeStripes() is invoked, there must be a guarantee that at least
+	one more byte must be consumed from input so that the function can blindly consume
+	all stripes using the "normal" secret segment.
+*/
+
+/*
+	Accumulate `number_of_stripes` stripes of `input` into `acc`, scrambling
+	the accumulators at most once when a secret block boundary is crossed.
+*/
+XXH3_consume_stripes :: #force_inline proc(
+		acc: []xxh_u64, stripes_so_far: ^uint, stripes_per_block: uint, input: []u8,
+		number_of_stripes: uint, secret: []u8, secret_limit: uint,
+		f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) {
+
+	assert(number_of_stripes <= stripes_per_block) /* can handle max 1 scramble per invocation */
+	assert(stripes_so_far^ < stripes_per_block)
+
+	if stripes_per_block - stripes_so_far^ <= number_of_stripes {
+		/* need a scrambling operation */
+		stripes_to_end_of_block := stripes_per_block - stripes_so_far^
+		stripes_after_block     := number_of_stripes - stripes_to_end_of_block
+
+		/* Accumulate up to the block boundary with the remaining secret segment. */
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], stripes_to_end_of_block, f_acc512)
+
+		/* Scramble, then restart from the beginning of the secret for the overflow. */
+		f_scramble(acc, secret[secret_limit:])
+		XXH3_accumulate(acc, input[stripes_to_end_of_block * XXH_STRIPE_LEN:], secret, stripes_after_block, f_acc512)
+		stripes_so_far^ = stripes_after_block
+	} else {
+		/* Entire request fits inside the current block: accumulate and advance. */
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], number_of_stripes, f_acc512)
+		stripes_so_far^ += number_of_stripes
+	}
+}
+
+/*
+	Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+*/
+/*
+	Shared streaming-update routine for the 64-bit and 128-bit variants.
+	Buffers input until XXH3_INTERNAL_BUFFER_SIZE bytes are available, then
+	consumes whole internal buffers worth of stripes at a time.
+*/
+XXH3_update :: #force_inline proc(
+		state: ^XXH3_state, input: []u8,
+		f_acc512: XXH3_accumulate_512_f,
+		f_scramble: XXH3_scramble_accumulator_f) -> (err: Error) {
+
+	input  := input
+	length := len(input)
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	/* Callers must never pass an empty slice. */
+	assert(len(input) > 0)
+
+	state.total_length += u64(length)
+	assert(state.buffered_size <= XXH3_INTERNAL_BUFFER_SIZE)
+
+	if int(state.buffered_size) + length <= XXH3_INTERNAL_BUFFER_SIZE {  /* fill in tmp buffer */
+		mem_copy(&state.buffer[state.buffered_size], &input[0], length)
+		state.buffered_size += u32(length)
+		return .None
+	}
+
+	/* total input is now > XXH3_INTERNAL_BUFFER_SIZE */
+	XXH3_INTERNAL_BUFFER_STRIPES :: XXH3_INTERNAL_BUFFER_SIZE / XXH_STRIPE_LEN
+	#assert(XXH3_INTERNAL_BUFFER_SIZE % XXH_STRIPE_LEN == 0) /* clean multiple */
+
+	/*
+		Internal buffer is partially filled (always, except at beginning)
+		Complete it, then consume it.
+	*/
+	if state.buffered_size > 0 {
+		load_size := int(XXH3_INTERNAL_BUFFER_SIZE - state.buffered_size)
+		mem_copy(&state.buffer[state.buffered_size], &input[0], load_size)
+		input = input[load_size:]
+
+		XXH3_consume_stripes(
+			state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+			state.buffer[:], XXH3_INTERNAL_BUFFER_STRIPES,
+			secret, state.secret_limit, f_acc512, f_scramble)
+		state.buffered_size = 0
+	}
+	assert(len(input) > 0)
+
+	/* Consume input by a multiple of internal buffer size */
+	if len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+		remaining := input
+		for len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+			XXH3_consume_stripes(
+				state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+				input, XXH3_INTERNAL_BUFFER_STRIPES,
+				secret, state.secret_limit, f_acc512, f_scramble)
+
+			input = input[XXH3_INTERNAL_BUFFER_SIZE:]
+		}
+		/*
+			For the last partial stripe, keep a copy of the final XXH_STRIPE_LEN
+			bytes the loop above just consumed, so the digest always has a full
+			stripe available even when little input remains buffered.
+
+			Fix: the previous code captured `tail := input[:len(input) - XXH_STRIPE_LEN]`
+			before the loop and copied its *first* 64 bytes, whereas the upstream C
+			implementation copies from `input - XXH_STRIPE_LEN` *after* the loop,
+			i.e. the bytes immediately preceding the not-yet-buffered tail.
+		*/
+		consumed := len(remaining) - len(input)
+		mem_copy(&state.buffer[XXH3_INTERNAL_BUFFER_SIZE - XXH_STRIPE_LEN], &remaining[consumed - XXH_STRIPE_LEN], XXH_STRIPE_LEN)
+	}
+
+	length = len(input)
+	assert(length > 0)
+
+	/* Some remaining input (always) : buffer it */
+	mem_copy(&state.buffer[0], &input[0], length)
+	state.buffered_size = u32(length)
+	return .None
+}
+
+/*
+	Digest the long-input accumulators into `acc` without mutating `state`,
+	so more input can still be ingested afterwards.
+*/
+XXH3_digest_long :: #force_inline proc(acc: []u64, state: ^XXH3_state, secret: []u8) {
+	/*
+		Digest on a local copy. This way, the state remains unaltered, and it can
+		continue ingesting more input afterwards.
+	*/
+	mem_copy(&acc[0], &state.acc[0], size_of(state.acc))
+
+	if state.buffered_size >= XXH_STRIPE_LEN {
+		/* The `- 1` keeps one full stripe in reserve for the final accumulate below. */
+		number_of_stripes := uint((state.buffered_size - 1) / XXH_STRIPE_LEN)
+		stripes_so_far    := state.stripes_so_far
+
+		XXH3_consume_stripes(
+			acc[:], &stripes_so_far, state.stripes_per_block, state.buffer[:], number_of_stripes,
+			secret, state.secret_limit, XXH3_accumulate_512, XXH3_scramble_accumulator)
+
+		/* last stripe */
+		XXH3_accumulate_512(
+			acc[:],
+			state.buffer[state.buffered_size - XXH_STRIPE_LEN:],
+			secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+
+	} else {  /* bufferedSize < XXH_STRIPE_LEN */
+		/*
+			Not enough buffered data for a full stripe: borrow the tail that
+			XXH3_update stashed at the end of the buffer to complete one.
+		*/
+		last_stripe: [XXH_STRIPE_LEN]u8
+		catchup_size := int(XXH_STRIPE_LEN) - int(state.buffered_size)
+		assert(state.buffered_size > 0)  /* there is always some input buffered */
+
+		mem_copy(&last_stripe[0],            &state.buffer[XXH3_INTERNAL_BUFFER_SIZE - catchup_size], catchup_size)
+		mem_copy(&last_stripe[catchup_size], &state.buffer[0],                                        int(state.buffered_size))
+		XXH3_accumulate_512(acc[:], last_stripe[:], secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+	}
+}
+
+/*
+	Finalize the 64-bit streaming hash without altering `state`.
+
+	Fix: the short-input path previously tested `state.seed == 0` before
+	calling the *seeded* one-shot hash, which is inverted. Like the 128-bit
+	digest above (and upstream C `XXH3_64bits_digest`), the seeded variant
+	must be used when the seed is non-zero, the secret variant otherwise.
+*/
+XXH3_64_digest :: proc(state: ^XXH3_state) -> (hash: XXH64_hash) {
+	/* Prefer the external secret when one was supplied at reset time. */
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		acc: [XXH_ACC_NB]xxh_u64
+		XXH3_digest_long(acc[:], state, secret[:])
+
+		return XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], state.total_length * XXH_PRIME64_1)
+	}
+
+	/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input, still fully buffered */
+	if state.seed != 0 {
+		return XXH3_64_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_64_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+/*
+	Derive a new secret into `secret_buffer` from arbitrary seed material.
+	With an empty `custom_seed`, the default built-in secret is copied verbatim.
+
+	NOTE(review): the empty-seed path copies XXH_SECRET_DEFAULT_SIZE (192)
+	bytes while the assert only requires XXH3_SECRET_SIZE_MIN (136) — this
+	presumably expects len(secret_buffer) >= 192; confirm before relying on
+	shorter buffers.
+*/
+XXH3_generate_secret :: proc(secret_buffer: []u8, custom_seed: []u8) {
+	secret_length := len(secret_buffer)
+	assert(secret_length >= XXH3_SECRET_SIZE_MIN)
+
+	custom_seed_size := len(custom_seed)
+	if custom_seed_size == 0 {
+		/* No custom seed: use the canonical built-in secret as-is. */
+		k := XXH3_kSecret
+		mem_copy(&secret_buffer[0], &k[0], XXH_SECRET_DEFAULT_SIZE)
+		return
+	}
+
+	{
+		segment_size :: size_of(XXH128_hash_t)
+		number_of_segments := u64(XXH_SECRET_DEFAULT_SIZE / segment_size)
+
+		seeds: [12]u64le
+		assert(number_of_segments == 12)
+		assert(segment_size * number_of_segments == XXH_SECRET_DEFAULT_SIZE) /* exact multiple */
+
+		/* Hash the caller's seed material once to obtain a 16-byte scrambler. */
+		scrambler := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(custom_seed[:])})
+
+		/*
+			Copy customSeed to seeds[], truncating or repeating as necessary.
+			TODO: Convert `mem_copy` to slice copies.
+		*/
+		{
+			to_fill := min(custom_seed_size, size_of(seeds))
+			filled  := to_fill
+			mem_copy(&seeds[0], &custom_seed[0], to_fill)
+			for filled < size_of(seeds) {
+				/* Double the filled region each pass until seeds[] is full. */
+				to_fill = min(filled, size_of(seeds) - filled)
+				seed_offset := rawptr(uintptr(&seeds[0]) + uintptr(filled))
+				mem_copy(seed_offset, &seeds[0], to_fill)
+				filled += to_fill
+			}
+		}
+
+		/*
+			Generate secret
+		*/
+		mem_copy(&secret_buffer[0], &scrambler, size_of(scrambler))
+
+		/* Segment 0 is the scrambler itself; each later segment re-hashes it with a per-segment seed. */
+		for segment_number := u64(1); segment_number < number_of_segments; segment_number += 1 {
+			segment_start := segment_number * segment_size
+
+			this_seed := u64(seeds[segment_number]) + segment_number
+			segment := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(scrambler.digest[:], this_seed)})
+
+			mem_copy(&secret_buffer[segment_start], &segment, size_of(segment))
+		}
+	}
+}

+ 56 - 540
core/hash/xxhash/xxhash_3.odin

@@ -30,7 +30,7 @@ import "core:intrinsics"
 XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
 #assert(XXH_SECRET_DEFAULT_SIZE % 64 == 0)
 
-XXH3_kSecret :: [?]u8{
+XXH3_kSecret := [XXH_SECRET_DEFAULT_SIZE]u8{
 	0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
 	0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
 	0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
@@ -48,7 +48,7 @@ XXH3_kSecret :: [?]u8{
 	Do not change this constant.
 */
 XXH3_SECRET_SIZE_MIN    :: 136
-#assert(size_of(XXH3_kSecret) == 192 && size_of(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
+#assert(len(XXH3_kSecret) == 192 && len(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
 
 XXH_ACC_ALIGN           :: 8   /* scalar */
 
@@ -71,15 +71,15 @@ XXH3_state :: struct {
 	buffered_size:     u32,
 	reserved32:        u32,
 	stripes_so_far:    uint,
-	total_len:         u64,
+	total_length:      u64,
 	stripes_per_block: uint,
 	secret_limit:      uint,
 	seed:              u64,
 	reserved64:        u64,
-	external_secret:   ^[]u8,
+	external_secret:   []u8,
 }
 #assert(offset_of(XXH3_state, acc)    % 64 == 0 && offset_of(XXH3_state, custom_secret) % 64 == 0 &&
-        offset_of(XXH3_state, buffer) % 64 == 0)
+		offset_of(XXH3_state, buffer) % 64 == 0)
 
 /************************************************************************
 *  XXH3 128-bit variant
@@ -100,6 +100,10 @@ XXH128_hash_t :: struct #raw_union {
 }
 #assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
 
+XXH128_canonical :: struct {
+	digest: [size_of(XXH128_hash_t)]u8,
+}
+
 /*
 	The reason for the separate function is to prevent passing too many structs
 	around by value. This will hopefully inline the multiply, but we don't force it.
@@ -146,12 +150,12 @@ XXH3_rrmxmx :: #force_inline proc(h64, length: xxh_u64) -> (res: xxh_u64) {
 
 /*
 	==========================================
-	       XXH3 128 bits (a.k.a XXH128)
+		   XXH3 128 bits (a.k.a XXH128)
 	==========================================
 	XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
 	even without counting the significantly larger output size.
 
- 	For example, extra steps are taken to avoid the seed-dependent collisions
+	For example, extra steps are taken to avoid the seed-dependent collisions
 	in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
 
 	This strength naturally comes at the cost of some speed, especially on short
@@ -289,9 +293,6 @@ XXH128_mix32B :: #force_inline proc(acc: xxh_u128, input_1: []u8, input_2: []u8,
 	}
 }
 
-
-
-
 @(optimization_mode="speed")
 XXH3_len_17to128_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
 	length := len(input)
@@ -335,18 +336,18 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 		i: int
 		#no_bounds_check for i = 0; i < 4; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-				                  input[32 * i:],
-				                  input [32 * i + 16:],
-				                  secret[32 * i:],
-				                  seed)
+								  input[32 * i:],
+								  input [32 * i + 16:],
+								  secret[32 * i:],
+								  seed)
 		}
 		acc.low  = XXH3_avalanche(acc.low)
 		acc.high = XXH3_avalanche(acc.high)
 
 		#no_bounds_check for i = 4; i < nbRounds; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-				                  input[32 * i:], input[32 * i + 16:],
-				                  secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
+								  input[32 * i:], input[32 * i + 16:],
+								  secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
 								  seed)
 		}
 		/* last bytes */
@@ -360,9 +361,9 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 			h128 := XXH128_hash_t{}
 			h128.low  = acc.low + acc.high
 			h128.high = u64(
-				        u128(acc.low  * XXH_PRIME64_1) \
-			          + u128(acc.high * XXH_PRIME64_4) \
-			          + u128((u64(length) - seed) * XXH_PRIME64_2))
+						u128(acc.low  * XXH_PRIME64_1) \
+					  + u128(acc.high * XXH_PRIME64_4) \
+					  + u128((u64(length) - seed) * XXH_PRIME64_2))
 			h128.low  = XXH3_avalanche(h128.low)
 			h128.high = u64(i64(0) - i64(XXH3_avalanche(h128.high)))
 			return h128.h
@@ -406,18 +407,20 @@ XXH3_hashLong_128b_internal :: #force_inline proc(
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
  */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_default :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
-	k_secret := XXH3_kSecret
-	return XXH3_hashLong_128b_internal(input, k_secret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+	return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
  */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSecret :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 								input: []u8, seed: xxh_u64, secret: []u8,
 								f_acc512: XXH3_accumulate_512_f,
@@ -425,8 +428,7 @@ XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 								f_initSec: XXH3_init_custom_secret_f) -> (res: XXH3_128_hash) {
 
 	if seed == 0 {
-		k := XXH3_kSecret
-		return XXH3_hashLong_128b_internal(input, k[:], f_acc512, f_scramble)
+		return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
 	}
 
 	{
@@ -439,12 +441,14 @@ XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
  */
+ @(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_withSeed_internal(input, seed, secret, XXH3_accumulate_512, XXH3_scramble_accumulator , XXH3_init_custom_secret)
 }
 
 XXH3_hashLong128_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8)  -> (res: XXH3_128_hash)
 
+@(optimization_mode="speed")
 XXH3_128bits_internal :: #force_inline proc(
 	input: []u8, seed: xxh_u64, secret: []u8, f_hl128: XXH3_hashLong128_f) -> (res: XXH3_128_hash) {
 
@@ -470,150 +474,22 @@ XXH3_128bits_internal :: #force_inline proc(
 }
 
 /* ===   Public XXH128 API   === */
-
+@(optimization_mode="speed")
 XXH3_128_default :: proc(input: []u8) -> (hash: XXH3_128_hash) {
-	k := XXH3_kSecret
-	return XXH3_128bits_internal(input, 0, k[:], XXH3_hashLong_128b_withSeed)
+	return XXH3_128bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
 }
 
+@(optimization_mode="speed")
 XXH3_128_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: XXH3_128_hash) {
-	k := XXH3_kSecret
-	return XXH3_128bits_internal(input, seed, k[:], XXH3_hashLong_128b_withSeed)
+	return XXH3_128bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
 }
 
+@(optimization_mode="speed")
 XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
 	return XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
 }
 XXH3_128 :: proc { XXH3_128_default, XXH3_128_with_seed, XXH3_128_with_secret }
 
-/* ===   XXH3 128-bit streaming   === */
-
-/*
-	All the functions are actually the same as for 64-bit streaming variant.
-	The only difference is the finalization routine.
-*/
-
-/*
-
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	XXH3_reset_internal(statePtr, 0, secret, secretSize);
-	if (secret == NULL) return XXH_ERROR;
-	if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	if (seed==0) return XXH3_128bits_reset(statePtr);
-	if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
-	XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-	return XXH3_update(state, (const xxh_u8*)input, len,
-					   XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
-{
-	const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-	if (state->totalLen > XXH3_MIDSIZE_MAX) {
-		XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-		XXH3_digest_long(acc, state, secret);
-		XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-		{   XXH128_hash_t h128;
-			h128.low64  = XXH3_mergeAccs(acc,
-										 secret + XXH_SECRET_MERGEACCS_START,
-										 (xxh_u64)state->totalLen * XXH_PRIME64_1);
-			h128.high64 = XXH3_mergeAccs(acc,
-										 secret + state->secretLimit + XXH_STRIPE_LEN
-												- sizeof(acc) - XXH_SECRET_MERGEACCS_START,
-										 ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
-			return h128;
-		}
-	}
-	/* len <= XXH3_MIDSIZE_MAX : short code */
-	if (state->seed)
-		return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-	return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
-								   secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-
-/* 128-bit utility functions */
-
-#include <string.h>   /* memcmp, memcpy */
-
-/* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
-{
-	/* note : XXH128_hash_t is compact, it has no padding byte */
-	return !(memcmp(&h1, &h2, sizeof(h1)));
-}
-
-/* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1  > *h128_2
- *          <0 if *h128_1  < *h128_2
- *          =0 if *h128_1 == *h128_2  */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
-{
-	XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
-	XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
-	int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
-	/* note : bets that, in most cases, hash values are different */
-	if (hcmp) return hcmp;
-	return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
-}
-
-
-/*======   Canonical representation   ======*/
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
-{
-	XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
-	if (XXH_CPU_LITTLE_ENDIAN) {
-		hash.high64 = XXH_swap64(hash.high64);
-		hash.low64  = XXH_swap64(hash.low64);
-	}
-	memcpy(dst, &hash.high64, sizeof(hash.high64));
-	memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
-{
-	XXH128_hash_t h;
-	h.high64 = XXH_readBE64(src);
-	h.low64  = XXH_readBE64(src->digest + 8);
-	return h;
-}
-
-*/
-
-
 /*
 	==========================================
 	Short keys
@@ -818,9 +694,10 @@ XXH3_len_129to240_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res:
 
 /* =======     Long Keys     ======= */
 
-XXH_STRIPE_LEN          :: 64
-XXH_SECRET_CONSUME_RATE :: 8 /* nb of secret bytes consumed at each accumulation */
-XXH_ACC_NB              :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_STRIPE_LEN           :: 64
+XXH_SECRET_CONSUME_RATE  :: 8 /* nb of secret bytes consumed at each accumulation */
+XXH_ACC_NB               :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
 
 @(optimization_mode="speed")
 XXH_writeLE64 :: #force_inline proc(dst: []u8, v64: u64le) {
@@ -870,9 +747,10 @@ XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, se
 
 	#no_bounds_check for i := uint(0); i < XXH_ACC_NB; i += 1 {
 		data_val    := XXH64_read64(xinput[8 * i:])
-		data_key    := data_val ~ XXH64_read64(xsecret[8 * i:])
+		sec := XXH64_read64(xsecret[8 * i:])
+		data_key    := data_val ~ sec
 		xacc[i ~ 1] += data_val /* swap adjacent lanes */
-		xacc[i    ] += u64(u32(data_key)) * u64(data_key >> 32)
+		xacc[i    ] += u64(u128(u32(data_key)) * u128(u64(data_key >> 32)))
 	}
 }
 
@@ -897,12 +775,10 @@ XXH3_scramble_accumulator_scalar :: #force_inline proc(acc: []xxh_u64, secret: [
 XXH3_init_custom_secret_scalar :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
 	#assert((XXH_SECRET_DEFAULT_SIZE & 15) == 0)
 
-	kSecretPtr := XXH3_kSecret
-
 	nbRounds := XXH_SECRET_DEFAULT_SIZE / 16
 	#no_bounds_check for i := 0; i < nbRounds; i += 1 {
-		lo := XXH64_read64(kSecretPtr[16 * i:    ]) + seed64
-		hi := XXH64_read64(kSecretPtr[16 * i + 8:]) - seed64
+		lo := XXH64_read64(XXH3_kSecret[16 * i:    ]) + seed64
+		hi := XXH64_read64(XXH3_kSecret[16 * i + 8:]) - seed64
 		XXH_writeLE64(custom_secret[16 * i:    ], u64le(lo))
 		XXH_writeLE64(custom_secret[16 * i + 8:], u64le(hi))
 	}
@@ -916,8 +792,8 @@ XXH_PREFETCH_DIST :: 320
  * Assumption: nbStripes will not overflow the secret size
  */
 @(optimization_mode="speed")
-XXH3_accumulate :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint,
-	f_acc512: XXH3_accumulate_512_f) {
+XXH3_accumulate :: #force_inline proc(
+	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {
 
 	for n := uint(0); n < nbStripes; n += 1 {
 		when !XXH_DISABLE_PREFETCH {
@@ -952,7 +828,6 @@ XXH3_hashLong_internal_loop :: #force_inline proc(acc: []xxh_u64, input: []u8, s
 		/* last stripe */
 		#no_bounds_check {
 			p := input[length - XXH_STRIPE_LEN:]
-			XXH_SECRET_LASTACC_START :: 7  /* not aligned on 8, last secret is different from acc & scrambler */
 			f_acc512(acc, p, secret[secret_size - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START:])
 		}
 	}
@@ -993,6 +868,7 @@ XXH3_hashLong_64b_internal :: #force_inline proc(input: []u8, secret: []u8,
 /*
 	It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
 	return XXH3_hashLong_64b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
@@ -1004,9 +880,9 @@ XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u
 	This variant enforces that the compiler can detect that,
 	and uses this opportunity to streamline the generated code for better performance.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
-	k := XXH3_kSecret	
-	return XXH3_hashLong_64b_internal(input, k[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+	return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
 /*
@@ -1020,14 +896,14 @@ XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64,
 	It's important for performance that XXH3_hashLong is not inlined. Not sure
 	why (uop cache maybe?), but the difference is large and easily measurable.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_64b_withSeed_internal :: #force_no_inline proc(input: []u8,
 									seed:        xxh_u64,
 									f_acc512:    XXH3_accumulate_512_f,
 									f_scramble:  XXH3_scramble_accumulator_f,
 									f_init_sec:  XXH3_init_custom_secret_f) -> (hash: xxh_u64) {
 	if seed == 0 {
-		k := XXH3_kSecret
-		return XXH3_hashLong_64b_internal(input, k[:], f_acc512, f_scramble)
+		return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
 	}
 	{
 		secret: [XXH_SECRET_DEFAULT_SIZE]u8
@@ -1039,6 +915,7 @@ XXH3_hashLong_64b_withSeed_internal :: #force_no_inline proc(input: []u8,
 /*
 	It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
 	return XXH3_hashLong_64b_withSeed_internal(input, seed, XXH3_accumulate_512, XXH3_scramble_accumulator, XXH3_init_custom_secret)
 }
@@ -1046,11 +923,8 @@ XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64,
 
 XXH3_hashLong64_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8)  -> (res: xxh_u64)
 
+@(optimization_mode="speed")
 XXH3_64bits_internal :: proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLong: XXH3_hashLong64_f) -> (hash: xxh_u64) {
-
-
-
-
 	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
 	/*
 		If an action is to be taken if len(secret) condition is not respected, it should be done here.
@@ -1069,377 +943,19 @@ XXH3_64bits_internal :: proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLon
 }
 
 /* ===   Public entry point   === */
-
+@(optimization_mode="speed")
 XXH3_64_default :: proc(input: []u8) -> (hash: xxh_u64) {
-	k := XXH3_kSecret
-	return XXH3_64bits_internal(input, 0, k[:], XXH3_hashLong_64b_default)
+	return XXH3_64bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_64b_default)
 }
 
+@(optimization_mode="speed")
 XXH3_64_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: xxh_u64) {
-	k := XXH3_kSecret
-	return XXH3_64bits_internal(input, seed, k[:], XXH3_hashLong_64b_withSeed)
+	return XXH3_64bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_64b_withSeed)
 }
 
+@(optimization_mode="speed")
 XXH3_64_with_secret :: proc(input, secret: []u8) -> (hash: xxh_u64) {
 	return XXH3_64bits_internal(input, 0, secret, XXH3_hashLong_64b_withSecret)
 }
 
-XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }
-
-/*
-
-/* ===   XXH3 streaming   === */
-
-/*
- * Malloc's a pointer that is always aligned to align.
- *
- * This must be freed with `XXH_alignedFree()`.
- *
- * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
- * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
- * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
- *
- * This underalignment previously caused a rather obvious crash which went
- * completely unnoticed due to XXH3_createState() not actually being tested.
- * Credit to RedSpah for noticing this bug.
- *
- * The alignment is done manually: Functions like posix_memalign or _mm_malloc
- * are avoided: To maintain portability, we would have to write a fallback
- * like this anyways, and besides, testing for the existence of library
- * functions without relying on external build tools is impossible.
- *
- * The method is simple: Overallocate, manually align, and store the offset
- * to the original behind the returned pointer.
- *
- * Align must be a power of 2 and 8 <= align <= 128.
- */
-static void* XXH_alignedMalloc(size_t s, size_t align)
-{
-	XXH_ASSERT(align <= 128 && align >= 8); /* range check */
-	XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
-	XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
-	{   /* Overallocate to make room for manual realignment and an offset byte */
-		xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
-		if (base != NULL) {
-			/*
-			 * Get the offset needed to align this pointer.
-			 *
-			 * Even if the returned pointer is aligned, there will always be
-			 * at least one byte to store the offset to the original pointer.
-			 */
-			size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
-			/* Add the offset for the now-aligned pointer */
-			xxh_u8* ptr = base + offset;
-
-			XXH_ASSERT((size_t)ptr % align == 0);
-
-			/* Store the offset immediately before the returned pointer. */
-			ptr[-1] = (xxh_u8)offset;
-			return ptr;
-		}
-		return NULL;
-	}
-}
-/*
- * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
- * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
- */
-static void XXH_alignedFree(void* p)
-{
-	if (p != NULL) {
-		xxh_u8* ptr = (xxh_u8*)p;
-		/* Get the offset byte we added in XXH_malloc. */
-		xxh_u8 offset = ptr[-1];
-		/* Free the original malloc'd pointer */
-		xxh_u8* base = ptr - offset;
-		XXH_free(base);
-	}
-}
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
-{
-	XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
-	if (state==NULL) return NULL;
-	XXH3_INITSTATE(state);
-	return state;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
-{
-	XXH_alignedFree(statePtr);
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
-{
-	memcpy(dst_state, src_state, sizeof(*dst_state));
-}
-
-static void
-XXH3_reset_internal(XXH3_state_t* statePtr,
-						   XXH64_hash_t seed,
-						   const void* secret, size_t secretSize)
-{
-	size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
-	size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
-	XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
-	XXH_ASSERT(statePtr != NULL);
-	/* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
-	memset((char*)statePtr + initStart, 0, initLength);
-	statePtr->acc[0] = XXH_XXH_PRIME32_3;
-	statePtr->acc[1] = XXH_PRIME64_1;
-	statePtr->acc[2] = XXH_PRIME64_2;
-	statePtr->acc[3] = XXH_PRIME64_3;
-	statePtr->acc[4] = XXH_PRIME64_4;
-	statePtr->acc[5] = XXH_XXH_PRIME32_2;
-	statePtr->acc[6] = XXH_PRIME64_5;
-	statePtr->acc[7] = XXH_XXH_PRIME32_1;
-	statePtr->seed = seed;
-	statePtr->extSecret = (const unsigned char*)secret;
-	XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-	statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
-	statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	XXH3_reset_internal(statePtr, 0, secret, secretSize);
-	if (secret == NULL) return XXH_ERROR;
-	if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-	if (statePtr == NULL) return XXH_ERROR;
-	if (seed==0) return XXH3_64bits_reset(statePtr);
-	if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
-	XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-	return XXH_OK;
-}
-
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
-XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
-					size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
-					const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
-					const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-					XXH3_f_accumulate_512 f_acc512,
-					XXH3_f_scrambleAcc f_scramble)
-{
-	XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
-	XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-	if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
-		/* need a scrambling operation */
-		size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
-		size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
-		XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
-		f_scramble(acc, secret + secretLimit);
-		XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
-		*nbStripesSoFarPtr = nbStripesAfterBlock;
-	} else {
-		XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
-		*nbStripesSoFarPtr += nbStripes;
-	}
-}
-
-/*
- * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
- */
-XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* state,
-			const xxh_u8* input, size_t len,
-			XXH3_f_accumulate_512 f_acc512,
-			XXH3_f_scrambleAcc f_scramble)
-{
-	if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
-		return XXH_OK;
-#else
-		return XXH_ERROR;
-#endif
-
-	{   const xxh_u8* const bEnd = input + len;
-		const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-
-		state->totalLen += len;
-		XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
-
-		if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {  /* fill in tmp buffer */
-			XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-			state->bufferedSize += (XXH32_hash_t)len;
-			return XXH_OK;
-		}
-		/* total input is now > XXH3_INTERNALBUFFER_SIZE */
-
-		#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
-		XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
-
-		/*
-		 * Internal buffer is partially filled (always, except at beginning)
-		 * Complete it, then consume it.
-		 */
-		if (state->bufferedSize) {
-			size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
-			XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
-			input += loadSize;
-			XXH3_consumeStripes(state->acc,
-							   &state->nbStripesSoFar, state->nbStripesPerBlock,
-								state->buffer, XXH3_INTERNALBUFFER_STRIPES,
-								secret, state->secretLimit,
-								f_acc512, f_scramble);
-			state->bufferedSize = 0;
-		}
-		XXH_ASSERT(input < bEnd);
-
-		/* Consume input by a multiple of internal buffer size */
-		if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-			const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-			do {
-				XXH3_consumeStripes(state->acc,
-								   &state->nbStripesSoFar, state->nbStripesPerBlock,
-									input, XXH3_INTERNALBUFFER_STRIPES,
-									secret, state->secretLimit,
-									f_acc512, f_scramble);
-				input += XXH3_INTERNALBUFFER_SIZE;
-			} while (input<limit);
-			/* for last partial stripe */
-			memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-		}
-		XXH_ASSERT(input < bEnd);
-
-		/* Some remaining input (always) : buffer it */
-		XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
-		state->bufferedSize = (XXH32_hash_t)(bEnd-input);
-	}
-
-	return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-	return XXH3_update(state, (const xxh_u8*)input, len,
-					   XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-
-XXH_FORCE_INLINE void
-XXH3_digest_long (XXH64_hash_t* acc,
-				  const XXH3_state_t* state,
-				  const unsigned char* secret)
-{
-	/*
-	 * Digest on a local copy. This way, the state remains unaltered, and it can
-	 * continue ingesting more input afterwards.
-	 */
-	memcpy(acc, state->acc, sizeof(state->acc));
-	if (state->bufferedSize >= XXH_STRIPE_LEN) {
-		size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
-		size_t nbStripesSoFar = state->nbStripesSoFar;
-		XXH3_consumeStripes(acc,
-						   &nbStripesSoFar, state->nbStripesPerBlock,
-							state->buffer, nbStripes,
-							secret, state->secretLimit,
-							XXH3_accumulate_512, XXH3_scrambleAcc);
-		/* last stripe */
-		XXH3_accumulate_512(acc,
-							state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
-							secret + state->secretLimit - XXH_SECRET_LASTACC_START);
-	} else {  /* bufferedSize < XXH_STRIPE_LEN */
-		xxh_u8 lastStripe[XXH_STRIPE_LEN];
-		size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
-		XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
-		memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-		memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-		XXH3_accumulate_512(acc,
-							lastStripe,
-							secret + state->secretLimit - XXH_SECRET_LASTACC_START);
-	}
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
-{
-	const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-	if (state->totalLen > XXH3_MIDSIZE_MAX) {
-		XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-		XXH3_digest_long(acc, state, secret);
-		return XXH3_mergeAccs(acc,
-							  secret + XXH_SECRET_MERGEACCS_START,
-							  (xxh_u64)state->totalLen * XXH_PRIME64_1);
-	}
-	/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
-	if (state->seed)
-		return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-	return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
-								  secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-
-
-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
-{
-	XXH_ASSERT(secretBuffer != NULL);
-	if (customSeedSize == 0) {
-		memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-		return;
-	}
-	XXH_ASSERT(customSeed != NULL);
-
-	{   size_t const segmentSize = sizeof(XXH128_hash_t);
-		size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
-		XXH128_canonical_t scrambler;
-		XXH64_hash_t seeds[12];
-		size_t segnb;
-		XXH_ASSERT(nbSegments == 12);
-		XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
-		XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-
-		/*
-		* Copy customSeed to seeds[], truncating or repeating as necessary.
-		*/
-		{   size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
-			size_t filled = toFill;
-			memcpy(seeds, customSeed, toFill);
-			while (filled < sizeof(seeds)) {
-				toFill = XXH_MIN(filled, sizeof(seeds) - filled);
-				memcpy((char*)seeds + filled, seeds, toFill);
-				filled += toFill;
-		}   }
-
-		/* generate secret */
-		memcpy(secretBuffer, &scrambler, sizeof(scrambler));
-		for (segnb=1; segnb < nbSegments; segnb++) {
-			size_t const segmentStart = segnb * segmentSize;
-			XXH128_canonical_t segment;
-			XXH128_canonicalFromHash(&segment,
-				XXH128(&scrambler, sizeof(scrambler), XXH64_read64(seeds + segnb) + segnb) );
-			memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
-	}   }
-}
-
-*/
+XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }

+ 5 - 5
core/hash/xxhash/xxhash_32.odin

@@ -197,12 +197,12 @@ XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
 */
 XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
 	state := new(XXH32_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH32_destroy_state :: proc(state: ^XXH32_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH32_copy_state :: proc(dest, src: ^XXH32_state) {
@@ -221,7 +221,7 @@ XXH32_reset_state :: proc(state_ptr: ^XXH32_state, seed := XXH32_DEFAULT_SEED) -
 		Do not write into reserved, planned to be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved))
-	return nil
+	return .None
 }
 
 XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
@@ -236,7 +236,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += XXH32_hash(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {/* Some data left from previous update */
@@ -276,7 +276,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem32[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 XXH32_digest :: proc(state: ^XXH32_state) -> (res: XXH32_hash) {

+ 5 - 5
core/hash/xxhash/xxhash_64.odin

@@ -163,12 +163,12 @@ XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
 */
 XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
 	state := new(XXH64_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH64_destroy_state :: proc(state: ^XXH64_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH64_copy_state :: proc(dest, src: ^XXH64_state) {
@@ -187,7 +187,7 @@ XXH64_reset_state :: proc(state_ptr: ^XXH64_state, seed := XXH64_DEFAULT_SEED) -
 		Fo not write into reserved64, might be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved64))
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")
@@ -201,7 +201,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += u32(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {   /* tmp buffer is full */
@@ -241,7 +241,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem64[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")

+ 27 - 3
tests/core/hash/test_core_hash.odin

@@ -202,15 +202,39 @@ test_xxhash_vectors :: proc(t: ^testing.T) {
 
 			xxh32    := xxhash.XXH32(b, u32(seed))
 			xxh64    := xxhash.XXH64(b, seed)
+			xxh3_64  := xxhash.XXH3_64(b, seed)
 			xxh3_128 := xxhash.XXH3_128(b, seed)
 
-			xxh32_error    := fmt.tprintf("[   XXH32(%03d)] Expected: %08x. Got: %08x.", i,   v.xxh_32, xxh32)
-			xxh64_error    := fmt.tprintf("[   XXH64(%03d)] Expected: %16x. Got: %16x.", i,   v.xxh_64, xxh64)
-			xxh3_128_error := fmt.tprintf("[XXH3_128(%03d)] Expected: %32x. Got: %32x.", i, v.xxh3_128, xxh3_128)
+			xxh32_error     := fmt.tprintf("[   XXH32(%03d) ] Expected: %08x. Got: %08x.", i,   v.xxh_32, xxh32)
+			xxh64_error     := fmt.tprintf("[   XXH64(%03d) ] Expected: %16x. Got: %16x.", i,   v.xxh_64, xxh64)
+
+			xxh3_64_error   := fmt.tprintf("[XXH3_64(%03d)  ] Expected: %16x. Got: %16x.", i, v.xxh3_64, xxh3_64)
+			xxh3_128_error  := fmt.tprintf("[XXH3_128(%03d) ] Expected: %32x. Got: %32x.", i, v.xxh3_128, xxh3_128)
 
 			expect(t, xxh32     == v.xxh_32,   xxh32_error)
 			expect(t, xxh64     == v.xxh_64,   xxh64_error)
+			expect(t, xxh3_64   == v.xxh3_64,  xxh3_64_error)
 			expect(t, xxh3_128  == v.xxh3_128, xxh3_128_error)
+
+			if len(b) > xxhash.XXH3_MIDSIZE_MAX {
+				fmt.printf("XXH3 - size: %v\n", len(b))
+
+				xxh3_state, _ := xxhash.XXH3_create_state()
+				xxhash.XXH3_64_reset_with_seed(xxh3_state, seed)
+				xxhash.XXH3_64_update(xxh3_state, b)
+				xxh3_64_streamed := xxhash.XXH3_64_digest(xxh3_state)
+				xxhash.XXH3_destroy_state(xxh3_state)
+				xxh3_64s_error  := fmt.tprintf("[XXH3_64s(%03d) ] Expected: %16x. Got: %16x.", i, v.xxh3_64, xxh3_64_streamed)
+				expect(t, xxh3_64_streamed == v.xxh3_64, xxh3_64s_error)
+
+				xxh3_state2, _ := xxhash.XXH3_create_state()
+				xxhash.XXH3_128_reset_with_seed(xxh3_state2, seed)
+				xxhash.XXH3_128_update(xxh3_state2, b)
+				xxh3_128_streamed := xxhash.XXH3_128_digest(xxh3_state2)
+				xxhash.XXH3_destroy_state(xxh3_state2)
+				xxh3_128s_error  := fmt.tprintf("[XXH3_128s(%03d) ] Expected: %32x. Got: %32x.", i, v.xxh3_128, xxh3_128_streamed)
+				expect(t, xxh3_128_streamed == v.xxh3_128, xxh3_128s_error)
+			}
 		}
 	}