Browse Source

Merge pull request #1149 from Kelimion/xxhash

Add `xxhash` 32-bit and 64-bit digest versions.
Jeroen van Rijn 4 years ago
parent
commit
fc66ce9dd6

+ 78 - 0
core/hash/xxhash/common.odin

@@ -0,0 +1,78 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:intrinsics"
+import "core:runtime"
+// Package-local alias for `runtime.mem_copy`, used for buffer and state copies in this package.
+mem_copy :: runtime.mem_copy
+
+/*
+	Version definition
+
+	`XXH_VERSION_NUMBER` packs the three components as
+	`MAJOR * 10_000 + MINOR * 100 + RELEASE`, i.e. 801 for the values below.
+*/
+XXH_VERSION_MAJOR   :: 0
+XXH_VERSION_MINOR   :: 8
+XXH_VERSION_RELEASE :: 1
+XXH_VERSION_NUMBER  :: XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE
+
+/*
+	Selects how `XXH32_read32` / `XXH64_read64` load words from the input.
+
+	0 - Use `mem_copy`, for platforms where unaligned reads are a problem
+	2 - Direct pointer cast, for platforms where unaligned reads are allowed (default)
+
+	Only the value 2 is tested for explicitly; any other value takes the `mem_copy` path
+	unless the caller reports the data as aligned.
+*/
+XXH_FORCE_MEMORY_ACCESS :: #config(XXH_FORCE_MEMORY_ACCESS, 2)
+
+/*
+	`false` - Use this on platforms where unaligned reads are fast
+	`true`  - Use this on platforms where unaligned reads are slow
+
+	When `true`, the one-shot `XXH32` / `XXH64` entry points test the input pointer's
+	alignment and pass `.Aligned` to the core routine when it is suitably aligned.
+*/
+XXH_FORCE_ALIGN_CHECK :: #config(XXH_FORCE_ALIGN_CHECK, false)
+
+/*
+	Tells the read helpers whether the caller knows the buffer to be aligned.
+	`.Aligned` always permits a direct pointer-cast load; `.Unaligned` defers to
+	`XXH_FORCE_MEMORY_ACCESS`.
+*/
+Alignment :: enum {
+	Aligned,
+	Unaligned,
+}
+
+/*
+	Result code returned by the streaming API. `Okay` is the zero value.
+*/
+Error :: enum {
+	Okay = 0,
+	Error,
+}
+
+/*
+	Rotate the 32-bit value `x` left by `r` bits.
+*/
+@(optimization_mode="speed")
+XXH_rotl32 :: #force_inline proc(x, r: u32) -> (res: u32) {
+	upper := x << r
+	lower := x >> (32 - r)
+	res = upper | lower
+	return
+}
+
+/*
+	Rotate the 64-bit value `x` left by `r` bits.
+*/
+@(optimization_mode="speed")
+XXH_rotl64 :: #force_inline proc(x, r: u64) -> (res: u64) {
+	upper := x << r
+	lower := x >> (64 - r)
+	res = upper | lower
+	return
+}
+
+/*
+	Load a little-endian 32-bit word from the start of `buf`.
+
+	A direct pointer-cast load is used when `XXH_FORCE_MEMORY_ACCESS == 2` or the caller
+	states the data is `.Aligned`; otherwise the four bytes are copied into a local first.
+	The caller must guarantee `buf` holds at least 4 bytes; no bounds check is performed.
+*/
+@(optimization_mode="speed")
+XXH32_read32 :: #force_inline proc(buf: []u8, alignment: Alignment) -> (res: u32) {
+	direct_load := XXH_FORCE_MEMORY_ACCESS == 2 || alignment == .Aligned
+	if !direct_load {
+		word: u32le
+		mem_copy(&word, raw_data(buf), 4)
+		return u32(word)
+	}
+
+	#no_bounds_check word := (^u32le)(&buf[0])^
+	return u32(word)
+}
+
+/*
+	Load a little-endian 64-bit word from the start of `buf`.
+
+	A direct pointer-cast load is used when `XXH_FORCE_MEMORY_ACCESS == 2` or the caller
+	states the data is `.Aligned`; otherwise the eight bytes are copied into a local first.
+	The caller must guarantee `buf` holds at least 8 bytes; no bounds check is performed.
+*/
+@(optimization_mode="speed")
+XXH64_read64 :: #force_inline proc(buf: []u8, alignment: Alignment) -> (res: u64) {
+	direct_load := XXH_FORCE_MEMORY_ACCESS == 2 || alignment == .Aligned
+	if !direct_load {
+		word: u64le
+		mem_copy(&word, raw_data(buf), 8)
+		return u64(word)
+	}
+
+	#no_bounds_check word := (^u64le)(&buf[0])^
+	return u64(word)
+}

+ 319 - 0
core/hash/xxhash/xxhash_32.odin

@@ -0,0 +1,319 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:intrinsics"
+
+/*
+	32-bit hash functions
+
+	`XXH32_hash` is the digest type; `XXH32_DEFAULT_SEED` is the seed used when the caller
+	does not supply one.
+*/
+XXH32_hash :: u32
+XXH32_DEFAULT_SEED :: XXH32_hash(0)
+
+/*
+	Streaming state for XXH32.
+	Initialise with `XXH32_reset_state`, feed with `XXH32_update`, read with `XXH32_digest`.
+*/
+XXH32_state :: struct {
+   total_len_32: XXH32_hash,    /*!< Total length hashed, modulo 2^32 */
+   large_len:    XXH32_hash,    /*!< Non-zero once the total length hashed is >= 16 (handles @ref total_len_32 overflow) */
+   v1:           XXH32_hash,    /*!< First accumulator lane */
+   v2:           XXH32_hash,    /*!< Second accumulator lane */
+   v3:           XXH32_hash,    /*!< Third accumulator lane */
+   v4:           XXH32_hash,    /*!< Fourth accumulator lane */
+   mem32:        [4]XXH32_hash, /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+   memsize:      XXH32_hash,    /*!< Amount of data in @ref mem32 */
+   reserved:     XXH32_hash,    /*!< Reserved field. Do not read or write to it, it may be removed. */
+}
+
+/*
+	Canonical (big-endian) byte representation of an XXH32 hash.
+	See `XXH32_canonical_from_hash` and `XXH32_hash_from_canonical`.
+*/
+XXH32_canonical :: struct {
+	digest: [4]u8,
+}
+
+/*
+	Multiplicative constants used by the XXH32 round, finalize and avalanche steps.
+	The trailing comments give each value in binary.
+*/
+XXH_PRIME32_1 :: 0x9E3779B1     /*!< 0b10011110001101110111100110110001 */
+XXH_PRIME32_2 :: 0x85EBCA77     /*!< 0b10000101111010111100101001110111 */
+XXH_PRIME32_3 :: 0xC2B2AE3D     /*!< 0b11000010101100101010111000111101 */
+XXH_PRIME32_4 :: 0x27D4EB2F     /*!< 0b00100111110101001110101100101111 */
+XXH_PRIME32_5 :: 0x165667B1     /*!< 0b00010110010101100110011110110001 */
+
+/*
+	One accumulator round: fold the 32-bit word `input` into the lane value `seed`
+	and return the updated lane.
+*/
+@(optimization_mode="speed")
+XXH32_round :: #force_inline proc(seed, input: XXH32_hash) -> (res: XXH32_hash) {
+	res = seed + input * XXH_PRIME32_2
+	res = XXH_rotl32(res, 13) * XXH_PRIME32_1
+	return
+}
+
+/*
+	Final mixing step: alternating xor-shifts and multiplications so that
+	all bits of the accumulated value influence the result.
+*/
+@(optimization_mode="speed")
+XXH32_avalanche :: #force_inline proc(h32: u32) -> (res: u32) {
+	res = h32
+
+	res  = (res ~ (res >> 15)) * XXH_PRIME32_2
+	res  = (res ~ (res >> 13)) * XXH_PRIME32_3
+	res ~= res >> 16
+	return
+}
+
+/*
+	Fold the trailing `len(buf) & 15` bytes of `buf` into `h32` and avalanche the result.
+
+	Whole 32-bit words are consumed first via `process_4`, remaining single bytes via
+	`process_1`. The `switch` is an unrolled dispatch on the remaining byte count and
+	relies on `fallthrough` to chain the steps, so the case order is significant.
+*/
+@(optimization_mode="speed")
+XXH32_finalize :: #force_inline proc(h32: u32, buf: []u8, alignment: Alignment) -> (res: u32) {
+	// Mix in one byte and return the slice advanced by 1.
+	process_1 :: #force_inline proc(h32: u32, buf: []u8) -> (h32_res: u32, buf_res: []u8) {
+		#no_bounds_check b := u32(buf[0])
+		h32_res = h32 + b * XXH_PRIME32_5
+		h32_res = XXH_rotl32(h32_res, 11) * XXH_PRIME32_1
+		#no_bounds_check return h32_res, buf[1:]
+	}
+
+	// Mix in one little-endian 32-bit word and return the slice advanced by 4.
+	process_4 :: #force_inline proc(h32: u32, buf: []u8, alignment: Alignment) -> (h32_res: u32, buf_res: []u8) {
+		b := XXH32_read32(buf, alignment)
+		h32_res = h32 + b * XXH_PRIME32_3
+		h32_res = XXH_rotl32(h32_res, 17) * XXH_PRIME32_4
+		#no_bounds_check return h32_res, buf[4:]
+	}
+
+	buf := buf
+	h32 := h32
+
+	// Only the low 4 bits of the length matter: at most 15 bytes are consumed here.
+	switch len(buf) & 15 {
+	// 12, 8, 4: whole words only.
+	case 12:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 8:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 4:
+		h32, _ = process_4(h32, buf, alignment)
+		return XXH32_avalanche(h32)
+
+	// 13, 9, 5: words followed by one byte.
+	case 13:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 9:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 5:
+		h32, buf = process_4(h32, buf, alignment)
+		h32, buf = process_1(h32, buf)
+		return XXH32_avalanche(h32)
+
+	// 14, 10, 6: words followed by two bytes.
+	case 14:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 10:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 6:
+		h32, buf = process_4(h32, buf, alignment)
+		h32, buf = process_1(h32, buf)
+		h32, buf = process_1(h32, buf)
+		return XXH32_avalanche(h32)
+
+	// 15, 11, 7: words, then fall into the 3/2/1/0 byte chain below.
+	case 15:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 11:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+	case 7:
+		h32, buf = process_4(h32, buf, alignment)
+		fallthrough
+
+	case 3:
+		h32, buf = process_1(h32, buf)
+		fallthrough
+	case 2:
+		h32, buf = process_1(h32, buf)
+		fallthrough
+	case 1:
+		h32, buf = process_1(h32, buf)
+		fallthrough
+	case 0:
+		return XXH32_avalanche(h32)
+	}
+	// Every value of `len(buf) & 15` (0..15) has a case above that returns.
+	unreachable()
+}
+
+/*
+	One-shot XXH32 core.
+
+	Inputs of 16 bytes or more are consumed in 16-byte stripes (four 32-bit lanes), after
+	which the four lanes are merged. Shorter inputs start from `seed + XXH_PRIME32_5`.
+	The total length is then added and the remaining (< 16) bytes are handled by
+	`XXH32_finalize`.
+
+	`alignment` is forwarded to the read helpers; pass `.Aligned` only if `input` is
+	known to be 4-byte aligned.
+*/
+@(optimization_mode="speed")
+XXH32_endian_align :: #force_inline proc(input: []u8, seed := XXH32_DEFAULT_SEED, alignment: Alignment) -> (res: XXH32_hash) {
+	buf := input
+	length := len(input)
+
+	if length >= 16 {
+		v1 := seed + XXH_PRIME32_1 + XXH_PRIME32_2
+		v2 := seed + XXH_PRIME32_2
+		v3 := seed + 0
+		v4 := seed - XXH_PRIME32_1
+
+		/*
+			Each iteration consumes a full 16-byte stripe, so at least 16 bytes must remain.
+			(A bound of 15 would read past the end of the input and corrupt the hash
+			whenever exactly 15 bytes are left, e.g. for a 31-byte input.)
+		*/
+		for len(buf) >= 16 {
+			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v4 = XXH32_round(v4, XXH32_read32(buf, alignment)); buf = buf[4:]
+		}
+
+		res = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18)
+	} else {
+		res  = seed + XXH_PRIME32_5
+	}
+
+	res += u32(length)
+	return XXH32_finalize(res, buf, alignment)
+}
+
+/*
+	Compute the 32-bit xxHash of `input` in one call, using `seed`.
+
+	When `XXH_FORCE_ALIGN_CHECK` is enabled and `input` starts on a 4-byte boundary,
+	the aligned read path is selected; otherwise the unaligned path is used.
+*/
+XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
+	when false {
+		/*
+			Reference version built on the streaming API.
+			Easier to maintain, but slow for small inputs, hence disabled.
+		*/
+		state: XXH32_state
+		XXH32_reset_state(&state, seed)
+		XXH32_update(&state, input)
+		return XXH32_digest(&state)
+	} else {
+		when XXH_FORCE_ALIGN_CHECK {
+			addr := uintptr(raw_data(input))
+			if addr & uintptr(3) == 0 {
+				// 4-byte aligned input: take the aligned read path.
+				return XXH32_endian_align(input, seed, .Aligned)
+			}
+		}
+		return XXH32_endian_align(input, seed, .Unaligned)
+	}
+}
+
+/*
+	******   Hash streaming   ******
+*/
+/*
+	Allocate an `XXH32_state` with `allocator`.
+	Returns `.Error` when the allocation yields a nil pointer.
+*/
+XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
+	res = new(XXH32_state, allocator)
+	if res == nil {
+		err = .Error
+	}
+	return
+}
+
+/*
+	Release a state with `allocator`; pass the same allocator that was given to
+	`XXH32_create_state`. Always reports success.
+*/
+XXH32_destroy_state :: proc(state: ^XXH32_state, allocator := context.allocator) -> (err: Error) {
+	free(state, allocator)
+	return .Okay
+}
+
+/*
+	Copy the complete streaming state from `src` into `dest`.
+	Both pointers must be non-nil.
+*/
+XXH32_copy_state :: proc(dest, src: ^XXH32_state) {
+	assert(dest != nil && src != nil)
+
+	STATE_BYTES :: size_of(XXH32_state)
+	mem_copy(dest, src, STATE_BYTES)
+}
+
+/*
+	(Re)initialise `state_ptr` for a new hash with the given `seed`.
+
+	All fields except the trailing `reserved` field are overwritten: lengths and buffer
+	are zeroed and the four lanes are derived from `seed`. Always reports success.
+*/
+XXH32_reset_state :: proc(state_ptr: ^XXH32_state, seed := XXH32_DEFAULT_SEED) -> (err: Error) {
+	fresh := XXH32_state{
+		v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2,
+		v2 = seed + XXH_PRIME32_2,
+		v3 = seed + 0,
+		v4 = seed - XXH_PRIME32_1,
+	}
+
+	/*
+		`reserved` is the last field; stop the copy just before it so it is never written.
+		It is planned to be removed in a future version.
+	*/
+	bytes_to_copy := size_of(fresh) - size_of(fresh.reserved)
+	mem_copy(state_ptr, &fresh, bytes_to_copy)
+	return .Okay
+}
+
+/*
+	Feed `input` into the streaming XXH32 state.
+
+	Data is processed in 16-byte stripes. Bytes that do not complete a stripe are kept
+	in `state.mem32` (with `state.memsize` recording how many) until a later call or
+	`XXH32_digest` consumes them. Always reports success.
+*/
+XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
+
+	buf    := input
+	length := len(buf)
+
+	state.total_len_32 += XXH32_hash(length)
+	state.large_len |= 1 if length >= 16 || state.total_len_32 >= 16 else 0
+
+	/*
+		Compare in `int`: converting `length` to u32 first would truncate inputs of
+		4 GiB or more and could wrongly route them into the 16-byte temporary buffer.
+	*/
+	if int(state.memsize) + length < 16 {   /* Fill in tmp buffer */
+		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), length)
+		state.memsize += XXH32_hash(length)
+		return nil
+	}
+
+	if state.memsize > 0 {/* Some data left from previous update */
+		/* Top the buffer up to a full stripe, then run one round per lane on it. */
+		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(16 - state.memsize))
+		{
+			#no_bounds_check state.v1 = XXH32_round(state.v1, state.mem32[0])
+			#no_bounds_check state.v2 = XXH32_round(state.v2, state.mem32[1])
+			#no_bounds_check state.v3 = XXH32_round(state.v3, state.mem32[2])
+			#no_bounds_check state.v4 = XXH32_round(state.v4, state.mem32[3])
+		}
+		buf = buf[16 - state.memsize:]
+		state.memsize = 0
+	}
+
+	if len(buf) >= 16 {
+		v1 := state.v1
+		v2 := state.v2
+		v3 := state.v3
+		v4 := state.v4
+
+		/*
+			Each iteration consumes a full 16-byte stripe, so at least 16 bytes must remain.
+			A bound of 15 would read past the end of the input when 15 bytes are left.
+		*/
+		for len(buf) >= 16 {
+			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v4 = XXH32_round(v4, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+		}
+
+		state.v1 = v1
+		state.v2 = v2
+		state.v3 = v3
+		state.v4 = v4
+	}
+
+	/* Stash the leftover (< 16) bytes for the next update or the digest. */
+	length = len(buf)
+	if length > 0 {
+		mem_copy(raw_data(state.mem32[:]), raw_data(buf[:]), int(length))
+		state.memsize = u32(length)
+	}
+	return nil
+}
+
+/*
+	Produce the XXH32 hash of everything fed into `state` so far.
+	The state's fields are only read, so more data may be added afterwards.
+*/
+XXH32_digest :: proc(state: ^XXH32_state) -> (res: XXH32_hash) {
+	if state.large_len == 0 {
+		// Fewer than 16 bytes in total: no stripe was processed, and v3 still holds the seed.
+		res = state.v3 /* == seed */ + XXH_PRIME32_5
+	} else {
+		res = XXH_rotl32(state.v1, 1)  + XXH_rotl32(state.v2, 7) + XXH_rotl32(state.v3, 12) + XXH_rotl32(state.v4, 18)
+	}
+
+	res += state.total_len_32
+
+	// View the buffered words as bytes and finalize over the part that is in use.
+	tail := (^[16]u8)(&state.mem32)^
+
+	alignment := Alignment.Unaligned
+	if uintptr(&state.mem32) & 15 == 0 {
+		alignment = .Aligned
+	}
+	return XXH32_finalize(res, tail[:state.memsize], alignment)
+}
+
+/*
+	******   Canonical representation   ******
+
+	The default return values from XXH functions are unsigned 32 and 64 bit integers.
+
+	The canonical representation uses big endian convention,
+	the same convention as human-readable numbers (large digits first).
+
+	This way, hash values can be written into a file or buffer, remaining
+	comparable across different systems.
+
+	The following functions allow transformation of hash values to and from their
+	canonical format.
+*/
+/*
+	Convert a hash value to its canonical form: the four bytes of the value in big-endian order.
+*/
+XXH32_canonical_from_hash :: proc(hash: XXH32_hash) -> (canonical: XXH32_canonical) {
+	#assert(size_of(XXH32_canonical) == size_of(XXH32_hash))
+
+	big_endian := u32be(hash)
+	mem_copy(&canonical, &big_endian, size_of(XXH32_canonical))
+	return canonical
+}
+
+/*
+	Convert a canonical (big-endian) digest back into a native hash value.
+*/
+XXH32_hash_from_canonical :: proc(canonical: ^XXH32_canonical) -> (hash: XXH32_hash) {
+	big_endian := (^u32be)(&canonical.digest)^
+	hash = XXH32_hash(big_endian)
+	return
+}

+ 294 - 0
core/hash/xxhash/xxhash_64.odin

@@ -0,0 +1,294 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:intrinsics"
+
+/*
+	64-bit hash functions
+
+	`XXH64_hash` is the digest type and `xxh_u64` the type used for intermediate values;
+	both are `u64`. `XXH64_DEFAULT_SEED` is the seed used when the caller does not supply one.
+*/
+XXH64_hash :: u64
+xxh_u64    :: u64
+XXH64_DEFAULT_SEED :: XXH64_hash(0)
+
+/*
+	Streaming state for XXH64.
+	Initialise with `XXH64_reset_state`, feed with `XXH64_update`, read with `XXH64_digest`.
+*/
+XXH64_state :: struct {
+   total_len:    XXH64_hash,    /*!< Total length hashed. This is always 64-bit. */
+   v1:           XXH64_hash,    /*!< First accumulator lane */
+   v2:           XXH64_hash,    /*!< Second accumulator lane */
+   v3:           XXH64_hash,    /*!< Third accumulator lane */
+   v4:           XXH64_hash,    /*!< Fourth accumulator lane */
+   mem64:        [4]XXH64_hash, /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+   memsize:      XXH32_hash,    /*!< Amount of data in @ref mem64 */
+   reserved32:   XXH32_hash,    /*!< Reserved field, needed for padding anyway */
+   reserved64:   XXH64_hash,    /*!< Reserved field. Do not read or write to it, it may be removed. */
+}
+
+/*
+	Canonical (big-endian) byte representation of an XXH64 hash.
+	See `XXH64_canonical_from_hash` and `XXH64_hash_from_canonical`.
+*/
+XXH64_canonical :: struct {
+	digest: [8]u8,
+}
+
+/*
+	Multiplicative constants used by the XXH64 round, merge, finalize and avalanche steps.
+	The trailing comments give each value in binary.
+*/
+XXH_PRIME64_1 :: 0x9E3779B185EBCA87 /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+XXH_PRIME64_2 :: 0xC2B2AE3D27D4EB4F /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+XXH_PRIME64_3 :: 0x165667B19E3779F9 /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+XXH_PRIME64_4 :: 0x85EBCA77C2B2AE63 /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+XXH_PRIME64_5 :: 0x27D4EB2F165667C5 /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+/*
+	One accumulator round: fold the 64-bit word `input` into the lane value `acc`
+	and return the updated lane.
+*/
+@(optimization_mode="speed")
+XXH64_round :: proc(acc, input: xxh_u64) -> (res: xxh_u64) {
+	res = acc + input * XXH_PRIME64_2
+	res = XXH_rotl64(res, 31) * XXH_PRIME64_1
+	return
+}
+
+/*
+	Merge one lane value `val` into the combined accumulator `acc`.
+*/
+@(optimization_mode="speed")
+XXH64_mergeRound :: proc(acc, val: xxh_u64) -> (res: xxh_u64) {
+	mixed := XXH64_round(0, val)
+	return (acc ~ mixed) * XXH_PRIME64_1 + XXH_PRIME64_4
+}
+
+/*
+	Final mixing step: alternating xor-shifts and multiplications so that
+	all bits of the accumulated value influence the result.
+*/
+@(optimization_mode="speed")
+XXH64_avalanche :: proc(h64: xxh_u64) -> (res: xxh_u64) {
+	mixed := h64
+
+	mixed  = (mixed ~ (mixed >> 33)) * XXH_PRIME64_2
+	mixed  = (mixed ~ (mixed >> 29)) * XXH_PRIME64_3
+	mixed ~= mixed >> 32
+	return mixed
+}
+
+/*
+	Fold the trailing `len(buf) & 31` bytes of `buf` into `h64` and avalanche the result.
+
+	The tail is consumed in three stages: 8-byte words, then at most one 4-byte word,
+	then single bytes. Each stage uses its own rotation and prime constants.
+*/
+@(optimization_mode="speed")
+XXH64_finalize :: proc(h64: xxh_u64, buf: []u8, alignment: Alignment) -> (res: xxh_u64) {
+	buf := buf
+	// Only the low 5 bits of the length matter: at most 31 bytes are consumed here.
+	length := len(buf) & 31
+	res = h64
+
+	// Stage 1: whole 64-bit words.
+	for length >= 8 {
+		b := XXH64_read64(buf, alignment)
+		k1 := XXH64_round(0, b)
+		#no_bounds_check buf = buf[8:]
+		res ~= k1
+		res  = XXH_rotl64(res, 27) * XXH_PRIME64_1 + XXH_PRIME64_4
+		length -= 8
+	}
+
+	// Stage 2: at most one 32-bit word (fewer than 8 bytes remain at this point).
+	if length >= 4 {
+		res ~= xxh_u64(XXH32_read32(buf, alignment)) * XXH_PRIME64_1
+		#no_bounds_check buf = buf[4:]
+		res = XXH_rotl64(res, 23) * XXH_PRIME64_2 + XXH_PRIME64_3
+		length -= 4
+	}
+
+	// Stage 3: remaining single bytes.
+	for length > 0 {
+		#no_bounds_check b := xxh_u64(buf[0])
+		buf = buf[1:]
+		res ~= b * XXH_PRIME64_5
+		res = XXH_rotl64(res, 11) * XXH_PRIME64_1
+		length -= 1
+	}
+	return XXH64_avalanche(res)
+}
+
+/*
+	One-shot XXH64 core.
+
+	Inputs of 32 bytes or more are consumed in 32-byte stripes (four 64-bit lanes), after
+	which the lanes are combined and merged. Shorter inputs start from `seed + XXH_PRIME64_5`.
+	The total length is then added and the remaining (< 32) bytes are handled by
+	`XXH64_finalize`.
+
+	`alignment` is forwarded to the read helpers.
+*/
+@(optimization_mode="speed")
+XXH64_endian_align :: proc(input: []u8, seed := XXH64_DEFAULT_SEED, alignment := Alignment.Unaligned) -> (res: xxh_u64) {
+	buf    := input
+	length := len(buf)
+
+	if length >= 32 {
+		v1 := seed + XXH_PRIME64_1 + XXH_PRIME64_2
+		v2 := seed + XXH_PRIME64_2
+		v3 := seed + 0
+		v4 := seed - XXH_PRIME64_1
+
+		// Each iteration consumes one full 32-byte stripe: 8 bytes per lane.
+		for len(buf) >= 32 {
+			v1 = XXH64_round(v1, XXH64_read64(buf, alignment)); buf = buf[8:]
+			v2 = XXH64_round(v2, XXH64_read64(buf, alignment)); buf = buf[8:]
+			v3 = XXH64_round(v3, XXH64_read64(buf, alignment)); buf = buf[8:]
+			v4 = XXH64_round(v4, XXH64_read64(buf, alignment)); buf = buf[8:]
+		}
+
+		// Combine the four lanes, then merge each lane into the result once more.
+		res = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18)
+		res = XXH64_mergeRound(res, v1)
+		res = XXH64_mergeRound(res, v2)
+		res = XXH64_mergeRound(res, v3)
+		res = XXH64_mergeRound(res, v4)
+	} else {
+		res = seed + XXH_PRIME64_5
+	}
+	res += xxh_u64(length)
+
+	// `buf` now holds only the bytes that did not fill a stripe.
+	return XXH64_finalize(res, buf, alignment)
+}
+
+/*
+	Compute the 64-bit xxHash of `input` in one call, using `seed`.
+
+	When `XXH_FORCE_ALIGN_CHECK` is enabled and `input` starts on an 8-byte boundary,
+	the aligned read path is selected; otherwise the unaligned path is used.
+*/
+XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
+	when false {
+		/*
+			Reference version built on the streaming API, feeding at most 64 KiB per update.
+			Easier to maintain, but slow for small inputs, hence disabled.
+		*/
+		state: XXH64_state
+		XXH64_reset_state(&state, seed)
+		buf := input
+		for len(buf) > 0 {
+			l := min(65536, len(buf))
+			XXH64_update(&state, buf[:l])
+			buf = buf[l:]
+		}
+		return XXH64_digest(&state)
+	} else {
+		when XXH_FORCE_ALIGN_CHECK {
+			addr := uintptr(raw_data(input))
+			if addr & uintptr(7) == 0 {
+				// 8-byte aligned input: take the aligned read path.
+				return XXH64_endian_align(input, seed, .Aligned)
+			}
+		}
+		return XXH64_endian_align(input, seed, .Unaligned)
+	}
+}
+
+/*
+	******   Hash Streaming   ******
+*/
+/*
+	Allocate an `XXH64_state` with `allocator`.
+	Returns `.Error` when the allocation yields a nil pointer.
+*/
+XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
+	res = new(XXH64_state, allocator)
+	if res == nil {
+		err = .Error
+	}
+	return
+}
+
+/*
+	Release a state with `allocator`; pass the same allocator that was given to
+	`XXH64_create_state`. Always reports success.
+*/
+XXH64_destroy_state :: proc(state: ^XXH64_state, allocator := context.allocator) -> (err: Error) {
+	free(state, allocator)
+	return .Okay
+}
+
+/*
+	Copy the complete streaming state from `src` into `dest`.
+	Both pointers must be non-nil.
+*/
+XXH64_copy_state :: proc(dest, src: ^XXH64_state) {
+	assert(dest != nil && src != nil)
+
+	STATE_BYTES :: size_of(XXH64_state)
+	mem_copy(dest, src, STATE_BYTES)
+}
+
+/*
+	(Re)initialise `state_ptr` for a new hash with the given `seed`.
+
+	All fields except the trailing `reserved64` field are overwritten: length and buffer
+	are zeroed and the four lanes are derived from `seed`. Always reports success.
+*/
+XXH64_reset_state :: proc(state_ptr: ^XXH64_state, seed := XXH64_DEFAULT_SEED) -> (err: Error) {
+	fresh := XXH64_state{
+		v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2,
+		v2 = seed + XXH_PRIME64_2,
+		v3 = seed + 0,
+		v4 = seed - XXH_PRIME64_1,
+	}
+
+	/*
+		Do not write into reserved64, it might be removed in a future version.
+		It is the last field, so the copy simply stops just before it.
+	*/
+	bytes_to_copy := size_of(fresh) - size_of(fresh.reserved64)
+	mem_copy(state_ptr, &fresh, bytes_to_copy)
+	return .Okay
+}
+
+/*
+	Feed `input` into the streaming XXH64 state.
+
+	Data is processed in 32-byte stripes. Bytes that do not complete a stripe are kept
+	in `state.mem64` (with `state.memsize` recording how many) until a later call or
+	`XXH64_digest` consumes them. Always reports success.
+*/
+@(optimization_mode="speed")
+XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
+	buf    := input
+	length := len(buf)
+
+	state.total_len += u64(length)
+
+	/*
+		Compare in `int`: converting `length` to u32 first would truncate inputs of
+		4 GiB or more and could wrongly route them into the 32-byte temporary buffer.
+	*/
+	if int(state.memsize) + length < 32 {  /* fill in tmp buffer */
+		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), length)
+		state.memsize += u32(length)
+		return nil
+	}
+
+	if state.memsize > 0 {   /* some data left from a previous update */
+		/* Top the buffer up to a full stripe, then run one round per lane on it. */
+		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(32 - state.memsize))
+		{
+			#no_bounds_check state.v1 = XXH64_round(state.v1, state.mem64[0])
+			#no_bounds_check state.v2 = XXH64_round(state.v2, state.mem64[1])
+			#no_bounds_check state.v3 = XXH64_round(state.v3, state.mem64[2])
+			#no_bounds_check state.v4 = XXH64_round(state.v4, state.mem64[3])
+		}
+		buf = buf[32 - state.memsize:]
+		state.memsize = 0
+	}
+
+	if len(buf) >= 32 {
+		v1 := state.v1
+		v2 := state.v2
+		v3 := state.v3
+		v4 := state.v4
+
+		/* Each iteration consumes one full 32-byte stripe: 8 bytes per lane. */
+		for len(buf) >= 32 {
+			#no_bounds_check v1 = XXH64_round(v1, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v2 = XXH64_round(v2, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v3 = XXH64_round(v3, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v4 = XXH64_round(v4, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+		}
+
+		state.v1 = v1
+		state.v2 = v2
+		state.v3 = v3
+		state.v4 = v4
+	}
+
+	/* Stash the leftover (< 32) bytes for the next update or the digest. */
+	length = len(buf)
+	if length > 0 {
+		mem_copy(raw_data(state.mem64[:]), raw_data(buf[:]), int(length))
+		state.memsize = u32(length)
+	}
+	return nil
+}
+
+/*
+	Produce the XXH64 hash of everything fed into `state` so far.
+	The state's fields are only read, so more data may be added afterwards.
+*/
+@(optimization_mode="speed")
+XXH64_digest :: proc(state: ^XXH64_state) -> (res: XXH64_hash) {
+	if state.total_len < 32 {
+		// No full stripe was processed, and v3 still holds the seed.
+		res = state.v3 /*seed*/ + XXH_PRIME64_5
+	} else {
+		a, b, c, d := state.v1, state.v2, state.v3, state.v4
+
+		res = XXH_rotl64(a, 1) + XXH_rotl64(b, 7) + XXH_rotl64(c, 12) + XXH_rotl64(d, 18)
+		res = XXH64_mergeRound(res, a)
+		res = XXH64_mergeRound(res, b)
+		res = XXH64_mergeRound(res, c)
+		res = XXH64_mergeRound(res, d)
+	}
+	res += XXH64_hash(state.total_len)
+
+	// View the buffered words as bytes and finalize over the part that is in use.
+	tail := (^[32]u8)(&state.mem64)^
+
+	alignment := Alignment.Unaligned
+	if uintptr(&state.mem64) & 15 == 0 {
+		alignment = .Aligned
+	}
+	return XXH64_finalize(res, tail[:state.memsize], alignment)
+}
+
+/*
+	******   Canonical representation   ******
+
+	The default return values from XXH functions are unsigned 32 and 64 bit integers.
+
+	The canonical representation uses big endian convention,
+	the same convention as human-readable numbers (large digits first).
+
+	This way, hash values can be written into a file or buffer, remaining
+	comparable across different systems.
+
+	The following functions allow transformation of hash values to and from their
+	canonical format.
+*/
+/*
+	Convert a hash value to its canonical form: the eight bytes of the value in big-endian order.
+*/
+XXH64_canonical_from_hash :: proc(hash: XXH64_hash) -> (canonical: XXH64_canonical) {
+	#assert(size_of(XXH64_canonical) == size_of(XXH64_hash))
+
+	big_endian := u64be(hash)
+	mem_copy(&canonical, &big_endian, size_of(XXH64_canonical))
+	return canonical
+}
+
+/*
+	Convert a canonical (big-endian) digest back into a native hash value.
+*/
+XXH64_hash_from_canonical :: proc(canonical: ^XXH64_canonical) -> (hash: XXH64_hash) {
+	big_endian := (^u64be)(&canonical.digest)^
+	hash = XXH64_hash(big_endian)
+	return
+}

+ 58 - 0
core/time/perf.odin

@@ -1,5 +1,7 @@
 package time
 
+import "core:mem"
+
 Tick :: struct {
 	_nsec: i64, // relative amount
 }
@@ -37,3 +39,59 @@ SCOPED_TICK_DURATION :: proc(d: ^Duration) -> Tick {
 _tick_duration_end :: proc(d: ^Duration, t: Tick) {
 	d^ = tick_since(t)
 }
+
+/*
+	Benchmark helpers
+*/
+
+/*
+	Result code returned by `benchmark` and by its setup/bench/teardown callbacks.
+	`Okay` is the zero value.
+*/
+Benchmark_Error :: enum {
+	Okay = 0,
+	Allocation_Error,
+}
+
+/*
+	Describes one benchmark run and receives its results.
+
+	`benchmark` calls `setup` (optional), `bench` (required) and `teardown` (optional),
+	then fills in `duration`, `rounds_per_second` and `megabytes_per_second`.
+*/
+Benchmark_Options :: struct {
+	// Callbacks. Each receives this struct and the allocator passed to `benchmark`.
+	setup:     #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+	bench:     #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+	teardown:  #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+
+	// Inputs for the callbacks; `benchmark` itself does not read these three fields.
+	rounds:    int,
+	bytes:     int,
+	input:     []u8,
+
+	// Expected to be set by `bench`: `count` and `processed` feed the throughput figures below.
+	count:     int,
+	processed: int,
+	output:    []u8, // Unused for hash benchmarks
+	hash:      u128,
+
+	/*
+		Performance
+
+		Written by `benchmark` after `bench` returns successfully.
+	*/
+	duration:             Duration,
+	rounds_per_second:    f64,
+	megabytes_per_second: f64,
+}
+
+/*
+	Run the benchmark described by `options`.
+
+	Calls `setup` (if set), times a single call to `bench`, derives the throughput
+	figures from `options.count` and `options.processed`, then calls `teardown` (if set).
+	The first callback error is returned immediately; note that `teardown` is therefore
+	not called when `setup` or `bench` fails.
+*/
+benchmark :: proc(options: ^Benchmark_Options, allocator := context.allocator) -> (err: Benchmark_Error) {
+	assert(options != nil)
+	assert(options.bench != nil)
+
+	if options.setup != nil {
+		options->setup(allocator) or_return
+	}
+
+	// The scoped helper writes the elapsed time into `elapsed` when this inner block exits.
+	elapsed: Duration
+	{
+		SCOPED_TICK_DURATION(&elapsed)
+		options->bench(allocator) or_return
+	}
+	options.duration = elapsed
+
+	// How many times the measured `bench` call would fit into one second.
+	runs_per_second := f64(Second) / f64(elapsed)
+
+	options.rounds_per_second    = runs_per_second * f64(options.count)
+	options.megabytes_per_second = f64(options.processed) / f64(1024 * 1024) * runs_per_second
+
+	if options.teardown != nil {
+		options->teardown(allocator) or_return
+	}
+	return
+}

+ 2 - 1
core/time/time_windows.odin

@@ -24,7 +24,8 @@ _tick_now :: proc() -> Tick {
 		return q * num + r * num / den
 	}
 
-	@thread_local qpc_frequency: win32.LARGE_INTEGER
+	// @thread_local qpc_frequency: win32.LARGE_INTEGER
+	qpc_frequency: win32.LARGE_INTEGER
 
 	if qpc_frequency == 0 {
 		win32.QueryPerformanceFrequency(&qpc_frequency)

+ 4 - 1
tests/core/Makefile

@@ -1,7 +1,7 @@
 ODIN=../../odin
 PYTHON=$(shell which python3)
 
-all: download_test_assets image_test compress_test strings_test
+all: download_test_assets image_test compress_test strings_test hash_test
 
 download_test_assets:
 	$(PYTHON) download_assets.py
@@ -14,3 +14,6 @@ compress_test:
 
 strings_test:
 	$(ODIN) run strings/test_core_strings.odin
+
+hash_test:
+	$(ODIN) run hash/test_core_hash.odin -o:size -no-bounds-check

+ 5 - 0
tests/core/build.bat

@@ -16,3 +16,8 @@ echo ---
 echo Running core:strings tests
 echo ---
 %PATH_TO_ODIN% run strings %COMMON%
+
+echo ---
+echo Running core:hash tests
+echo ---
+%PATH_TO_ODIN% run hash %COMMON% -o:size

+ 131 - 0
tests/core/hash/test_core_hash.odin

@@ -0,0 +1,131 @@
+package test_core_image
+
+import "core:hash/xxhash"
+import "core:time"
+import "core:testing"
+import "core:fmt"
+
+// Running totals maintained by the fallback `expect` below and reported by `main`.
+TEST_count := 0
+TEST_fail  := 0
+
+/*
+	Under `ODIN_TEST` the `core:testing` helpers are used directly.
+	Otherwise minimal stand-ins print the result of each check and keep the
+	pass/fail counters up to date.
+*/
+when ODIN_TEST {
+    expect  :: testing.expect
+    log     :: testing.log
+} else {
+    // Prints the caller location followed by PASS or FAIL; `t` is unused.
+    expect  :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
+        fmt.printf("[%v] ", loc)
+        TEST_count += 1
+        if !condition {
+            TEST_fail += 1
+            fmt.println(" FAIL:", message)
+            return
+        }
+        fmt.println(" PASS")
+    }
+    // Prints the caller location followed by the value; `t` is unused.
+    log     :: proc(t: ^testing.T, v: any, loc := #caller_location) {
+        fmt.printf("[%v] ", loc)
+        fmt.printf("log: %v\n", v)
+    }
+}
+
+// Entry point: runs the benchmark test with a default `testing.T` and prints a pass summary.
+main :: proc() {
+    t := testing.T{}
+    test_benchmark_runner(&t)
+    fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+}
+
+/*
+    Benchmarks
+*/
+
+// Benchmark setup: allocates a buffer of `options.bytes` bytes as the hash input.
+setup_xxhash :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+    assert(options != nil)
+
+    options.input = make([]u8, options.bytes, allocator)
+    if len(options.input) != options.bytes {
+        return .Allocation_Error
+    }
+    return nil
+}
+
+// Benchmark teardown: releases the input buffer created by `setup_xxhash`.
+teardown_xxhash :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+    assert(options != nil)
+
+    // Free with the allocator that `setup_xxhash` allocated from, not whatever
+    // `context.allocator` happens to be at this point.
+    delete(options.input, allocator)
+    return nil
+}
+
+// Benchmark body: hashes `options.input` with XXH32 exactly `options.rounds` times
+// and records the round count, bytes processed and the last hash value.
+benchmark_xxhash32 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+    buf := options.input
+
+    h: u32
+    // Half-open range: an inclusive `0..=rounds` would run one more round than is reported below.
+    for _ in 0..<options.rounds {
+        h = xxhash.XXH32(buf)
+    }
+    options.count     = options.rounds
+    options.processed = options.rounds * options.bytes
+    options.hash      = u128(h)
+    return nil
+}
+
+// Benchmark body: hashes `options.input` with XXH64 exactly `options.rounds` times
+// and records the round count, bytes processed and the last hash value.
+benchmark_xxhash64 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+    buf := options.input
+
+    h: u64
+    // Half-open range: an inclusive `0..=rounds` would run one more round than is reported below.
+    for _ in 0..<options.rounds {
+        h = xxhash.XXH64(buf)
+    }
+    options.count     = options.rounds
+    options.processed = options.rounds * options.bytes
+    options.hash      = u128(h)
+    return nil
+}
+
+// Prints a two-line summary of a finished benchmark: rounds, bytes processed,
+// elapsed nanoseconds, and the derived rounds/s and MiB/s figures.
+benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
+    elapsed_ns := time.duration_nanoseconds(options.duration)
+
+    fmt.printf(
+        "\t[%v] %v rounds, %v bytes procesed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
+        name, options.rounds, options.processed, elapsed_ns,
+        options.rounds_per_second, options.megabytes_per_second,
+    )
+}
+
+/*
+	Runs four benchmarks (XXH32 and XXH64, each over 100 bytes and 1 MiB of zero bytes)
+	and checks each resulting hash against a fixed expected value.
+	The same `options` struct is reused; only `bytes` and `bench` change between runs.
+*/
+@test
+test_benchmark_runner :: proc(t: ^testing.T) {
+    fmt.println("Starting benchmarks:")
+
+    name    := "xxhash32 100 zero bytes"
+    options := &time.Benchmark_Options{
+        rounds   = 1_000,
+        bytes    = 100,
+        setup    = setup_xxhash,
+        bench    = benchmark_xxhash32,
+        teardown = teardown_xxhash,
+    }
+
+    err  := time.benchmark(options, context.allocator)
+    expect(t, err == nil, name)
+    expect(t, options.hash == 0x85f6413c, name)
+    benchmark_print(name, options)
+
+    name = "xxhash32 1 MiB zero bytes"
+    options.bytes = 1_048_576
+    err = time.benchmark(options, context.allocator)
+    expect(t, err == nil, name)
+    expect(t, options.hash == 0x9430f97f, name)
+    benchmark_print(name, options)
+
+    // Switch to the 64-bit hash for the remaining two runs.
+    name = "xxhash64 100 zero bytes"
+    options.bytes  = 100
+    options.bench = benchmark_xxhash64
+    err = time.benchmark(options, context.allocator)
+    expect(t, err == nil, name)
+    expect(t, options.hash == 0x17bb1103c92c502f, name)
+    benchmark_print(name, options)
+
+    name = "xxhash64 1 MiB zero bytes"
+    options.bytes = 1_048_576
+    err = time.benchmark(options, context.allocator)
+    expect(t, err == nil, name)
+    expect(t, options.hash == 0x87d2a1b6e1163ef1, name)
+    benchmark_print(name, options)
+}