Browse Source

[xxhash] Add tests for large inputs

Test XXH32, XXH64, XXH3-64 and XXH3-128 for large inputs, with both all-at-once and streaming APIs.

XXH32_create_state and XXH64_create_state now implicitly call their "reset state" variants to simplify the streaming API to 3 steps:
- create state / defer destroy
- update
- digest (finalize)

These are tested against the first 1, 2, 4, 8 and 16 MiB of a single zero-filled buffer.
All return the same hashes as the one-shot versions and as the official xxhsum tool.

3778/3778 tests successful.
Jeroen van Rijn 3 years ago
parent
commit
6985181961

+ 2 - 1
core/hash/xxhash/xxhash_32.odin

@@ -197,6 +197,7 @@ XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
 */
 */
 XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
 XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
 	state := new(XXH32_state, allocator)
 	state := new(XXH32_state, allocator)
+	XXH32_reset_state(state)
 	return state, .None if state != nil else .Error
 	return state, .None if state != nil else .Error
 }
 }
 
 
@@ -258,7 +259,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		v3 := state.v3
 		v3 := state.v3
 		v4 := state.v4
 		v4 := state.v4
 
 
-		for len(buf) >= 15 {
+		for len(buf) >= 16 {
 			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
 			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
 			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
 			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
 			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
 			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, .Unaligned)); buf = buf[4:]

+ 1 - 0
core/hash/xxhash/xxhash_64.odin

@@ -163,6 +163,7 @@ XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
 */
 */
 XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
 XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
 	state := new(XXH64_state, allocator)
 	state := new(XXH64_state, allocator)
+	XXH64_reset_state(state)
 	return state, .None if state != nil else .Error
 	return state, .None if state != nil else .Error
 }
 }
 
 

+ 85 - 1
tests/core/hash/test_core_hash.odin

@@ -31,8 +31,10 @@ when ODIN_TEST {
 main :: proc() {
 main :: proc() {
 	t := testing.T{}
 	t := testing.T{}
 	test_benchmark_runner(&t)
 	test_benchmark_runner(&t)
-	test_xxhash_vectors(&t)
 	test_crc64_vectors(&t)
 	test_crc64_vectors(&t)
+	test_xxhash_vectors(&t)
+	test_xxhash_large(&t)
+
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
 	if TEST_fail > 0 {
 		os.exit(1)
 		os.exit(1)
@@ -191,6 +193,88 @@ test_benchmark_runner :: proc(t: ^testing.T) {
 	benchmark_print(name, options)
 	benchmark_print(name, options)
 }
 }
 
 
+@test
+test_xxhash_large :: proc(t: ^testing.T) {
+	many_zeroes := make([]u8, 16 * 1024 * 1024)
+	defer delete(many_zeroes)
+
+	// All at once.
+	for i, v in ZERO_VECTORS {
+		b := many_zeroes[:i]
+
+		xxh32    := xxhash.XXH32(b)
+		xxh64    := xxhash.XXH64(b)
+		xxh3_64  := xxhash.XXH3_64(b)
+		xxh3_128 := xxhash.XXH3_128(b)
+
+		xxh32_error     := fmt.tprintf("[   XXH32(%03d) ] Expected: %08x. Got: %08x.", i,   v.xxh_32,   xxh32)
+		xxh64_error     := fmt.tprintf("[   XXH64(%03d) ] Expected: %16x. Got: %16x.", i,   v.xxh_64,   xxh64)
+		xxh3_64_error   := fmt.tprintf("[XXH3_64(%03d)  ] Expected: %16x. Got: %16x.", i,  v.xxh3_64, xxh3_64)
+		xxh3_128_error  := fmt.tprintf("[XXH3_128(%03d) ] Expected: %32x. Got: %32x.", i, v.xxh3_128, xxh3_128)
+
+		expect(t, xxh32     == v.xxh_32,   xxh32_error)
+		expect(t, xxh64     == v.xxh_64,   xxh64_error)
+		expect(t, xxh3_64   == v.xxh3_64,  xxh3_64_error)
+		expect(t, xxh3_128  == v.xxh3_128, xxh3_128_error)
+	}
+
+	// Streamed
+	for i, v in ZERO_VECTORS {
+		b := many_zeroes[:i]
+
+		bytes_per_update := []int{1, 42, 13, 7, 16, 5, 23, 74, 1024, 511, 1023, 47}
+		update_size_idx: int
+
+		xxh_32_state, xxh_32_err := xxhash.XXH32_create_state()
+		defer xxhash.XXH32_destroy_state(xxh_32_state)
+		expect(t, xxh_32_err == nil, "Problem initializing XXH_32 state.")
+
+		xxh_64_state, xxh_64_err := xxhash.XXH64_create_state()
+		defer xxhash.XXH64_destroy_state(xxh_64_state)
+		expect(t, xxh_64_err == nil, "Problem initializing XXH_64 state.")
+
+		xxh3_64_state, xxh3_64_err := xxhash.XXH3_create_state()
+		defer xxhash.XXH3_destroy_state(xxh3_64_state)
+		expect(t, xxh3_64_err == nil, "Problem initializing XXH3_64 state.")
+
+		xxh3_128_state, xxh3_128_err := xxhash.XXH3_create_state()
+		defer xxhash.XXH3_destroy_state(xxh3_128_state)
+		expect(t, xxh3_128_err == nil, "Problem initializing XXH3_128 state.")
+
+		// XXH3_128_update
+
+		for len(b) > 0 {
+			update_size := min(len(b), bytes_per_update[update_size_idx % len(bytes_per_update)])
+			update_size_idx += 1
+
+			xxhash.XXH32_update   (xxh_32_state,   b[:update_size])
+			xxhash.XXH64_update   (xxh_64_state,   b[:update_size])
+
+			xxhash.XXH3_64_update (xxh3_64_state,  b[:update_size])
+			xxhash.XXH3_128_update(xxh3_128_state, b[:update_size])
+
+			b = b[update_size:]
+		}
+
+		// Now finalize
+		xxh32    := xxhash.XXH32_digest(xxh_32_state)
+		xxh64    := xxhash.XXH64_digest(xxh_64_state)
+
+		xxh3_64  := xxhash.XXH3_64_digest(xxh3_64_state)
+		xxh3_128 := xxhash.XXH3_128_digest(xxh3_128_state)
+
+		xxh32_error     := fmt.tprintf("[   XXH32(%03d) ] Expected: %08x. Got: %08x.", i,   v.xxh_32,   xxh32)
+		xxh64_error     := fmt.tprintf("[   XXH64(%03d) ] Expected: %16x. Got: %16x.", i,   v.xxh_64,   xxh64)
+		xxh3_64_error   := fmt.tprintf("[XXH3_64(%03d)  ] Expected: %16x. Got: %16x.", i,  v.xxh3_64, xxh3_64)
+		xxh3_128_error  := fmt.tprintf("[XXH3_128(%03d) ] Expected: %32x. Got: %32x.", i, v.xxh3_128, xxh3_128)
+
+		expect(t, xxh32     == v.xxh_32,   xxh32_error)
+		expect(t, xxh64     == v.xxh_64,   xxh64_error)
+		expect(t, xxh3_64   == v.xxh3_64,  xxh3_64_error)
+		expect(t, xxh3_128  == v.xxh3_128, xxh3_128_error)
+	}
+}
+
 @test
 @test
 test_xxhash_vectors :: proc(t: ^testing.T) {
 test_xxhash_vectors :: proc(t: ^testing.T) {
 	fmt.println("Verifying against XXHASH_TEST_VECTOR_SEEDED:")
 	fmt.println("Verifying against XXHASH_TEST_VECTOR_SEEDED:")

+ 70 - 2
tests/core/hash/test_vectors_xxhash.odin

@@ -3,7 +3,7 @@
 */
 */
 package test_core_hash
 package test_core_hash
 
 
-XXHASH_Test_Vectors_With_Seed :: struct #packed {
+XXHASH_Test_Vectors :: struct #packed {
 	/*
 	/*
 		Old hashes
 		Old hashes
 	*/
 	*/
@@ -17,7 +17,75 @@ XXHASH_Test_Vectors_With_Seed :: struct #packed {
 	xxh3_128: u128,
 	xxh3_128: u128,
 }
 }
 
 
-XXHASH_TEST_VECTOR_SEEDED := map[u64][257]XXHASH_Test_Vectors_With_Seed{
+ZERO_VECTORS := map[int]XXHASH_Test_Vectors{
+	1024 * 1024 = {
+		/*
+			Old hashes
+		*/
+		xxh_32   = 0x9430f97f,         // xxhsum -H0
+		xxh_64   = 0x87d2a1b6e1163ef1, // xxhsum -H1
+
+		/*
+			XXH3 hashes
+		*/
+		xxh3_128 = 0xb6ef17a3448492b6918780b90550bf34, // xxhsum -H2
+		xxh3_64  = 0x918780b90550bf34,                 // xxhsum -H3
+	},
+	1024 * 2048 = {
+		/*
+			Old hashes
+		*/
+		xxh_32   = 0xeeb74ca1,         // xxhsum -H0
+		xxh_64   = 0xeb8a7322f88e23db, // xxhsum -H1
+
+		/*
+			XXH3 hashes
+		*/
+		xxh3_128 = 0x7b3e6abe1456fd0094e26d8e04364852, // xxhsum -H2
+		xxh3_64  = 0x94e26d8e04364852,                 // xxhsum -H3
+	},
+	1024 * 4096 = {
+		/*
+			Old hashes
+		*/
+		xxh_32   = 0xa59010b8,         // xxhsum -H0
+		xxh_64   = 0x639f9e1a7cbc9d28, // xxhsum -H1
+
+		/*
+			XXH3 hashes
+		*/
+		xxh3_128 = 0x34001ae2f947e773165f453a5f35c459, // xxhsum -H2
+		xxh3_64  = 0x165f453a5f35c459,                 // xxhsum -H3
+	},
+	1024 * 8192 = {
+		/*
+			Old hashes
+		*/
+		xxh_32   = 0xfed1d084,         // xxhsum -H0
+		xxh_64   = 0x86823cbc61f6df0f, // xxhsum -H1
+
+		/*
+			XXH3 hashes
+		*/
+		xxh3_128 = 0x9d6bf1a4e92df02ce881a25e37e37b19, // xxhsum -H2
+		xxh3_64  = 0xe881a25e37e37b19,                 // xxhsum -H3
+	},
+	1024 * 16384 = {
+		/*
+			Old hashes
+		*/
+		xxh_32   = 0x0ee4ebf9,         // xxhsum -H0
+		xxh_64   = 0x412f1e415ee2d80b, // xxhsum -H1
+
+		/*
+			XXH3 hashes
+		*/
+		xxh3_128 = 0x14d914cac1f4c1b1c4979470a1b529a1, // xxhsum -H2
+		xxh3_64  = 0xc4979470a1b529a1,                 // xxhsum -H3
+	},
+}
+
+XXHASH_TEST_VECTOR_SEEDED := map[u64][257]XXHASH_Test_Vectors{
 	0 = {
 	0 = {
 		{ // Length: 000
 		{ // Length: 000
 			/*  XXH32 with seed   */ 0x02cc5d05,
 			/*  XXH32 with seed   */ 0x02cc5d05,