/* * Copyright (c) Contributors to the Open 3D Engine Project. * For complete copyright and license terms please see the LICENSE at the root of this distribution. * * SPDX-License-Identifier: Apache-2.0 OR MIT * */ #include #include #include #include #include using namespace AZ; namespace UnitTest { class MATH_SfmtTest : public LeakDetectionFixture { static const int BLOCK_SIZE = 100000; static const int BLOCK_SIZE64 = 50000; static const int COUNT = 1000; AZ::u64* array1; AZ::u64* array2; public: void SetUp() override { LeakDetectionFixture::SetUp(); array1 = (AZ::u64*)azmalloc(sizeof(AZ::u64) * 2 * (BLOCK_SIZE / 4), AZStd::alignment_of::value); array2 = (AZ::u64*)azmalloc(sizeof(AZ::u64) * 2 * (10000 / 4), AZStd::alignment_of::value); } void TearDown() override { azfree(array1); azfree(array2); LeakDetectionFixture::TearDown(); } void check32() { int i; AZ::u32* array32 = (AZ::u32*)array1; AZ::u32* array32_2 = (AZ::u32*)array2; AZ::u32 ini[] = { 0x1234, 0x5678, 0x9abc, 0xdef0 }; Sfmt g; EXPECT_LT(g.GetMinArray32Size(), 10000); g.FillArray32(array32, 10000); g.FillArray32(array32_2, 10000); g.Seed(); for (i = 0; i < 10000; i++) { EXPECT_NE(array32[i], g.Rand32()); } for (i = 0; i < 700; i++) { EXPECT_NE(array32_2[i], g.Rand32()); } g.Seed(ini, 4); g.FillArray32(array32, 10000); g.FillArray32(array32_2, 10000); g.Seed(ini, 4); for (i = 0; i < 10000; i++) { EXPECT_EQ(array32[i], g.Rand32()); } for (i = 0; i < 700; i++) { EXPECT_EQ(array32_2[i], g.Rand32()); } } void check64() { int i; AZ::u64* array64 = (AZ::u64*)array1; AZ::u64* array64_2 = (AZ::u64*)array2; AZ::u32 ini[] = { 5, 4, 3, 2, 1 }; Sfmt g; EXPECT_LT(g.GetMinArray64Size(), 5000); g.FillArray64(array64, 5000); g.FillArray64(array64_2, 5000); g.Seed(); for (i = 0; i < 5000; i++) { EXPECT_NE(array64[i], g.Rand64()); } for (i = 0; i < 700; i++) { EXPECT_NE(array64_2[i], g.Rand64()); } g.Seed(ini, 5); g.FillArray64(array64, 5000); g.FillArray64(array64_2, 5000); g.Seed(ini, 5); for (i = 0; i < 5000; i++) { EXPECT_EQ(array64[i], g.Rand64()); } for (i = 0; i < 700; i++) { EXPECT_EQ(array64_2[i], g.Rand64()); } } template bool ParallelTest() { Sfmt sfmt; // Arbitrary but deterministic seed values to guarantee that we get the same random numbers produced every time. constexpr AZ::u32 buffer[32] = { 0x8236ed73, 0x854aa369, 0xc7b68864, 0x5f1e49da, 0x58ea5a08, 0xa33fc74b, 0x0336bd81, 0x8d3c11ac, 0x7312bc57, 0x92fee3d1, 0x4b852f9f, 0xa7ac02ad, 0x4d72fb7a, 0x630641c6, 0x1edbeebf, 0xc18fdabe, 0xc6588dba, 0x6a821cf5, 0xec7e24b3, 0x44246d7e, 0x24af2f32, 0x4f64d44c, 0x44b24116, 0x65585572, 0x0f95038d, 0xd3e4c521, 0x6eea6bc1, 0x6d651ab5, 0x25dfc39e, 0x7502d183, 0xd32fc9bf, 0x9854093f }; // Seed the sfmt generator. sfmt.Seed(buffer, AZ_ARRAY_SIZE(buffer)); // Create a single array that will hold all the results, but we'll partition it so that each thread will write // its results to a different subset. AZStd::array results; AZStd::thread threads[NumThreads]; // Use a semaphore to block thread execution until all the threads are created so that we can synchronize // their start times to the best of our ability. AZStd::semaphore startSync; // Spawn a set of threads that will each loop through and call the RandMethod (Rand32 or Rand64) for // a given number of times. for (size_t threadIdx = 0; threadIdx < AZ_ARRAY_SIZE(threads); ++threadIdx) { const size_t startOffset = NumResultsPerThread * threadIdx; auto threadFunc = [&startSync, startOffset, &results, &sfmt]() { // Block so that we can start all the threads at the same time after creation. startSync.acquire(); for (int i = 0; i < NumResultsPerThread; ++i) { // Call Rand32 when uint32_t is the passed-in type, and Rand64 if uint64_t is the passed-in type. if constexpr (AZStd::is_same_v) { results[startOffset + i] = sfmt.Rand32(); } else { results[startOffset + i] = sfmt.Rand64(); } } }; threads[threadIdx] = AZStd::thread(threadFunc); } // Now that all the threads are created, signal them to start. startSync.release(NumThreads); // Wait for all the threads to complete. for (auto& thread : threads) { thread.join(); } // Sort the results from all the threads so that we can easily detect duplicates. AZStd::sort(results.begin(), results.end()); // Look for duplicates and stop if we find one, since that's a full test failure. There's no need to keep iterating. for (size_t idx = 1; idx < results.size(); idx++) { if (results[idx - 1] == results[idx]) { EXPECT_NE(results[idx - 1], results[idx]); return false; } } // Test was successful return true; } }; TEST_F(MATH_SfmtTest, Test32Bit) { check32(); } TEST_F(MATH_SfmtTest, Test64Bit) { check64(); } TEST_F(MATH_SfmtTest, TestParallel32) { // This test verifies that we can call Rand32() successfully in parallel. // It also performs a reasonable verification that we haven't reintroduced a race condition in which the random number // sequence could get repeated for periods of time if multiple threads hit the point where Sfmt refills its number cache // simultaneously. Because it was a race condition, this test isn't foolproof, but the test conditions set below were // causing a near-100% failure rate with the previous race condition and has a 100% success rate with the fix. // The following numbers were chosen to be high enough to expose the problem at least once per run but low enough to keep // the overall test runtime down. constexpr size_t NumIterations = 5; constexpr size_t NumThreads = 8; constexpr size_t NumResultsPerThread = 2000; // Run the test single-threaded once to verify that we don't have any numerical collisions in our test set and test size. bool testSetHasNoNumericalCollisions = ParallelTest<1, NumThreads * NumResultsPerThread, uint32_t>(); ASSERT_TRUE(testSetHasNoNumericalCollisions); // Now run multi-threaded across multiple iterations to verify that we don't hit the race condition. for (size_t iterations = 0; iterations < NumIterations; iterations++) { bool parallelHasNoNumericalCollisions = ParallelTest(); ASSERT_TRUE(parallelHasNoNumericalCollisions); } } TEST_F(MATH_SfmtTest, TestParallel64) { // This test verifies that we can call Rand64() successfully in parallel. // The following numbers were chosen to be high enough to expose the problem at least once per run but low enough to keep // the overall test runtime down. constexpr size_t NumIterations = 5; constexpr size_t NumThreads = 8; constexpr size_t NumResultsPerThread = 2000; // Run the test single-threaded once to verify that we don't have any numerical collisions in our test set and test size. bool testSetHasNoNumericalCollisions = ParallelTest<1, NumThreads * NumResultsPerThread, uint64_t>(); ASSERT_TRUE(testSetHasNoNumericalCollisions); // Now run multi-threaded across multiple iterations to verify that we don't hit the race condition. for (size_t iterations = 0; iterations < NumIterations; iterations++) { bool parallelHasNoNumericalCollisions = ParallelTest(); ASSERT_TRUE(parallelHasNoNumericalCollisions); } } TEST_F(MATH_SfmtTest, TestParallelInterleaved) { Sfmt sfmt; auto threadFunc = [&sfmt]() { for (int i = 0; i < 10000; ++i) { AZ::u64 roll = sfmt.Rand64(); if (roll % 2 == 0) { sfmt.Rand32(); } else { sfmt.Rand64(); } } }; AZStd::thread threads[8]; for (size_t threadIdx = 0; threadIdx < AZ_ARRAY_SIZE(threads); ++threadIdx) { threads[threadIdx] = AZStd::thread(threadFunc); } for (auto& thread : threads) { thread.join(); } } }