Преглед на файлове

Fixes in workgraphs tests

Panagiotis Christopoulos Charitos преди 1 година
родител
ревизия
373ddc7a08
променени са 3 файла, в които са добавени 85 реда и са изтрити 45 реда
  1. 2 1
      Tests/Gr/GrCommon.h
  2. 82 43
      Tests/Gr/GrWorkGraphs.cpp
  3. 1 1
      Tests/Gr/WorkDrainWg.hlsl

+ 2 - 1
Tests/Gr/GrCommon.h

@@ -145,11 +145,12 @@ template<typename T>
 inline TexturePtr createTexture2d(const TextureInitInfo texInit_, ConstWeakArray<T> data)
 inline TexturePtr createTexture2d(const TextureInitInfo texInit_, ConstWeakArray<T> data)
 {
 {
 	TextureInitInfo texInit = texInit_;
 	TextureInitInfo texInit = texInit_;
-	texInit.m_usage |= TextureUsageBit::kTransferDestination;
+	texInit.m_usage |= TextureUsageBit::kCopyDestination;
 
 
 	BufferInitInfo buffInit;
 	BufferInitInfo buffInit;
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_size = texInit.m_height * texInit.m_width * getFormatInfo(texInit.m_format).m_texelSize;
 	buffInit.m_size = texInit.m_height * texInit.m_width * getFormatInfo(texInit.m_format).m_texelSize;
+	buffInit.m_usage = BufferUsageBit::kCopySource;
 	ANKI_ASSERT(getFormatInfo(texInit.m_format).m_texelSize == sizeof(T));
 	ANKI_ASSERT(getFormatInfo(texInit.m_format).m_texelSize == sizeof(T));
 	ANKI_ASSERT(buffInit.m_size == data.getSizeInBytes());
 	ANKI_ASSERT(buffInit.m_size == data.getSizeInBytes());
 
 

+ 82 - 43
Tests/Gr/GrWorkGraphs.cpp

@@ -44,6 +44,43 @@ static void clearSwapchain(CommandBufferPtr cmdb = CommandBufferPtr())
 	}
 	}
 }
 }
 
 
+template<typename TFunc> // Benchmark helper: records func into command buffers, submits them all, waits, and reports average per-iteration times.
+static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, F64& avgTimePerIterationMs, F64& avgCpuTimePerIterationMs, TFunc func) // func is invoked as func(*cmdb), once per iteration
+{ // NOTE: avgCpuTimePerIterationMs is accumulated with += (the caller must zero it beforehand); avgTimePerIterationMs is overwritten
+	ANKI_ASSERT(iterationCount >= iterationsPerCommandBuffer && (iterationCount % iterationsPerCommandBuffer) == 0); // The iterations must split evenly across the command buffers
+
+	U64 startUs = 0;
+	FencePtr fence;
+
+	const U32 commandBufferCount = iterationCount / iterationsPerCommandBuffer;
+	for(U32 icmdb = 0; icmdb < commandBufferCount; ++icmdb)
+	{
+		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(CommandBufferInitInfo(CommandBufferFlag::kGeneralWork));
+
+		const U64 cpuTimeStart = HighRezTimer::getCurrentTimeUs(); // CPU timing covers the recording loop and endRecording(), not command buffer creation or submit
+		for(U32 i = 0; i < iterationsPerCommandBuffer; ++i)
+		{
+			func(*cmdb);
+		}
+
+		cmdb->endRecording();
+		const U64 cpuTimeEnd = HighRezTimer::getCurrentTimeUs();
+		avgCpuTimePerIterationMs += (Second(cpuTimeEnd - cpuTimeStart) * 0.001) / Second(iterationCount); // Timer values are in microseconds (per getCurrentTimeUs); * 0.001 gives milliseconds
+
+		if(icmdb == 0)
+		{
+			startUs = HighRezTimer::getCurrentTimeUs(); // The overall timer starts right before the first submit
+		}
+
+		GrManager::getSingleton().submit(cmdb.get(), {}, (icmdb == commandBufferCount - 1) ? &fence : nullptr); // A fence is requested only for the last submission
+	}
+
+	fence->clientWait(kMaxSecond); // Presumably blocks until the last submission has finished on the GPU - TODO confirm against the Fence API
+	const U64 endUs = HighRezTimer::getCurrentTimeUs();
+
+	avgTimePerIterationMs = (Second(endUs - startUs) * 0.001) / Second(iterationCount); // Wall-clock from first submit to fence wait return; it also spans the CPU recording of the later command buffers
+}
+
 ANKI_TEST(Gr, WorkGraphHelloWorld)
 ANKI_TEST(Gr, WorkGraphHelloWorld)
 {
 {
 	// CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"Device", "1"});
 	// CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"Device", "1"});
@@ -514,7 +551,7 @@ ANKI_TEST(Gr, WorkGraphsWorkDrain)
 		{
 		{
 			BufferInitInfo scratchInit("scratch");
 			BufferInitInfo scratchInit("scratch");
 			scratchInit.m_size = wgProg->getWorkGraphMemoryRequirements();
 			scratchInit.m_size = wgProg->getWorkGraphMemoryRequirements();
-			scratchInit.m_usage = BufferUsageBit::kAllStorage;
+			scratchInit.m_usage = BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv;
 			scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
 			scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
 		}
 		}
 
 
@@ -549,33 +586,28 @@ ANKI_TEST(Gr, WorkGraphsWorkDrain)
 		}
 		}
 
 
 		// Create counter buff
 		// Create counter buff
-		BufferPtr threadgroupCountBuff;
-		{
-			threadgroupCountBuff = createBuffer(BufferUsageBit::kStorageComputeWrite, U32(0u), 1);
-		}
+		BufferPtr threadgroupCountBuff = createBuffer(BufferUsageBit::kUavCompute, U32(0u), 1);
 
 
 		// Result buffers
 		// Result buffers
-		BufferPtr tileMax = createBuffer(BufferUsageBit::kAllStorage, Vec4(0.1f), TILE_COUNT);
-		BufferPtr finalMax = createBuffer(BufferUsageBit::kAllStorage, Vec4(0.1f), 1);
-
-		const U32 iterationCount = (!bBenchmark) ? 1 : 10000u;
-		Second avgTimeMs = 0.0;
-		for(U32 i = 0; i < iterationCount; ++i)
-		{
-			CommandBufferPtr cmdb =
-				GrManager::getSingleton().newCommandBuffer(CommandBufferInitInfo(CommandBufferFlag::kSmallBatch | CommandBufferFlag::kGeneralWork));
-
-			BufferBarrierInfo barr = {BufferView(tileMax.get()), BufferUsageBit::kStorageComputeWrite, BufferUsageBit::kStorageComputeWrite};
-			cmdb->setPipelineBarrier({}, {&barr, 1}, {});
-
-			cmdb->bindTexture(ANKI_REG(t0), TextureView(tex.get(), TextureSubresourceDesc::all()));
-			cmdb->bindStorageBuffer(ANKI_REG(u0), BufferView(tileMax.get()));
-			cmdb->bindStorageBuffer(ANKI_REG(u1), BufferView(finalMax.get()));
-			cmdb->bindStorageBuffer(ANKI_REG(u2), BufferView(threadgroupCountBuff.get()));
+		BufferPtr tileMax = createBuffer(BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv, Vec4(0.1f), TILE_COUNT);
+		BufferPtr finalMax = createBuffer(BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv, Vec4(0.1f), 1);
+
+		const U32 iterationsPerCmdb = (!bBenchmark) ? 1 : 100u;
+		const U32 iterationCount = (!bBenchmark) ? 1 : iterationsPerCmdb * 100;
+		F64 avgTimeMs = 0.0;
+		F64 avgCpuTimeMs = 0.0;
+		runBenchmark(iterationCount, iterationsPerCmdb, avgTimeMs, avgCpuTimeMs, [&](CommandBuffer& cmdb) {
+			BufferBarrierInfo barr = {BufferView(tileMax.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
+			cmdb.setPipelineBarrier({}, {&barr, 1}, {});
+
+			cmdb.bindSrv(0, 0, TextureView(tex.get(), TextureSubresourceDesc::all()));
+			cmdb.bindUav(0, 0, BufferView(tileMax.get()));
+			cmdb.bindUav(1, 0, BufferView(finalMax.get()));
+			cmdb.bindUav(2, 0, BufferView(threadgroupCountBuff.get()));
 
 
 			if(bWorkgraphs)
 			if(bWorkgraphs)
 			{
 			{
-				cmdb->bindShaderProgram(wgProg.get());
+				cmdb.bindShaderProgram(wgProg.get());
 
 
 				struct FirstNodeRecord
 				struct FirstNodeRecord
 				{
 				{
@@ -585,39 +617,46 @@ ANKI_TEST(Gr, WorkGraphsWorkDrain)
 				Array<FirstNodeRecord, 1> records;
 				Array<FirstNodeRecord, 1> records;
 				records[0].m_gridSize = UVec3(TILE_COUNT_X, TILE_COUNT_Y, 1);
 				records[0].m_gridSize = UVec3(TILE_COUNT_X, TILE_COUNT_Y, 1);
 
 
-				cmdb->dispatchGraph(BufferView(scratchBuff.get()), records.getBegin(), records.getSize(), sizeof(records[0]));
+				cmdb.dispatchGraph(BufferView(scratchBuff.get()), records.getBegin(), records.getSize(), sizeof(records[0]));
 			}
 			}
 			else
 			else
 			{
 			{
-				cmdb->bindShaderProgram(compProg0.get());
-				cmdb->dispatchCompute(TILE_COUNT_X, TILE_COUNT_Y, 1);
+				cmdb.bindShaderProgram(compProg0.get());
+				cmdb.dispatchCompute(TILE_COUNT_X, TILE_COUNT_Y, 1);
 
 
-				barr = {BufferView(tileMax.get()), BufferUsageBit::kStorageComputeWrite, BufferUsageBit::kStorageComputeRead};
-				cmdb->setPipelineBarrier({}, {&barr, 1}, {});
+				barr = {BufferView(tileMax.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
+				cmdb.setPipelineBarrier({}, {&barr, 1}, {});
 
 
-				cmdb->bindShaderProgram(compProg1.get());
-				cmdb->dispatchCompute(1, 1, 1);
+				cmdb.bindShaderProgram(compProg1.get());
+				cmdb.dispatchCompute(1, 1, 1);
 			}
 			}
+		});
 
 
-			cmdb->endRecording();
+		validateBuffer2(finalMax, Vec4(1.1f, 2.06f, 3.88f, 1.0f));
 
 
-			const U64 start = HighRezTimer::getCurrentTimeUs();
+		if(bBenchmark)
+		{
+			ANKI_TEST_LOGI("Benchmark: avg GPU time: %fms, avg CPU time: %fms", avgTimeMs, avgCpuTimeMs);
+		}
+	}
 
 
-			FencePtr fence;
-			GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
-			fence->clientWait(kMaxSecond);
+	commonDestroy();
+}
 
 
-			const U64 end = HighRezTimer::getCurrentTimeUs();
+ANKI_TEST(Gr, WorkGraphsOverhead)
+{
+	const Bool bBenchmark = getenv("BENCHMARK") && CString(getenv("BENCHMARK")) == "1";
 
 
-			avgTimeMs += (Second(end - start) * 0.001) / Second(iterationCount);
-		}
+	[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"WorkGraphs", "1"});
 
 
-		validateBuffer2(finalMax, Vec4(1.1f, 2.06f, 3.88f, 1.0f));
+	commonInit(!bBenchmark);
 
 
-		if(bBenchmark)
-		{
-			ANKI_TEST_LOGI("Benchmark: avg time: %fms", avgTimeMs);
-		}
+	const Bool bWorkgraphs =
+		getenv("WORKGRAPHS") && CString(getenv("WORKGRAPHS")) == "1" && GrManager::getSingleton().getDeviceCapabilities().m_workGraphs;
+
+	ANKI_TEST_LOGI("Testing with BENCHMARK=%u WORKGRAPHS=%u", bBenchmark, bWorkgraphs);
+
+	{
 	}
 	}
 
 
 	commonDestroy();
 	commonDestroy();

+ 1 - 1
Tests/Gr/WorkDrainWg.hlsl

@@ -57,7 +57,7 @@ main(DispatchNodeInputRecord<FirstNodeInput> input, [MaxRecords(1)] NodeOutput<S
 	{
 	{
 		uint orig;
 		uint orig;
 		InterlockedAdd(g_threadgroupCount[0], 1, orig);
 		InterlockedAdd(g_threadgroupCount[0], 1, orig);
-		lastThreadgroup = orig + 1u == TILE_SIZE_X * TILE_SIZE_Y;
+		lastThreadgroup = (orig + 1u) == TILE_COUNT;
 
 
 		if(lastThreadgroup)
 		if(lastThreadgroup)
 		{
 		{