Browse Source

Workgraph fixes

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
e28eb1cd19
3 changed files with 36 additions and 22 deletions
  1. 14 9
      AnKi/Gr/D3D/D3DCommandBuffer.cpp
  2. 1 1
      AnKi/Gr/D3D/D3DGrManager.cpp
  3. 21 12
      Tests/Gr/GrWorkGraphs.cpp

+ 14 - 9
AnKi/Gr/D3D/D3DCommandBuffer.cpp

@@ -693,6 +693,14 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 	ANKI_D3D_SELF(CommandBufferImpl);
 	self.commandCommon();
 
+	auto sanitizeAccess = [](D3D12_BARRIER_ACCESS& access) {
+		if((access & D3D12_BARRIER_ACCESS_NO_ACCESS) && access != D3D12_BARRIER_ACCESS_NO_ACCESS)
+		{
+			// If access has other accesses as well as NO_ACCESS then remove the NO_ACCESS
+			access &= ~D3D12_BARRIER_ACCESS_NO_ACCESS;
+		}
+	};
+
 	DynamicArray<D3D12_TEXTURE_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(self.m_fastPool);
 	DynamicArray<D3D12_BUFFER_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> bufferBarriers(self.m_fastPool);
 
@@ -701,6 +709,9 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 		const TextureImpl& impl = static_cast<const TextureImpl&>(barrier.m_textureView.getTexture());
 		D3D12_TEXTURE_BARRIER& d3dBarrier = *texBarriers.emplaceBack();
 		d3dBarrier = impl.computeBarrierInfo(barrier.m_previousUsage, barrier.m_nextUsage, barrier.m_textureView.getSubresource());
+
+		sanitizeAccess(d3dBarrier.AccessBefore);
+		sanitizeAccess(d3dBarrier.AccessAfter);
 	}
 
 	for(const BufferBarrierInfo& barrier : buffers)
@@ -712,15 +723,6 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 		{
 			// Merge barriers
 
-			if(bufferBarriers.getBack().AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS)
-			{
-				bufferBarriers.getBack().AccessBefore = D3D12_BARRIER_ACCESS(0);
-			}
-			else if(bufferBarriers.getBack().AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS)
-			{
-				b.AccessBefore = D3D12_BARRIER_ACCESS(0);
-			}
-
 			bufferBarriers.getBack().AccessBefore |= b.AccessBefore;
 			bufferBarriers.getBack().AccessAfter |= b.AccessAfter;
 			bufferBarriers.getBack().SyncBefore |= b.SyncBefore;
@@ -732,6 +734,9 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 			D3D12_BUFFER_BARRIER& d3dBarrier = *bufferBarriers.emplaceBack();
 			d3dBarrier = b;
 		}
+
+		sanitizeAccess(bufferBarriers.getBack().AccessBefore);
+		sanitizeAccess(bufferBarriers.getBack().AccessAfter);
 	}
 
 	ANKI_ASSERT(accelerationStructures.getSize() == 0 && "TODO");

+ 1 - 1
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -382,7 +382,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 				if(res == S_OK)
 				{
 					infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true);
-					infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
+					// infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
 				}
 			}
 		}

+ 21 - 12
Tests/Gr/GrWorkGraphs.cpp

@@ -49,16 +49,20 @@ static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, Boo
 {
 	ANKI_ASSERT(iterationCount >= iterationsPerCommandBuffer && (iterationCount % iterationsPerCommandBuffer) == 0);
 
-	U64 startUs = 0;
 	FencePtr fence;
 
 	F64 avgCpuTimePerIterationMs = 0.0;
+	DynamicArray<TimestampQueryPtr> timestamps;
 
 	const U32 commandBufferCount = iterationCount / iterationsPerCommandBuffer;
 	for(U32 icmdb = 0; icmdb < commandBufferCount; ++icmdb)
 	{
 		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(CommandBufferInitInfo(CommandBufferFlag::kGeneralWork));
 
+		TimestampQueryPtr query1 = GrManager::getSingleton().newTimestampQuery();
+		cmdb->writeTimestamp(query1.get());
+		timestamps.emplaceBack(query1);
+
 		const U64 cpuTimeStart = HighRezTimer::getCurrentTimeUs();
 		for(U32 i = 0; i < iterationsPerCommandBuffer; ++i)
 		{
@@ -67,15 +71,14 @@ static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, Boo
 
 		// clearSwapchain(cmdb);
 
+		TimestampQueryPtr query2 = GrManager::getSingleton().newTimestampQuery();
+		cmdb->writeTimestamp(query2.get());
+		timestamps.emplaceBack(query2);
+
 		cmdb->endRecording();
 		const U64 cpuTimeEnd = HighRezTimer::getCurrentTimeUs();
 		avgCpuTimePerIterationMs += (Second(cpuTimeEnd - cpuTimeStart) * 0.001) / Second(iterationCount);
 
-		if(icmdb == 0)
-		{
-			startUs = HighRezTimer::getCurrentTimeUs();
-		}
-
 		GrManager::getSingleton().submit(cmdb.get(), {}, (icmdb == commandBufferCount - 1) ? &fence : nullptr);
 
 		// GrManager::getSingleton().swapBuffers();
@@ -83,9 +86,16 @@ static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, Boo
 
 	const Bool done = fence->clientWait(kMaxSecond);
 	ANKI_TEST_EXPECT_EQ(done, true);
-	const U64 endUs = HighRezTimer::getCurrentTimeUs();
 
-	const F64 avgTimePerIterationMs = (Second(endUs - startUs) * 0.001) / Second(iterationCount);
+	F64 avgTimePerIterationMs = 0.0f;
+	for(U32 i = 0; i < timestamps.getSize(); i += 2)
+	{
+		Second a, b;
+		ANKI_TEST_EXPECT_EQ(timestamps[i]->getResult(a), TimestampQueryResult::kAvailable);
+		ANKI_TEST_EXPECT_EQ(timestamps[i + 1]->getResult(b), TimestampQueryResult::kAvailable);
+
+		avgTimePerIterationMs += (Second(b - a) * 1000.0) / Second(iterationCount);
+	}
 
 	if(bBenchmark)
 	{
@@ -251,8 +261,7 @@ StructuredBuffer<uint> g_positions : register(t1);
 #define THREAD_COUNT 64u
 
 // Operates per object
-[Shader("node")] [NodeLaunch("broadcasting")] [NodeIsProgramEntry] [NodeMaxDispatchGrid(1, 1, 1)]
-[NumThreads(THREAD_COUNT, 1, 1)]
+[Shader("node")] [NodeLaunch("broadcasting")] [NodeIsProgramEntry] [NodeMaxDispatchGrid(1, 1, 1)] [NumThreads(THREAD_COUNT, 1, 1)]
 void main(DispatchNodeInputRecord<FirstNodeRecord> inp, [MaxRecords(THREAD_COUNT)] NodeOutput<SecondNodeRecord> computeAabb,
 		  uint svGroupIndex : SV_GroupIndex, uint svDispatchThreadId : SV_DispatchThreadId)
 {
@@ -451,9 +460,9 @@ void main(uint svDispatchThreadId : SV_DispatchThreadId, uint svGroupIndex : SV_
 
 		// Execute
 		const U32 iterationsPerCmdb = (!bBenchmark) ? 1 : 100u;
-		const U32 iterationCount = (!bBenchmark) ? 1 : iterationsPerCmdb * 10;
+		const U32 iterationCount = (!bBenchmark) ? iterationsPerCmdb : iterationsPerCmdb * 1;
 		runBenchmark(iterationCount, iterationsPerCmdb, bBenchmark, [&](CommandBuffer& cmdb) {
-			BufferBarrierInfo barr = {BufferView(aabbsBuff.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
+			const BufferBarrierInfo barr = {BufferView(aabbsBuff.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
 			cmdb.setPipelineBarrier({}, {&barr, 1}, {});
 
 			if(bWorkgraphs)