Explorar o código

Add the synchronization bits for AS

Panagiotis Christopoulos Charitos hai 5 anos
pai
achega
b6e2ad4462

+ 3 - 0
src/anki/gr/CommandBuffer.h

@@ -412,6 +412,9 @@ public:
 
 
 	void setBufferBarrier(
 	void setBufferBarrier(
 		BufferPtr buff, BufferUsageBit prevUsage, BufferUsageBit nextUsage, PtrSize offset, PtrSize size);
 		BufferPtr buff, BufferUsageBit prevUsage, BufferUsageBit nextUsage, PtrSize offset, PtrSize size);
+
+	void setAccelerationStructureBarrier( // Record a barrier between the previous and the next usage of an acceleration structure
+		AccelerationStructurePtr as, AccelerationStructureUsageBit prevUsage, AccelerationStructureUsageBit nextUsage);
 	/// @}
 	/// @}
 
 
 	/// @name Other
 	/// @name Other

+ 47 - 1
src/anki/gr/vulkan/AccelerationStructureImpl.cpp

@@ -188,7 +188,7 @@ void AccelerationStructureImpl::initBuildInfo()
 				const AccelerationStructureInstance& inInst = m_topLevelInfo.m_instances[i];
 				const AccelerationStructureInstance& inInst = m_topLevelInfo.m_instances[i];
 				static_assert(sizeof(outInst.transform) == sizeof(inInst.m_transform), "See file");
 				static_assert(sizeof(outInst.transform) == sizeof(inInst.m_transform), "See file");
 				memcpy(&outInst.transform.matrix[0][0], &inInst.m_transform, sizeof(inInst.m_transform));
 				memcpy(&outInst.transform.matrix[0][0], &inInst.m_transform, sizeof(inInst.m_transform));
-				outInst.instanceCustomIndex = i;
+				outInst.instanceCustomIndex = i & 0xFFFFFF;
 				outInst.mask = 0xFF;
 				outInst.mask = 0xFF;
 				outInst.instanceShaderBindingTableRecordOffset = 0;
 				outInst.instanceShaderBindingTableRecordOffset = 0;
 				outInst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
 				outInst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
@@ -230,4 +230,50 @@ void AccelerationStructureImpl::initBuildInfo()
 	m_offsetInfo.transformOffset = 0;
 	m_offsetInfo.transformOffset = 0;
 }
 }
 
 
+/// Translate a pair of acceleration structure usages into the pipeline stage and access masks of a Vulkan barrier.
+/// @param before The usage that precedes the barrier.
+/// @param after The usage that follows the barrier.
+/// @param[out] srcStages Overwritten with the stages derived from @a before.
+/// @param[out] srcAccesses Overwritten with the accesses derived from @a before.
+/// @param[out] dstStages Overwritten with the stages derived from @a after.
+/// @param[out] dstAccesses Overwritten with the accesses derived from @a after.
+void AccelerationStructureImpl::computeBarrierInfo(AccelerationStructureUsageBit before,
+	AccelerationStructureUsageBit after,
+	VkPipelineStageFlags& srcStages,
+	VkAccessFlags& srcAccesses,
+	VkPipelineStageFlags& dstStages,
+	VkAccessFlags& dstAccesses)
+{
+	// Before. Both source masks are reset before any bit is OR'ed in because the caller may pass uninitialized
+	// variables
+	srcStages = 0;
+	srcAccesses = 0;
+
+	if(before == AccelerationStructureUsageBit::NONE)
+	{
+		// No previous usage: no accesses to add, only a stage so the source stage mask is not empty
+		srcStages |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+	}
+
+	if(!!(before & AccelerationStructureUsageBit::BUILD))
+	{
+		srcStages |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
+		srcAccesses |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+	}
+
+	// After
+	dstStages = 0;
+	dstAccesses = 0;
+
+	if(!!(after & AccelerationStructureUsageBit::COMPUTE_READ))
+	{
+		dstStages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+		dstAccesses |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
+	}
+
+	if(!!(after & AccelerationStructureUsageBit::RAY_GEN_READ))
+	{
+		dstStages |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
+		dstAccesses |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
+	}
+
+	// NOTE(review): unlike the checks above this is an exact match, so BUILD combined with any other bit adds
+	// nothing here. Confirm that this is intended
+	if(after == AccelerationStructureUsageBit::BUILD)
+	{
+		dstStages |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
+		dstAccesses |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+	}
+}
+
 } // end namespace anki
 } // end namespace anki

+ 7 - 0
src/anki/gr/vulkan/AccelerationStructureImpl.h

@@ -50,6 +50,13 @@ public:
 		offsetInfo = m_offsetInfo;
 		offsetInfo = m_offsetInfo;
 	}
 	}
 
 
+	static void computeBarrierInfo(AccelerationStructureUsageBit before, // Usage that precedes the barrier
+		AccelerationStructureUsageBit after, // Usage that follows the barrier
+		VkPipelineStageFlags& srcStages, // Receives the pipeline stages derived from `before`
+		VkAccessFlags& srcAccesses, // Receives the access bits derived from `before`
+		VkPipelineStageFlags& dstStages, // Receives the pipeline stages derived from `after`
+		VkAccessFlags& dstAccesses); // Receives the access bits derived from `after`
+
 private:
 private:
 	class ASBottomLevelInfo : public BottomLevelAccelerationStructureInitInfo
 	class ASBottomLevelInfo : public BottomLevelAccelerationStructureInitInfo
 	{
 	{

+ 7 - 0
src/anki/gr/vulkan/CommandBuffer.cpp

@@ -366,6 +366,13 @@ void CommandBuffer::setBufferBarrier(
 	self.setBufferBarrier(buff, before, after, offset, size);
 	self.setBufferBarrier(buff, before, after, offset, size);
 }
 }
 
 
+void CommandBuffer::setAccelerationStructureBarrier(
+	AccelerationStructurePtr as, AccelerationStructureUsageBit prevUsage, AccelerationStructureUsageBit nextUsage)
+{
+	ANKI_VK_SELF(CommandBufferImpl); // NOTE(review): macro defined elsewhere, presumably exposes this object as `self` of type CommandBufferImpl
+	self.setAccelerationStructureBarrierInternal(as, prevUsage, nextUsage); // Forward the arguments unchanged to the Vulkan implementation
+}
+
 void CommandBuffer::resetOcclusionQuery(OcclusionQueryPtr query)
 void CommandBuffer::resetOcclusionQuery(OcclusionQueryPtr query)
 {
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_VK_SELF(CommandBufferImpl);

+ 4 - 2
src/anki/gr/vulkan/CommandBufferImpl.cpp

@@ -30,6 +30,7 @@ CommandBufferImpl::~CommandBufferImpl()
 
 
 	m_imgBarriers.destroy(m_alloc);
 	m_imgBarriers.destroy(m_alloc);
 	m_buffBarriers.destroy(m_alloc);
 	m_buffBarriers.destroy(m_alloc);
+	m_memBarriers.destroy(m_alloc);
 	m_queryResetAtoms.destroy(m_alloc);
 	m_queryResetAtoms.destroy(m_alloc);
 	m_writeQueryAtoms.destroy(m_alloc);
 	m_writeQueryAtoms.destroy(m_alloc);
 	m_secondLevelAtoms.destroy(m_alloc);
 	m_secondLevelAtoms.destroy(m_alloc);
@@ -486,8 +487,8 @@ void CommandBufferImpl::flushBarriers()
 		m_srcStageMask,
 		m_srcStageMask,
 		m_dstStageMask,
 		m_dstStageMask,
 		0,
 		0,
-		0,
-		nullptr,
+		m_memBarrierCount,
+		(m_memBarrierCount) ? &m_memBarriers[0] : nullptr,
 		m_buffBarrierCount,
 		m_buffBarrierCount,
 		(m_buffBarrierCount) ? &m_buffBarriers[0] : nullptr,
 		(m_buffBarrierCount) ? &m_buffBarriers[0] : nullptr,
 		finalImgBarrierCount,
 		finalImgBarrierCount,
@@ -497,6 +498,7 @@ void CommandBufferImpl::flushBarriers()
 
 
 	m_imgBarrierCount = 0;
 	m_imgBarrierCount = 0;
 	m_buffBarrierCount = 0;
 	m_buffBarrierCount = 0;
+	m_memBarrierCount = 0;
 	m_srcStageMask = 0;
 	m_srcStageMask = 0;
 	m_dstStageMask = 0;
 	m_dstStageMask = 0;
 }
 }

+ 6 - 1
src/anki/gr/vulkan/CommandBufferImpl.h

@@ -353,7 +353,10 @@ public:
 		PtrSize size,
 		PtrSize size,
 		VkBuffer buff);
 		VkBuffer buff);
 
 
-	void setBufferBarrier(BufferPtr buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset, PtrSize size);
+	void setBufferBarrier(BufferPtr& buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset, PtrSize size);
+
+	void setAccelerationStructureBarrierInternal( // Vulkan-side implementation of CommandBuffer::setAccelerationStructureBarrier
+		AccelerationStructurePtr& as, AccelerationStructureUsageBit prevUsage, AccelerationStructureUsageBit nextUsage);
 
 
 	void fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value);
 	void fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value);
 
 
@@ -445,8 +448,10 @@ private:
 	/// @{
 	/// @{
 	DynamicArray<VkImageMemoryBarrier> m_imgBarriers;
 	DynamicArray<VkImageMemoryBarrier> m_imgBarriers;
 	DynamicArray<VkBufferMemoryBarrier> m_buffBarriers;
 	DynamicArray<VkBufferMemoryBarrier> m_buffBarriers;
+	DynamicArray<VkMemoryBarrier> m_memBarriers;
 	U16 m_imgBarrierCount = 0;
 	U16 m_imgBarrierCount = 0;
 	U16 m_buffBarrierCount = 0;
 	U16 m_buffBarrierCount = 0;
+	U16 m_memBarrierCount = 0;
 	VkPipelineStageFlags m_srcStageMask = 0;
 	VkPipelineStageFlags m_srcStageMask = 0;
 	VkPipelineStageFlags m_dstStageMask = 0;
 	VkPipelineStageFlags m_dstStageMask = 0;
 	/// @}
 	/// @}

+ 34 - 1
src/anki/gr/vulkan/CommandBufferImpl.inl.h

@@ -244,7 +244,7 @@ inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage,
 }
 }
 
 
 inline void CommandBufferImpl::setBufferBarrier(
 inline void CommandBufferImpl::setBufferBarrier(
-	BufferPtr buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset, PtrSize size)
+	BufferPtr& buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset, PtrSize size)
 {
 {
 	const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
 	const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
 
 
@@ -259,6 +259,39 @@ inline void CommandBufferImpl::setBufferBarrier(
 	m_microCmdb->pushObjectRef(buff);
 	m_microCmdb->pushObjectRef(buff);
 }
 }
 
 
+/// Queue a memory barrier that orders acceleration structure accesses between @a prevUsage and @a nextUsage.
+/// The barrier is appended to m_memBarriers and its stages are merged into m_srcStageMask and m_dstStageMask. It is
+/// only implemented when ANKI_BATCH_COMMANDS is enabled, the other path asserts.
+/// @param as The acceleration structure the barrier is meant for. It is currently not referenced by the body.
+/// @param prevUsage The usage that precedes the barrier.
+/// @param nextUsage The usage that follows the barrier.
+inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(
+	AccelerationStructurePtr& as, AccelerationStructureUsageBit prevUsage, AccelerationStructureUsageBit nextUsage)
+{
+	commandCommon();
+
+	// Zero-initialize the masks so that no indeterminate bit can end up in the barrier even if the callee only ORs
+	// into one of them
+	VkPipelineStageFlags srcStage = 0;
+	VkAccessFlags srcAccess = 0;
+	VkPipelineStageFlags dstStage = 0;
+	VkAccessFlags dstAccess = 0;
+	AccelerationStructureImpl::computeBarrierInfo(prevUsage, nextUsage, srcStage, srcAccess, dstStage, dstAccess);
+
+#if ANKI_BATCH_COMMANDS
+	flushBatches(CommandBufferCommandType::SET_BARRIER);
+
+	VkMemoryBarrier memBarrier{};
+	memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+	memBarrier.srcAccessMask = srcAccess;
+	memBarrier.dstAccessMask = dstAccess;
+
+	// Grow the storage when it is full: at least 2 entries, otherwise double the current count
+	if(m_memBarriers.getSize() <= m_memBarrierCount)
+	{
+		m_memBarriers.resize(m_alloc, max<U32>(2, m_memBarrierCount * 2));
+	}
+
+	m_memBarriers[m_memBarrierCount++] = memBarrier;
+
+	// Accumulate the stages of all the pending barriers
+	m_srcStageMask |= srcStage;
+	m_dstStageMask |= dstStage;
+#else
+	ANKI_ASSERT(!"TODO");
+#endif
+}
+
 inline void CommandBufferImpl::drawArrays(
 inline void CommandBufferImpl::drawArrays(
 	PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance)
 	PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance)
 {
 {