Bladeren bron

Add support for indirect building of acceleration structures

Panagiotis Christopoulos Charitos 2 jaren geleden
bovenliggende
commit
b431911fc7

+ 24 - 3
AnKi/Gr/AccelerationStructure.h

@@ -60,7 +60,7 @@ public:
 };
 };
 
 
 /// @memberof AccelerationStructureInitInfo
 /// @memberof AccelerationStructureInitInfo
-class AccelerationStructureInstance
+class AccelerationStructureInstanceInfo
 {
 {
 public:
 public:
 	AccelerationStructurePtr m_bottomLevel;
 	AccelerationStructurePtr m_bottomLevel;
@@ -73,11 +73,24 @@ public:
 class TopLevelAccelerationStructureInitInfo
 class TopLevelAccelerationStructureInitInfo
 {
 {
 public:
 public:
-	ConstWeakArray<AccelerationStructureInstance> m_instances;
+	class
+	{
+	public:
+		ConstWeakArray<AccelerationStructureInstanceInfo> m_instances;
+	} m_directArgs; ///< Pass some representation of the instances.
+
+	class
+	{
+	public:
+		U32 m_maxInstanceCount = 0;
+		Buffer* m_instancesBuffer = nullptr;
+		PtrSize m_instancesBufferOffset = kMaxPtrSize;
+	} m_indirectArgs; ///< Pass the instances GPU buffer directly.
 
 
 	Bool isValid() const
 	Bool isValid() const
 	{
 	{
-		return m_instances.getSize() > 0;
+		return m_directArgs.m_instances.getSize() > 0
+			   || (m_indirectArgs.m_maxInstanceCount > 0 && m_indirectArgs.m_instancesBuffer && m_indirectArgs.m_instancesBufferOffset < kMaxPtrSize);
 	}
 	}
 };
 };
 
 
@@ -120,7 +133,15 @@ public:
 		return m_type;
 		return m_type;
 	}
 	}
 
 
+	/// Get the size of the scratch buffer used in building this AS.
+	PtrSize getBuildScratchBufferSize() const
+	{
+		ANKI_ASSERT(m_scratchBufferSize != 0);
+		return m_scratchBufferSize;
+	}
+
 protected:
 protected:
+	PtrSize m_scratchBufferSize = 0;
 	AccelerationStructureType m_type = AccelerationStructureType::kCount;
 	AccelerationStructureType m_type = AccelerationStructureType::kCount;
 
 
 	/// Construct.
 	/// Construct.

+ 9 - 0
AnKi/Gr/CommandBuffer.h

@@ -407,6 +407,15 @@ public:
 	/// Build the acceleration structure.
 	/// Build the acceleration structure.
 	void buildAccelerationStructure(AccelerationStructure* as);
 	void buildAccelerationStructure(AccelerationStructure* as);
 
 
+	/// Build an acceleration structure indirectly. Only valid for TLASes.
+	/// @param as The AS to build.
+	/// @param scratchBuffer A scratch buffer. Ask the AS for size.
+	/// @param scratchBufferOffset Scratch buffer offset.
+	/// @param rangeBuffer Points to a single AccelerationStructureBuildRangeInfo.
+	/// @param rangeBufferOffsset Offset in rangeBuffer.
+	void buildAccelerationStructureIndirect(AccelerationStructure* as, Buffer* scratchBuffer, PtrSize scratchBufferOffset, Buffer* rangeBuffer,
+											PtrSize rangeBufferOffsset);
+
 	/// Do upscaling by an external upscaler
 	/// Do upscaling by an external upscaler
 	/// @param[in] upscaler the upscaler to use for upscaling
 	/// @param[in] upscaler the upscaler to use for upscaling
 	/// @param[in] inColor Source LowRes RenderTarget.
 	/// @param[in] inColor Source LowRes RenderTarget.

+ 3 - 2
AnKi/Gr/Common.h

@@ -701,6 +701,7 @@ enum class BufferUsageBit : U64
 
 
 	kAccelerationStructureBuild = 1ull << 27ull, ///< Will be used as a position or index buffer in a BLAS build.
 	kAccelerationStructureBuild = 1ull << 27ull, ///< Will be used as a position or index buffer in a BLAS build.
 	kShaderBindingTable = 1ull << 28ull, ///< Will be used as SBT in a traceRays() command.
 	kShaderBindingTable = 1ull << 28ull, ///< Will be used as SBT in a traceRays() command.
+	kAccelerationStructureBuildScratch = 1ull << 29ull, ///< Used in buildAccelerationStructureXXX commands.
 
 
 	// Derived
 	// Derived
 	kAllUniform = kUniformGeometry | kUniformFragment | kUniformCompute | kUniformTraceRays,
 	kAllUniform = kUniformGeometry | kUniformFragment | kUniformCompute | kUniformTraceRays,
@@ -718,12 +719,12 @@ enum class BufferUsageBit : U64
 	kAllTraceRays = kUniformTraceRays | kStorageTraceRaysRead | kStorageTraceRaysWrite | kTextureTraceRaysRead | kTextureTraceRaysWrite
 	kAllTraceRays = kUniformTraceRays | kStorageTraceRaysRead | kStorageTraceRaysWrite | kTextureTraceRaysRead | kTextureTraceRaysWrite
 					| kIndirectTraceRays | kShaderBindingTable,
 					| kIndirectTraceRays | kShaderBindingTable,
 
 
-	kAllRayTracing = kAllTraceRays | kAccelerationStructureBuild,
+	kAllRayTracing = kAllTraceRays | kAccelerationStructureBuild | kAccelerationStructureBuildScratch,
 	kAllRead = kAllUniform | kStorageGeometryRead | kStorageFragmentRead | kStorageComputeRead | kStorageTraceRaysRead | kTextureGeometryRead
 	kAllRead = kAllUniform | kStorageGeometryRead | kStorageFragmentRead | kStorageComputeRead | kStorageTraceRaysRead | kTextureGeometryRead
 			   | kTextureFragmentRead | kTextureComputeRead | kTextureTraceRaysRead | kIndex | kVertex | kIndirectCompute | kIndirectDraw
 			   | kTextureFragmentRead | kTextureComputeRead | kTextureTraceRaysRead | kIndex | kVertex | kIndirectCompute | kIndirectDraw
 			   | kIndirectTraceRays | kTransferSource | kAccelerationStructureBuild | kShaderBindingTable,
 			   | kIndirectTraceRays | kTransferSource | kAccelerationStructureBuild | kShaderBindingTable,
 	kAllWrite = kStorageGeometryWrite | kStorageFragmentWrite | kStorageComputeWrite | kStorageTraceRaysWrite | kTextureGeometryWrite
 	kAllWrite = kStorageGeometryWrite | kStorageFragmentWrite | kStorageComputeWrite | kStorageTraceRaysWrite | kTextureGeometryWrite
-				| kTextureFragmentWrite | kTextureComputeWrite | kTextureTraceRaysWrite | kTransferDestination,
+				| kTextureFragmentWrite | kTextureComputeWrite | kTextureTraceRaysWrite | kTransferDestination | kAccelerationStructureBuildScratch,
 	kAll = kAllRead | kAllWrite,
 	kAll = kAllRead | kAllWrite,
 };
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(BufferUsageBit)
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(BufferUsageBit)

+ 55 - 34
AnKi/Gr/Vulkan/AccelerationStructureImpl.cpp

@@ -5,16 +5,17 @@
 
 
 #include <AnKi/Gr/Vulkan/AccelerationStructureImpl.h>
 #include <AnKi/Gr/Vulkan/AccelerationStructureImpl.h>
 #include <AnKi/Gr/Vulkan/GrManagerImpl.h>
 #include <AnKi/Gr/Vulkan/GrManagerImpl.h>
+#include <AnKi/Gr/Vulkan/FrameGarbageCollector.h>
 
 
 namespace anki {
 namespace anki {
 
 
 AccelerationStructureImpl::~AccelerationStructureImpl()
 AccelerationStructureImpl::~AccelerationStructureImpl()
 {
 {
-	m_topLevelInfo.m_blas.destroy();
-
 	if(m_handle)
 	if(m_handle)
 	{
 	{
-		vkDestroyAccelerationStructureKHR(getGrManagerImpl().getDevice(), m_handle, nullptr);
+		ASGarbage* garbage = anki::newInstance<ASGarbage>(GrMemoryPool::getSingleton());
+		garbage->m_asHandle = m_handle;
+		getGrManagerImpl().getFrameGarbageCollector().newASGarbage(garbage);
 	}
 	}
 }
 }
 
 
@@ -56,7 +57,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		const U32 primitiveCount = inf.m_bottomLevel.m_indexCount / 3;
 		const U32 primitiveCount = inf.m_bottomLevel.m_indexCount / 3;
 		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
 		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
 		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveCount, &buildSizes);
 		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveCount, &buildSizes);
-		m_scratchBufferSize = U32(buildSizes.buildScratchSize);
+		m_scratchBufferSize = buildSizes.buildScratchSize;
 
 
 		// Create the buffer that holds the AS memory
 		// Create the buffer that holds the AS memory
 		BufferInitInfo bufferInit(inf.getName());
 		BufferInitInfo bufferInit(inf.getName());
@@ -88,36 +89,52 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 	}
 	}
 	else
 	else
 	{
 	{
-		// Create the instances buffer
-		m_topLevelInfo.m_blas.resizeStorage(inf.m_topLevel.m_instances.getSize());
-
-		BufferInitInfo buffInit("AS instances");
-		buffInit.m_size = sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_instances.getSize();
-		buffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-		buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
-		m_topLevelInfo.m_instancesBuffer = getGrManagerImpl().newBuffer(buffInit);
-
-		VkAccelerationStructureInstanceKHR* instances =
-			static_cast<VkAccelerationStructureInstanceKHR*>(m_topLevelInfo.m_instancesBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
-		for(U32 i = 0; i < inf.m_topLevel.m_instances.getSize(); ++i)
+		const Bool isIndirect = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount > 0;
+
+		if(!isIndirect)
 		{
 		{
-			VkAccelerationStructureInstanceKHR& outInst = instances[i];
-			const AccelerationStructureInstance& inInst = inf.m_topLevel.m_instances[i];
-			static_assert(sizeof(outInst.transform) == sizeof(inInst.m_transform), "See file");
-			memcpy(&outInst.transform, &inInst.m_transform, sizeof(inInst.m_transform));
-			outInst.instanceCustomIndex = i & 0xFFFFFF;
-			outInst.mask = inInst.m_mask;
-			outInst.instanceShaderBindingTableRecordOffset = inInst.m_hitgroupSbtRecordIndex & 0xFFFFFF;
-			outInst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
-			outInst.accelerationStructureReference = static_cast<const AccelerationStructureImpl&>(*inInst.m_bottomLevel).m_deviceAddress;
-			ANKI_ASSERT(outInst.accelerationStructureReference != 0);
-
-			// Hold the reference
-			m_topLevelInfo.m_blas.emplaceBack(inf.m_topLevel.m_instances[i].m_bottomLevel);
+			// Create and populate the instances buffer
+			m_topLevelInfo.m_blases.resizeStorage(inf.m_topLevel.m_directArgs.m_instances.getSize());
+
+			BufferInitInfo buffInit("AS instances");
+			buffInit.m_size = sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_directArgs.m_instances.getSize();
+			buffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
+			buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
+			m_topLevelInfo.m_instancesBuffer = getGrManagerImpl().newBuffer(buffInit);
+
+			VkAccelerationStructureInstanceKHR* instances =
+				static_cast<VkAccelerationStructureInstanceKHR*>(m_topLevelInfo.m_instancesBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
+			for(U32 i = 0; i < inf.m_topLevel.m_directArgs.m_instances.getSize(); ++i)
+			{
+				VkAccelerationStructureInstanceKHR& outInst = instances[i];
+				const AccelerationStructureInstanceInfo& inInst = inf.m_topLevel.m_directArgs.m_instances[i];
+				static_assert(sizeof(outInst.transform) == sizeof(inInst.m_transform), "See file");
+				memcpy(&outInst.transform, &inInst.m_transform, sizeof(inInst.m_transform));
+				outInst.instanceCustomIndex = i & 0xFFFFFF;
+				outInst.mask = inInst.m_mask;
+				outInst.instanceShaderBindingTableRecordOffset = inInst.m_hitgroupSbtRecordIndex & 0xFFFFFF;
+				outInst.flags =
+					VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
+				outInst.accelerationStructureReference = static_cast<const AccelerationStructureImpl&>(*inInst.m_bottomLevel).m_deviceAddress;
+				ANKI_ASSERT(outInst.accelerationStructureReference != 0);
+
+				// Hold the reference
+				m_topLevelInfo.m_blases.emplaceBack(inf.m_topLevel.m_directArgs.m_instances[i].m_bottomLevel);
+			}
+
+			m_topLevelInfo.m_instancesBuffer->flush(0, kMaxPtrSize);
+			m_topLevelInfo.m_instancesBuffer->unmap();
 		}
 		}
+		else
+		{
+			// Instances buffer already created
+			ANKI_ASSERT(inf.m_topLevel.m_indirectArgs.m_instancesBufferOffset
+							+ sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_indirectArgs.m_maxInstanceCount
+						<= inf.m_topLevel.m_indirectArgs.m_instancesBuffer->getSize());
+			m_topLevelInfo.m_instancesBuffer.reset(inf.m_topLevel.m_indirectArgs.m_instancesBuffer);
 
 
-		m_topLevelInfo.m_instancesBuffer->flush(0, kMaxPtrSize);
-		m_topLevelInfo.m_instancesBuffer->unmap();
+			m_topLevelInfo.m_maxInstanceCount = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount;
+		}
 
 
 		// Geom
 		// Geom
 		VkAccelerationStructureGeometryKHR& geom = m_geometry;
 		VkAccelerationStructureGeometryKHR& geom = m_geometry;
@@ -125,6 +142,10 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
 		geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
 		geom.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
 		geom.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
 		geom.geometry.instances.data.deviceAddress = m_topLevelInfo.m_instancesBuffer->getGpuAddress();
 		geom.geometry.instances.data.deviceAddress = m_topLevelInfo.m_instancesBuffer->getGpuAddress();
+		if(isIndirect)
+		{
+			geom.geometry.instances.data.deviceAddress += inf.m_topLevel.m_indirectArgs.m_instancesBufferOffset;
+		}
 		geom.geometry.instances.arrayOfPointers = false;
 		geom.geometry.instances.arrayOfPointers = false;
 		geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; // TODO
 		geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; // TODO
 
 
@@ -139,10 +160,10 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 
 
 		// Get memory info
 		// Get memory info
 		VkAccelerationStructureBuildSizesInfoKHR buildSizes = {};
 		VkAccelerationStructureBuildSizesInfoKHR buildSizes = {};
-		const U32 instanceCount = inf.m_topLevel.m_instances.getSize();
+		const U32 instanceCount = (isIndirect) ? inf.m_topLevel.m_indirectArgs.m_maxInstanceCount : inf.m_topLevel.m_directArgs.m_instances.getSize();
 		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
 		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
 		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &instanceCount, &buildSizes);
 		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &instanceCount, &buildSizes);
-		m_scratchBufferSize = U32(buildSizes.buildScratchSize);
+		m_scratchBufferSize = buildSizes.buildScratchSize;
 
 
 		// Create the buffer that holds the AS memory
 		// Create the buffer that holds the AS memory
 		BufferInitInfo bufferInit(inf.getName());
 		BufferInitInfo bufferInit(inf.getName());
@@ -164,7 +185,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		buildInfo.dstAccelerationStructure = m_handle;
 		buildInfo.dstAccelerationStructure = m_handle;
 
 
 		// Range info
 		// Range info
-		m_rangeInfo.primitiveCount = inf.m_topLevel.m_instances.getSize();
+		m_rangeInfo.primitiveCount = inf.m_topLevel.m_directArgs.m_instances.getSize();
 	}
 	}
 
 
 	return Error::kNone;
 	return Error::kNone;

+ 5 - 5
AnKi/Gr/Vulkan/AccelerationStructureImpl.h

@@ -32,10 +32,10 @@ public:
 		return m_handle;
 		return m_handle;
 	}
 	}
 
 
-	U32 getBuildScratchBufferSize() const
+	U32 getMaxInstanceCount() const
 	{
 	{
-		ANKI_ASSERT(m_scratchBufferSize > 0);
-		return m_scratchBufferSize;
+		ANKI_ASSERT(m_topLevelInfo.m_maxInstanceCount);
+		return m_topLevelInfo.m_maxInstanceCount;
 	}
 	}
 
 
 	void generateBuildInfo(U64 scratchBufferAddress, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo,
 	void generateBuildInfo(U64 scratchBufferAddress, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo,
@@ -61,7 +61,8 @@ private:
 	{
 	{
 	public:
 	public:
 		BufferPtr m_instancesBuffer;
 		BufferPtr m_instancesBuffer;
-		GrDynamicArray<AccelerationStructurePtr> m_blas;
+		GrDynamicArray<AccelerationStructurePtr> m_blases;
+		U32 m_maxInstanceCount = 0; ///< Only for indirect.
 	};
 	};
 
 
 	BufferPtr m_asBuffer;
 	BufferPtr m_asBuffer;
@@ -76,7 +77,6 @@ private:
 	VkAccelerationStructureGeometryKHR m_geometry = {};
 	VkAccelerationStructureGeometryKHR m_geometry = {};
 	VkAccelerationStructureBuildGeometryInfoKHR m_buildInfo = {};
 	VkAccelerationStructureBuildGeometryInfoKHR m_buildInfo = {};
 	VkAccelerationStructureBuildRangeInfoKHR m_rangeInfo = {};
 	VkAccelerationStructureBuildRangeInfoKHR m_rangeInfo = {};
-	U32 m_scratchBufferSize = 0;
 	/// @}
 	/// @}
 };
 };
 /// @}
 /// @}

+ 7 - 0
AnKi/Gr/Vulkan/CommandBuffer.cpp

@@ -365,6 +365,13 @@ void CommandBuffer::buildAccelerationStructure(AccelerationStructure* as)
 	self.buildAccelerationStructureInternal(as);
 	self.buildAccelerationStructureInternal(as);
 }
 }
 
 
+void CommandBuffer::buildAccelerationStructureIndirect(AccelerationStructure* as, Buffer* scratchBuffer, PtrSize scratchBufferOffset,
+													   Buffer* rangeBuffer, PtrSize rangeBufferOffsset)
+{
+	ANKI_VK_SELF(CommandBufferImpl);
+	self.buildAccelerationStructureIndirectInternal(as, scratchBuffer, scratchBufferOffset, rangeBuffer, rangeBufferOffsset);
+}
+
 void CommandBuffer::upscale(GrUpscaler* upscaler, TextureView* inColor, TextureView* outUpscaledColor, TextureView* motionVectors, TextureView* depth,
 void CommandBuffer::upscale(GrUpscaler* upscaler, TextureView* inColor, TextureView* outUpscaledColor, TextureView* motionVectors, TextureView* depth,
 							TextureView* exposure, Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& motionVectorsScale)
 							TextureView* exposure, Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& motionVectorsScale)
 {
 {

+ 31 - 4
AnKi/Gr/Vulkan/CommandBufferImpl.cpp

@@ -495,7 +495,7 @@ void CommandBufferImpl::buildAccelerationStructureInternal(AccelerationStructure
 
 
 	// Create the scratch buffer
 	// Create the scratch buffer
 	BufferInitInfo bufferInit;
 	BufferInitInfo bufferInit;
-	bufferInit.m_usage = PrivateBufferUsageBit::kAccelerationStructureBuildScratch;
+	bufferInit.m_usage = BufferUsageBit::kAccelerationStructureBuildScratch;
 	bufferInit.m_size = asImpl.getBuildScratchBufferSize();
 	bufferInit.m_size = asImpl.getBuildScratchBufferSize();
 	BufferPtr scratchBuff = getGrManagerImpl().newBuffer(bufferInit);
 	BufferPtr scratchBuff = getGrManagerImpl().newBuffer(bufferInit);
 
 
@@ -504,12 +504,39 @@ void CommandBufferImpl::buildAccelerationStructureInternal(AccelerationStructure
 	VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
 	VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
 	asImpl.generateBuildInfo(scratchBuff->getGpuAddress(), buildInfo, rangeInfo);
 	asImpl.generateBuildInfo(scratchBuff->getGpuAddress(), buildInfo, rangeInfo);
 
 
-	// Do the command
+	// Run the command
 	Array<const VkAccelerationStructureBuildRangeInfoKHR*, 1> pRangeInfos = {&rangeInfo};
 	Array<const VkAccelerationStructureBuildRangeInfoKHR*, 1> pRangeInfos = {&rangeInfo};
 	vkCmdBuildAccelerationStructuresKHR(m_handle, 1, &buildInfo, &pRangeInfos[0]);
 	vkCmdBuildAccelerationStructuresKHR(m_handle, 1, &buildInfo, &pRangeInfos[0]);
+}
+
+void CommandBufferImpl::buildAccelerationStructureIndirectInternal(AccelerationStructure* as, Buffer* scratchBuffer, PtrSize scratchBufferOffset,
+																   Buffer* rangeBuffer, PtrSize rangeBufferOffsset)
+{
+	ANKI_ASSERT(as && scratchBuffer && rangeBuffer);
+	ANKI_ASSERT(as->getBuildScratchBufferSize() + scratchBufferOffset <= scratchBuffer->getSize());
+	ANKI_ASSERT(rangeBufferOffsset + sizeof(AccelerationStructureBuildRangeInfo) <= rangeBuffer->getSize());
+
+	commandCommon();
+
+	// Get objects
+	const AccelerationStructureImpl& asImpl = static_cast<AccelerationStructureImpl&>(*as);
+
+	// Create the build info
+	VkAccelerationStructureBuildGeometryInfoKHR buildInfo;
+	[[maybe_unused]] VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
+	asImpl.generateBuildInfo(scratchBuffer->getGpuAddress() + scratchBufferOffset, buildInfo, rangeInfo);
+
+	// Run the command
+	constexpr U32 kASCount = 1;
+	constexpr U32 kGeometryCount = 1;
+
+	Array<VkDeviceAddress, kASCount> rangeAddr = {rangeBuffer->getGpuAddress() + rangeBufferOffsset};
+	Array<U32, kASCount> strides = {sizeof(AccelerationStructureBuildRangeInfo)};
+
+	Array<U32, kGeometryCount> maxPrimitives = {asImpl.getMaxInstanceCount()};
+	Array<const U32*, kASCount> maxPrimitiveCountArr = {&maxPrimitives[0]};
 
 
-	// Push refs
-	m_microCmdb->pushObjectRef(as);
+	vkCmdBuildAccelerationStructuresIndirectKHR(m_handle, kASCount, &buildInfo, &rangeAddr[0], &strides[0], &maxPrimitiveCountArr[0]);
 }
 }
 
 
 #if ANKI_DLSS
 #if ANKI_DLSS

+ 3 - 0
AnKi/Gr/Vulkan/CommandBufferImpl.h

@@ -426,6 +426,9 @@ public:
 
 
 	void buildAccelerationStructureInternal(AccelerationStructure* as);
 	void buildAccelerationStructureInternal(AccelerationStructure* as);
 
 
+	void buildAccelerationStructureIndirectInternal(AccelerationStructure* as, Buffer* scratchBuffer, PtrSize scratchBufferOffset,
+													Buffer* rangeBuffer, PtrSize rangeBufferOffsset);
+
 	void upscaleInternal(GrUpscaler* upscaler, TextureView* inColor, TextureView* outUpscaledColor, TextureView* motionVectors, TextureView* depth,
 	void upscaleInternal(GrUpscaler* upscaler, TextureView* inColor, TextureView* outUpscaledColor, TextureView* motionVectors, TextureView* depth,
 						 TextureView* exposure, const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& motionVectorsScale);
 						 TextureView* exposure, const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& motionVectorsScale);
 
 

+ 1 - 1
AnKi/Gr/Vulkan/Common.cpp

@@ -324,7 +324,7 @@ VkBufferUsageFlags convertBufferUsageBit(BufferUsageBit usageMask)
 		out |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
 		out |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
 	}
 	}
 
 
-	if(!!(usageMask & PrivateBufferUsageBit::kAccelerationStructureBuildScratch))
+	if(!!(usageMask & BufferUsageBit::kAccelerationStructureBuildScratch))
 	{
 	{
 		out |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; // Spec says that this will be enough
 		out |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; // Spec says that this will be enough
 	}
 	}

+ 2 - 4
AnKi/Gr/Vulkan/Common.h

@@ -125,12 +125,10 @@ constexpr Second kMaxFenceOrSemaphoreWaitTime = 10.0;
 class PrivateBufferUsageBit
 class PrivateBufferUsageBit
 {
 {
 public:
 public:
-	static constexpr BufferUsageBit kAccelerationStructureBuildScratch = BufferUsageBit(1ull << 29ull);
-
 	/// Buffer that holds the memory for the actual AS.
 	/// Buffer that holds the memory for the actual AS.
-	static constexpr BufferUsageBit kAccelerationStructure = static_cast<BufferUsageBit>(1ull << 30ull);
+	static constexpr BufferUsageBit kAccelerationStructure = BufferUsageBit(1ull << 30ull);
 
 
-	static constexpr BufferUsageBit kAllPrivate = kAccelerationStructureBuildScratch | kAccelerationStructure;
+	static constexpr BufferUsageBit kAllPrivate = kAccelerationStructure;
 };
 };
 static_assert(!(BufferUsageBit::kAll & PrivateBufferUsageBit::kAllPrivate), "Update the bits in PrivateBufferUsageBit");
 static_assert(!(BufferUsageBit::kAll & PrivateBufferUsageBit::kAllPrivate), "Update the bits in PrivateBufferUsageBit");
 
 

+ 17 - 0
AnKi/Gr/Vulkan/FrameGarbageCollector.cpp

@@ -88,6 +88,14 @@ void FrameGarbageCollector::collectGarbage()
 			deleteInstance(GrMemoryPool::getSingleton(), bufferGarbage);
 			deleteInstance(GrMemoryPool::getSingleton(), bufferGarbage);
 		}
 		}
 
 
+		// Dispose AS garbage
+		while(!frame.m_asGarbage.isEmpty())
+		{
+			ASGarbage* garbage = frame.m_asGarbage.popBack();
+			vkDestroyAccelerationStructureKHR(getGrManagerImpl().getDevice(), garbage->m_asHandle, nullptr);
+			deleteInstance(GrMemoryPool::getSingleton(), garbage);
+		}
+
 		deleteInstance(GrMemoryPool::getSingleton(), &frame);
 		deleteInstance(GrMemoryPool::getSingleton(), &frame);
 	}
 	}
 
 
@@ -143,6 +151,15 @@ void FrameGarbageCollector::newBufferGarbage(BufferGarbage* bufferGarbage)
 	frame.m_bufferGarbage.pushBack(bufferGarbage);
 	frame.m_bufferGarbage.pushBack(bufferGarbage);
 }
 }
 
 
+void FrameGarbageCollector::newASGarbage(ASGarbage* garbage)
+{
+	ANKI_ASSERT(garbage);
+	LockGuard<Mutex> lock(m_mtx);
+	ANKI_ASSERT(m_initialized);
+	FrameGarbage& frame = getFrame();
+	frame.m_asGarbage.pushBack(garbage);
+}
+
 void FrameGarbageCollector::destroy()
 void FrameGarbageCollector::destroy()
 {
 {
 	LockGuard<Mutex> lock(m_mtx);
 	LockGuard<Mutex> lock(m_mtx);

+ 12 - 0
AnKi/Gr/Vulkan/FrameGarbageCollector.h

@@ -35,6 +35,14 @@ public:
 	GpuMemoryHandle m_memoryHandle;
 	GpuMemoryHandle m_memoryHandle;
 };
 };
 
 
+/// An AS has more data (the buffers used to build it) but we don't bother storing them here since buffers will be automatically garbage collected as well.
+/// @memberof FrameGarbageCollector
+class ASGarbage : public IntrusiveListEnabled<ASGarbage>
+{
+public:
+	VkAccelerationStructureKHR m_asHandle = VK_NULL_HANDLE;
+};
+
 /// This class gathers various kinds of garbage and disposes of them in some later frame, when it is safe to do so. This is
 /// This class gathers various kinds of garbage and disposes of them in some later frame, when it is safe to do so. This is
 /// used on bindless textures and buffers where we have to wait until the frame where they were deleted is done.
 /// used on bindless textures and buffers where we have to wait until the frame where they were deleted is done.
 class FrameGarbageCollector
 class FrameGarbageCollector
@@ -63,12 +71,16 @@ public:
 	/// @note It's thread-safe.
 	/// @note It's thread-safe.
 	void newBufferGarbage(BufferGarbage* bufferGarbage);
 	void newBufferGarbage(BufferGarbage* bufferGarbage);
 
 
+	/// @note It's thread-safe.
+	void newASGarbage(ASGarbage* garbage);
+
 private:
 private:
 	class FrameGarbage : public IntrusiveListEnabled<FrameGarbage>
 	class FrameGarbage : public IntrusiveListEnabled<FrameGarbage>
 	{
 	{
 	public:
 	public:
 		IntrusiveList<TextureGarbage> m_textureGarbage;
 		IntrusiveList<TextureGarbage> m_textureGarbage;
 		IntrusiveList<BufferGarbage> m_bufferGarbage;
 		IntrusiveList<BufferGarbage> m_bufferGarbage;
+		IntrusiveList<ASGarbage> m_asGarbage;
 		MicroFencePtr m_fence;
 		MicroFencePtr m_fence;
 	};
 	};
 
 

+ 6 - 5
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -21,16 +21,17 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 	ANKI_ASSERT(instanceCount > 0);
 	ANKI_ASSERT(instanceCount > 0);
 
 
 	// Create the instances. Allocate but not construct to save some CPU time
 	// Create the instances. Allocate but not construct to save some CPU time
-	void* instancesMem = ctx.m_tempPool->allocate(sizeof(AccelerationStructureInstance) * instanceCount, alignof(AccelerationStructureInstance));
-	WeakArray<AccelerationStructureInstance> instances(static_cast<AccelerationStructureInstance*>(instancesMem), instanceCount);
+	void* instancesMem =
+		ctx.m_tempPool->allocate(sizeof(AccelerationStructureInstanceInfo) * instanceCount, alignof(AccelerationStructureInstanceInfo));
+	WeakArray<AccelerationStructureInstanceInfo> instances(static_cast<AccelerationStructureInstanceInfo*>(instancesMem), instanceCount);
 
 
 	for(U32 instanceIdx = 0; instanceIdx < instanceCount; ++instanceIdx)
 	for(U32 instanceIdx = 0; instanceIdx < instanceCount; ++instanceIdx)
 	{
 	{
 		const RayTracingInstanceQueueElement& element = instanceElements[instanceIdx];
 		const RayTracingInstanceQueueElement& element = instanceElements[instanceIdx];
 
 
 		// Init instance
 		// Init instance
-		AccelerationStructureInstance& out = instances[instanceIdx];
-		::new(&out) AccelerationStructureInstance();
+		AccelerationStructureInstanceInfo& out = instances[instanceIdx];
+		::new(&out) AccelerationStructureInstanceInfo();
 		out.m_bottomLevel.reset(element.m_bottomLevelAccelerationStructure);
 		out.m_bottomLevel.reset(element.m_bottomLevelAccelerationStructure);
 		memcpy(&out.m_transform, &element.m_transform, sizeof(out.m_transform));
 		memcpy(&out.m_transform, &element.m_transform, sizeof(out.m_transform));
 		out.m_hitgroupSbtRecordIndex = instanceIdx;
 		out.m_hitgroupSbtRecordIndex = instanceIdx;
@@ -40,7 +41,7 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 	// Create the TLAS
 	// Create the TLAS
 	AccelerationStructureInitInfo initInf("MainTlas");
 	AccelerationStructureInitInfo initInf("MainTlas");
 	initInf.m_type = AccelerationStructureType::kTopLevel;
 	initInf.m_type = AccelerationStructureType::kTopLevel;
-	initInf.m_topLevel.m_instances = instances;
+	initInf.m_topLevel.m_directArgs.m_instances = instances;
 	m_runCtx.m_tlas = GrManager::getSingleton().newAccelerationStructure(initInf);
 	m_runCtx.m_tlas = GrManager::getSingleton().newAccelerationStructure(initInf);
 
 
 	// Need a cleanup
 	// Need a cleanup

+ 27 - 0
AnKi/Shaders/Include/Common.h

@@ -790,4 +790,31 @@ struct DispatchIndirectArgs
 	U32 m_threadGroupCountZ;
 	U32 m_threadGroupCountZ;
 };
 };
 
 
+/// Mirrors VkAccelerationStructureBuildRangeInfoKHR.
+struct AccelerationStructureBuildRangeInfo
+{
+	U32 m_primitiveCount; ///< For a TLAS it's the instance count.
+	U32 m_primitiveOffset;
+	U32 m_firstVertex;
+	U32 m_transformOffset;
+};
+
+/// Mirrors VkGeometryInstanceFlagBitsKHR
+enum class AccellerationStructureFlag : U32
+{
+	kTriangleFacingCullDisable = 1 << 0,
+	kFlipFacing = 1 << 1,
+	kForceOpaque = 1 << 2,
+	kForceNoOpaque = 1 << 3
+};
+
+/// Mirrors VkAccelerationStructureInstanceKHR.
+struct AccelerationStructureInstance
+{
+	Mat3x4 m_transform;
+	U32 m_instanceCustomIndex24_mask8;
+	U32 m_instanceShaderBindingTableRecordOffset24_flags8; ///< flags is AccellerationStructureFlag.
+	UVec2 m_accelerationStructureAddress;
+};
+
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 1 - 0
AnKi/Util/HighRezTimerWindows.cpp

@@ -7,6 +7,7 @@
 #include <AnKi/Util/Assert.h>
 #include <AnKi/Util/Assert.h>
 #include <AnKi/Util/Win32Minimal.h>
 #include <AnKi/Util/Win32Minimal.h>
 #include <cstdio>
 #include <cstdio>
+#include <cstdlib>
 
 
 namespace anki {
 namespace anki {
 
 

+ 4 - 4
Tests/Gr/Gr.cpp

@@ -2464,8 +2464,8 @@ ANKI_TEST(Gr, RayQuery)
 	{
 	{
 		AccelerationStructureInitInfo init;
 		AccelerationStructureInitInfo init;
 		init.m_type = AccelerationStructureType::kTopLevel;
 		init.m_type = AccelerationStructureType::kTopLevel;
-		Array<AccelerationStructureInstance, 1> instances = {{{blas, Mat3x4::getIdentity()}}};
-		init.m_topLevel.m_instances = instances;
+		Array<AccelerationStructureInstanceInfo, 1> instances = {{{blas, Mat3x4::getIdentity()}}};
+		init.m_topLevel.m_directArgs.m_instances = instances;
 
 
 		tlas = g_gr->newAccelerationStructure(init);
 		tlas = g_gr->newAccelerationStructure(init);
 	}
 	}
@@ -2900,7 +2900,7 @@ void main()
 		}
 		}
 
 
 		// TLAS
 		// TLAS
-		Array<AccelerationStructureInstance, U32(GeomWhat::kCount)> instances;
+		Array<AccelerationStructureInstanceInfo, U32(GeomWhat::kCount)> instances;
 		U32 count = 0;
 		U32 count = 0;
 		for(Geom& g : geometries)
 		for(Geom& g : geometries)
 		{
 		{
@@ -2914,7 +2914,7 @@ void main()
 
 
 		AccelerationStructureInitInfo inf;
 		AccelerationStructureInitInfo inf;
 		inf.m_type = AccelerationStructureType::kTopLevel;
 		inf.m_type = AccelerationStructureType::kTopLevel;
-		inf.m_topLevel.m_instances = instances;
+		inf.m_topLevel.m_directArgs.m_instances = instances;
 
 
 		tlas = g_gr->newAccelerationStructure(inf);
 		tlas = g_gr->newAccelerationStructure(inf);
 	}
 	}