Ver código fonte

Some AS refactoring

Panagiotis Christopoulos Charitos 4 meses atrás
pai
commit
a6594c4634

+ 38 - 30
AnKi/Gr/AccelerationStructure.h

@@ -27,52 +27,48 @@ public:
 	Format m_positionsFormat = Format::kNone;
 	U32 m_positionCount = 0;
 
-	Bool isValid() const
+	Bool isValid(Bool validateBuffers) const
 	{
 		Bool valid = true;
 
-		valid = valid && (m_indexBuffer.isValid() && m_indexCount * getIndexSize(m_indexType) == m_indexBuffer.getRange());
+		valid = valid && (m_indexCount >= 3 && m_indexType != IndexType::kCount);
+
+		if(validateBuffers)
+		{
+			valid = valid && (m_indexBuffer.isValid() && m_indexCount * getIndexSize(m_indexType) == m_indexBuffer.getRange());
+		}
 
 		const U32 vertSize = getFormatInfo(m_positionsFormat).m_texelSize;
-		valid = valid
-				&& (m_positionBuffer.isValid() && m_positionStride >= vertSize && m_positionStride * m_positionCount == m_positionBuffer.getRange());
+		valid = valid && (m_positionStride >= vertSize && m_positionCount >= 3);
+
+		if(validateBuffers)
+		{
+			valid = valid && (m_positionBuffer.isValid() && m_positionStride * m_positionCount == m_positionBuffer.getRange());
+		}
 
 		return valid;
 	}
 };
 
-/// @memberof AccelerationStructureInitInfo
-class AccelerationStructureInstanceInfo
-{
-public:
-	AccelerationStructure* m_bottomLevel = nullptr;
-	Mat3x4 m_transform = Mat3x4::getIdentity();
-	U32 m_hitgroupSbtRecordIndex = 0; ///< Points to a hitgroup SBT record.
-	U8 m_mask = 0xFF; ///< A mask that this instance belongs to. Will be tested against what's in traceRayEXT().
-};
-
 /// @memberof AccelerationStructureInitInfo
 class TopLevelAccelerationStructureInitInfo
 {
 public:
-	class
-	{
-	public:
-		ConstWeakArray<AccelerationStructureInstanceInfo> m_instances;
-	} m_directArgs; ///< Pass some representation of the instances.
+	U32 m_instanceCount = 0; ///< Number of AccelerationStructureInstance structs expected in m_instancesBuffer.
+	BufferView m_instancesBuffer; ///< Filled with AccelerationStructureInstance structs.
 
-	class
+	Bool isValid(Bool validateBuffers) const // With validateBuffers==false only the instance count is checked.
 	{
-	public:
-		U32 m_maxInstanceCount = 0;
-		BufferView m_instancesBuffer; ///< Filled with AccelerationStructureInstance structs.
-	} m_indirectArgs; ///< Pass the instances GPU buffer directly.
+		Bool valid = true;
 
-	Bool isValid() const
-	{
-		return m_directArgs.m_instances.getSize() > 0
-			   || (m_indirectArgs.m_maxInstanceCount > 0 && m_indirectArgs.m_instancesBuffer.isValid()
-				   && m_indirectArgs.m_instancesBuffer.getRange() == sizeof(AccelerationStructureInstance) * m_indirectArgs.m_maxInstanceCount);
+		valid = valid && m_instanceCount > 0;
+
+		if(validateBuffers) // The view must be valid before its range is compared against the instance count.
+		{
+			valid = valid && (m_instancesBuffer.isValid() && m_instancesBuffer.getRange() == sizeof(AccelerationStructureInstance) * m_instanceCount);
+		}
+
+		return valid;
 	}
 };
 
@@ -85,6 +81,8 @@ public:
 	BottomLevelAccelerationStructureInitInfo m_bottomLevel;
 	TopLevelAccelerationStructureInitInfo m_topLevel;
 
+	BufferView m_accelerationStructureBuffer; ///< Optionally supply the buffer of the AS.
+
 	AccelerationStructureInitInfo(CString name = {})
 		: GrBaseInitInfo(name)
 	{
@@ -97,7 +95,17 @@ public:
 			return false;
 		}
 
-		return (m_type == AccelerationStructureType::kBottomLevel) ? m_bottomLevel.isValid() : m_topLevel.isValid();
+		return (m_type == AccelerationStructureType::kBottomLevel) ? m_bottomLevel.isValid(true) : m_topLevel.isValid(true);
+	}
+
+	/// Validate only what is needed to query memory requirements: the type must be set and the per-type info
+	/// must pass isValid(false), i.e. with the buffer checks disabled.
+	Bool isValidForGettingMemoryRequirements() const
+	{
+		const Bool typeIsSet = m_type != AccelerationStructureType::kCount;
+		if(typeIsSet && m_type == AccelerationStructureType::kBottomLevel)
+		{
+			return m_bottomLevel.isValid(false);
+		}
+
+		// Anything that is not bottom level is validated as top level
+		return typeIsSet && m_topLevel.isValid(false);
+	}
 };
 

+ 5 - 6
AnKi/Gr/Common.h

@@ -181,9 +181,6 @@ public:
 	/// The max combined size of shared variables (with paddings) in compute shaders.
 	PtrSize m_computeSharedMemorySize = 16_KB;
 
-	/// Alignment of the scratch buffer used in AS building.
-	U32 m_accelerationStructureBuildScratchOffsetAlignment = 0;
-
 	/// Each SBT record should be a multiple of this.
 	U32 m_sbtRecordAlignment = kMaxU32;
 
@@ -734,6 +731,7 @@ enum class BufferUsageBit : U64
 	kAccelerationStructureBuild = 1ull << 19ull, ///< Will be used as a position or index buffer in a BLAS build.
 	kShaderBindingTable = 1ull << 20ull, ///< Will be used as SBT in a traceRays() command.
 	kAccelerationStructureBuildScratch = 1ull << 21ull, ///< Used in buildAccelerationStructureXXX commands.
+	kAccelerationStructure = 1ull << 22ull, ///< Will be used as AS.
 
 	// Derived
 	kAllConstant = kConstantGeometry | kConstantPixel | kConstantCompute | kConstantTraceRays,
@@ -748,9 +746,10 @@ enum class BufferUsageBit : U64
 	kAllCompute = kConstantCompute | kSrvCompute | kUavCompute | kIndirectCompute,
 	kAllTraceRays = kConstantTraceRays | kSrvTraceRays | kUavTraceRays | kIndirectTraceRays | kShaderBindingTable,
 
-	kAllRayTracing = kAllTraceRays | kAccelerationStructureBuild | kAccelerationStructureBuildScratch,
-	kAllRead = kAllConstant | kAllSrv | kAllUav | kVertexOrIndex | kAllIndirect | kCopySource | kAccelerationStructureBuild | kShaderBindingTable,
-	kAllWrite = kAllUav | kCopyDestination | kAccelerationStructureBuildScratch,
+	kAllRayTracing = kAllTraceRays | kAccelerationStructureBuild | kAccelerationStructureBuildScratch | kAccelerationStructure,
+	kAllRead = kAllConstant | kAllSrv | kAllUav | kVertexOrIndex | kAllIndirect | kCopySource | kAccelerationStructureBuild | kShaderBindingTable
+			   | kAccelerationStructure,
+	kAllWrite = kAllUav | kCopyDestination | kAccelerationStructureBuildScratch | kAccelerationStructure,
 
 	kAllShaderResource = kAllConstant | kAllSrv | kAllUav,
 

+ 73 - 80
AnKi/Gr/D3D/D3DAccelerationStructure.cpp

@@ -23,8 +23,8 @@ AccelerationStructure* AccelerationStructure::newInstance(const AccelerationStru
 
 U64 AccelerationStructure::getGpuAddress() const
 {
-	ANKI_ASSERT(!"TODO");
-	return 0;
+	ANKI_D3D_SELF_CONST(AccelerationStructureImpl);
+	return self.m_asBuffer->getGpuAddress() + self.m_asBufferOffset; // GPU address of the backing buffer plus the AS offset inside it
 }
 
 AccelerationStructureImpl::~AccelerationStructureImpl()
@@ -37,12 +37,35 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 
 	m_type = inf.m_type;
 
+	// Query how much memory the AS itself and its build scratch need
+	PtrSize asBufferSize;
+	getMemoryRequirement(inf, asBufferSize, m_scratchBufferSize);
+
+	// Allocate AS buffer
+	BufferView asBuff = inf.m_accelerationStructureBuffer;
+	if(!asBuff.isValid())
+	{
+		// No buffer was supplied, create one that is exactly as big as required
+		BufferInitInfo bufferInit(inf.getName());
+		bufferInit.m_usage = BufferUsageBit::kAccelerationStructure;
+		bufferInit.m_size = asBufferSize;
+		m_asBuffer = getGrManagerImpl().newBuffer(bufferInit);
+		m_asBufferOffset = 0;
+	}
+	else
+	{
+		// Use the supplied buffer but first round its offset up to the alignment D3D12 requires for an AS
+		const PtrSize alignedOffset = getAlignedRoundUp(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT, asBuff.getOffset());
+		asBuff = asBuff.incrementOffset(alignedOffset - asBuff.getOffset());
+
+		// The supplied view must be at least as big as what the AS requires
+		ANKI_ASSERT(asBuff.getRange() >= asBufferSize);
+
+		m_asBuffer.reset(&asBuff.getBuffer());
+		m_asBufferOffset = asBuff.getOffset();
+	}
+
 	if(inf.m_type == AccelerationStructureType::kBottomLevel)
 	{
 		// Setup the geom descr
 		m_blas.m_geometryDesc = {};
 		m_blas.m_geometryDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;
-		m_blas.m_geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; // TODO
+		m_blas.m_geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_NONE;
 		m_blas.m_geometryDesc.Triangles.Transform3x4 = 0;
 		m_blas.m_geometryDesc.Triangles.IndexFormat = convertIndexType(inf.m_bottomLevel.m_indexType);
 		m_blas.m_geometryDesc.Triangles.VertexFormat = convertFormat(inf.m_bottomLevel.m_positionsFormat);
@@ -54,85 +77,14 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 			inf.m_bottomLevel.m_positionBuffer.getBuffer().getGpuAddress() + inf.m_bottomLevel.m_positionBuffer.getOffset();
 		m_blas.m_geometryDesc.Triangles.VertexBuffer.StrideInBytes = inf.m_bottomLevel.m_positionStride;
 
-		// Get sizes
-		D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
-		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
-		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
-		inputs.NumDescs = 1;
-		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
-		inputs.pGeometryDescs = &m_blas.m_geometryDesc;
-
-		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
-		getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
-		m_scratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
-
-		// Create the AS buffer
-		BufferInitInfo asBuffInit(inf.getName());
-		asBuffInit.m_size = prebuildInfo.ResultDataMaxSizeInBytes;
-		asBuffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-		m_asBuffer.reset(GrManager::getSingleton().newBuffer(asBuffInit).get());
+		m_blas.m_indexBuff.reset(&inf.m_bottomLevel.m_indexBuffer.getBuffer());
+		m_blas.m_positionsBuff.reset(&inf.m_bottomLevel.m_positionBuffer.getBuffer());
 	}
 	else
 	{
-		const Bool isIndirect = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount > 0;
-		const U32 instanceCount = (isIndirect) ? inf.m_topLevel.m_indirectArgs.m_maxInstanceCount : inf.m_topLevel.m_directArgs.m_instances.getSize();
-
-		// Get sizes
-		D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
-		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
-		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD;
-		inputs.NumDescs = instanceCount;
-		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
-
-		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
-		getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
-		m_scratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
-
-		// Create the AS buffer
-		BufferInitInfo asBuffInit(inf.getName());
-		asBuffInit.m_size = prebuildInfo.ResultDataMaxSizeInBytes;
-		asBuffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-		m_asBuffer.reset(GrManager::getSingleton().newBuffer(asBuffInit).get());
-
-		// Create instances buffer
-		if(!isIndirect)
-		{
-			BufferInitInfo buffInit("AS instances");
-			buffInit.m_size = inf.m_topLevel.m_directArgs.m_instances.getSize() * sizeof(AccelerationStructureInstance);
-			buffInit.m_usage = BufferUsageBit::kAllUav;
-			buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
-			m_tlas.m_instancesBuff.reset(GrManager::getSingleton().newBuffer(buffInit).get());
-
-			WeakArray<AccelerationStructureInstance> mapped(
-				static_cast<AccelerationStructureInstance*>(m_tlas.m_instancesBuff->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite)),
-				inf.m_topLevel.m_directArgs.m_instances.getSize());
-
-			for(U32 i = 0; i < inf.m_topLevel.m_directArgs.m_instances.getSize(); ++i)
-			{
-				const AccelerationStructureInstanceInfo& in = inf.m_topLevel.m_directArgs.m_instances[i];
-				AccelerationStructureInstance& out = mapped[i];
-
-				const AccelerationStructureImpl& blas = static_cast<const AccelerationStructureImpl&>(*in.m_bottomLevel);
-				const U64 blasAddr = blas.m_asBuffer->getGpuAddress();
-
-				const U32 flags =
-					D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE | D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE;
-
-				out.m_transform = in.m_transform;
-				out.m_mask8_instanceCustomIndex24 = (in.m_mask << 24) | (i & 0xFFFFFF);
-				out.m_flags8_instanceShaderBindingTableRecordOffset24 = (flags << 24) | in.m_hitgroupSbtRecordIndex;
-				memcpy(&out.m_accelerationStructureAddress, &blasAddr, sizeof(blasAddr));
-			}
-
-			m_tlas.m_instancesBuff->unmap();
-		}
-		else
-		{
-			m_tlas.m_instancesBuff.reset(&inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getBuffer());
-			m_tlas.m_instancesBuffOffset = inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getOffset();
-		}
-
-		m_tlas.m_instanceCount = instanceCount;
+		m_tlas.m_instancesBuff.reset(&inf.m_topLevel.m_instancesBuffer.getBuffer());
+		m_tlas.m_instancesBuffOffset = inf.m_topLevel.m_instancesBuffer.getOffset();
+		m_tlas.m_instanceCount = inf.m_topLevel.m_instanceCount;
 	}
 
 	return Error::kNone;
@@ -141,7 +93,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 void AccelerationStructureImpl::fillBuildInfo(BufferView scratchBuff, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& buildDesc) const
 {
 	buildDesc = {};
-	buildDesc.DestAccelerationStructureData = m_asBuffer->getGpuAddress();
+	buildDesc.DestAccelerationStructureData = m_asBuffer->getGpuAddress() + m_asBufferOffset;
 	buildDesc.ScratchAccelerationStructureData = scratchBuff.getBuffer().getGpuAddress() + scratchBuff.getOffset();
 
 	if(m_type == AccelerationStructureType::kBottomLevel)
@@ -252,4 +204,45 @@ D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationS
 	return barrier;
 }
 
+/// Ask the device how much memory an AS described by @a inf needs, without creating it.
+/// @param inf The AS description. It has to pass isValidForGettingMemoryRequirements().
+/// @param[out] asBufferSize Set to ResultDataMaxSizeInBytes of the prebuild info.
+/// @param[out] buildScratchBufferSize Set to ScratchDataSizeInBytes of the prebuild info.
+void AccelerationStructureImpl::getMemoryRequirement(const AccelerationStructureInitInfo& inf, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize)
+{
+	ANKI_ASSERT(inf.isValidForGettingMemoryRequirements());
+
+	D3D12_RAYTRACING_GEOMETRY_DESC geomDesc;
+	D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
+	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
+
+	if(inf.m_type == AccelerationStructureType::kBottomLevel)
+	{
+		// Only counts, formats and the stride are filled in. No buffer addresses are set for the query
+		geomDesc = {};
+		geomDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;
+		geomDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_NONE;
+		geomDesc.Triangles.Transform3x4 = 0;
+		geomDesc.Triangles.IndexFormat = convertIndexType(inf.m_bottomLevel.m_indexType);
+		geomDesc.Triangles.VertexFormat = convertFormat(inf.m_bottomLevel.m_positionsFormat);
+		geomDesc.Triangles.IndexCount = inf.m_bottomLevel.m_indexCount;
+		geomDesc.Triangles.VertexCount = inf.m_bottomLevel.m_positionCount;
+		geomDesc.Triangles.VertexBuffer.StrideInBytes = inf.m_bottomLevel.m_positionStride;
+
+		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
+		inputs.NumDescs = 1;
+		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+		inputs.pGeometryDescs = &geomDesc;
+
+		// geomDesc is declared at function scope so this pointer stays valid for the single query below
+	}
+	else
+	{
+		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD;
+		inputs.NumDescs = inf.m_topLevel.m_instanceCount;
+		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+	}
+
+	// One query serves both AS types
+	getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
+	asBufferSize = prebuildInfo.ResultDataMaxSizeInBytes;
+	buildScratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
+}
+
 } // end namespace anki

+ 8 - 0
AnKi/Gr/D3D/D3DAccelerationStructure.h

@@ -16,6 +16,8 @@ namespace anki {
 /// AccelerationStructure implementation.
 class AccelerationStructureImpl final : public AccelerationStructure
 {
+	friend class AccelerationStructure;
+
 public:
 	AccelerationStructureImpl(CString name)
 		: AccelerationStructure(name)
@@ -35,8 +37,11 @@ public:
 		return *m_asBuffer;
 	}
 
+	static void getMemoryRequirement(const AccelerationStructureInitInfo& init, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize);
+
 private:
 	BufferInternalPtr m_asBuffer;
+	PtrSize m_asBufferOffset = kMaxPtrSize;
 
 	class
 	{
@@ -50,6 +55,9 @@ private:
 	{
 	public:
 		D3D12_RAYTRACING_GEOMETRY_DESC m_geometryDesc;
+
+		BufferInternalPtr m_positionsBuff;
+		BufferInternalPtr m_indexBuff;
 	} m_blas;
 };
 /// @}

+ 3 - 3
AnKi/Gr/D3D/D3DBuffer.cpp

@@ -142,18 +142,18 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	resourceDesc.SampleDesc.Quality = 0;
 	resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
 	resourceDesc.Flags = {};
-	if(!!(m_usage & BufferUsageBit::kAllUav) || !!(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
+	if(!!(m_usage & BufferUsageBit::kAllUav) || !!(m_usage & BufferUsageBit::kAccelerationStructure))
 	{
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 	}
-	if(!(m_usage & BufferUsageBit::kAllShaderResource) && !(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
+	if(!(m_usage & BufferUsageBit::kAllShaderResource) && !(m_usage & BufferUsageBit::kAccelerationStructure))
 	{
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
 	}
 
 	// Create resource
 	D3D12_RESOURCE_STATES initialState;
-	if(!!(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
+	if(!!(m_usage & BufferUsageBit::kAccelerationStructure))
 	{
 		initialState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
 	}

+ 0 - 11
AnKi/Gr/D3D/D3DCommon.h

@@ -81,17 +81,6 @@ void invokeDred();
 using D3D12GraphicsCommandListX = ID3D12GraphicsCommandList10;
 using ID3D12DeviceX = ID3D12Device14;
 
-/// Some internal buffer usage flags.
-class PrivateBufferUsageBit
-{
-public:
-	/// Buffer that holds the memory for the actual AS.
-	static constexpr BufferUsageBit kAccelerationStructure = BufferUsageBit(1ull << 30ull);
-
-	static constexpr BufferUsageBit kAllPrivate = kAccelerationStructure;
-};
-static_assert(!(BufferUsageBit::kAll & PrivateBufferUsageBit::kAllPrivate), "Update the bits in PrivateBufferUsageBit");
-
 enum class D3DTextureViewType : U8
 {
 	kSrv,

+ 7 - 1
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -101,6 +101,13 @@ GrManager::~GrManager()
 {
 }
 
+/// Return the AS size reported by AccelerationStructureImpl::getMemoryRequirement() plus one
+/// D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT of padding. The scratch size is queried but discarded.
+PtrSize GrManager::getAccelerationStructureMemoryRequirement(const AccelerationStructureInitInfo& init) const
+{
+	PtrSize asBufferSize = 0;
+	PtrSize scratchSizeUnused = 0;
+	AccelerationStructureImpl::getMemoryRequirement(init, asBufferSize, scratchSizeUnused);
+
+	return D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT + asBufferSize;
+}
+
 Error GrManager::init(GrManagerInitInfo& inf)
 {
 	ANKI_D3D_SELF(GrManagerImpl);
@@ -526,7 +533,6 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 		m_capabilities.m_texelBufferBindOffsetAlignment = 32;
 		m_capabilities.m_fastConstantsSize = kMaxFastConstantsSize;
 		m_capabilities.m_computeSharedMemorySize = D3D12_CS_TGSM_REGISTER_COUNT * sizeof(F32);
-		m_capabilities.m_accelerationStructureBuildScratchOffsetAlignment = 32; // ?
 		m_capabilities.m_sbtRecordAlignment = 32; // ?
 		m_capabilities.m_maxDrawIndirectCount = kMaxU32;
 		m_capabilities.m_discreteGpu = !architecture.UMA;

+ 3 - 0
AnKi/Gr/GrManager.h

@@ -82,6 +82,9 @@ public:
 	[[nodiscard]] AccelerationStructurePtr newAccelerationStructure(const AccelerationStructureInitInfo& init);
 	/// @}
 
+	/// Get the size of the acceleration structure if you are planning to supply a custom buffer.
+	PtrSize getAccelerationStructureMemoryRequirement(const AccelerationStructureInitInfo& init) const;
+
 	ANKI_INTERNAL CString getCacheDirectory() const
 	{
 		return m_cacheDir.toCString();

+ 103 - 85
AnKi/Gr/Vulkan/VkAccelerationStructure.cpp

@@ -24,7 +24,7 @@ AccelerationStructure* AccelerationStructure::newInstance(const AccelerationStru
 U64 AccelerationStructure::getGpuAddress() const
 {
 	ANKI_VK_SELF_CONST(AccelerationStructureImpl);
-	return self.getAsDeviceAddress();
+	return self.m_deviceAddress; // NOTE(review): the removed getAsDeviceAddress() asserted a non-zero address; this direct read does not
 }
 
 AccelerationStructureImpl::~AccelerationStructureImpl()
@@ -43,6 +43,31 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 	m_type = inf.m_type;
 	const VkDevice vkdev = getGrManagerImpl().getDevice();
 
+	// Query how much memory the AS itself and its build scratch need. The scratch size is padded by the
+	// scratch alignment
+	PtrSize asBufferSize;
+	getMemoryRequirement(inf, asBufferSize, m_scratchBufferSize);
+	m_scratchBufferSize += getGrManagerImpl().getVulkanCapabilities().m_asBuildScratchAlignment;
+
+	// Allocate AS buffer
+	BufferView asBuff = inf.m_accelerationStructureBuffer;
+	if(!asBuff.isValid())
+	{
+		// No buffer was supplied, create one that is exactly as big as required
+		BufferInitInfo bufferInit(inf.getName());
+		bufferInit.m_usage = BufferUsageBit::kAccelerationStructure;
+		bufferInit.m_size = asBufferSize;
+		m_asBuffer = getGrManagerImpl().newBuffer(bufferInit);
+		m_asBufferOffset = 0;
+	}
+	else
+	{
+		// Use the supplied buffer but first round its offset up to m_asBufferAlignment
+		const PtrSize alignedOffset = getAlignedRoundUp(getGrManagerImpl().getVulkanCapabilities().m_asBufferAlignment, asBuff.getOffset());
+		asBuff = asBuff.incrementOffset(alignedOffset - asBuff.getOffset());
+
+		// The supplied view must be at least as big as what the AS requires
+		ANKI_ASSERT(asBuff.getRange() >= asBufferSize);
+
+		m_asBuffer.reset(&asBuff.getBuffer());
+		m_asBufferOffset = asBuff.getOffset();
+	}
+
+	// Create the AS
 	if(m_type == AccelerationStructureType::kBottomLevel)
 	{
 		// Geom
@@ -58,7 +83,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		geom.geometry.triangles.indexType = convertIndexType(inf.m_bottomLevel.m_indexType);
 		geom.geometry.triangles.indexData.deviceAddress =
 			inf.m_bottomLevel.m_indexBuffer.getBuffer().getGpuAddress() + inf.m_bottomLevel.m_indexBuffer.getOffset();
-		geom.flags = 0; // VK_GEOMETRY_OPAQUE_BIT_KHR; // TODO
+		geom.flags = 0;
 
 		// Geom build info
 		VkAccelerationStructureBuildGeometryInfoKHR& buildInfo = m_buildInfo;
@@ -69,26 +94,13 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		buildInfo.geometryCount = 1;
 		buildInfo.pGeometries = &geom;
 
-		// Get memory info
-		VkAccelerationStructureBuildSizesInfoKHR buildSizes = {};
-		const U32 primitiveCount = inf.m_bottomLevel.m_indexCount / 3;
-		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
-		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveCount, &buildSizes);
-		m_scratchBufferSize = buildSizes.buildScratchSize;
-
-		// Create the buffer that holds the AS memory
-		BufferInitInfo bufferInit(inf.getName());
-		bufferInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-		bufferInit.m_size = buildSizes.accelerationStructureSize;
-		m_asBuffer = getGrManagerImpl().newBuffer(bufferInit);
-
 		// Create the AS
 		VkAccelerationStructureCreateInfoKHR asCi = {};
 		asCi.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
 		asCi.createFlags = 0;
 		asCi.buffer = static_cast<const BufferImpl&>(*m_asBuffer).getHandle();
-		asCi.offset = 0;
-		asCi.size = buildSizes.accelerationStructureSize;
+		asCi.offset = m_asBufferOffset;
+		asCi.size = asBufferSize;
 		asCi.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
 		ANKI_VK_CHECK(vkCreateAccelerationStructureKHR(vkdev, &asCi, nullptr, &m_handle));
 
@@ -102,68 +114,24 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		buildInfo.dstAccelerationStructure = m_handle;
 
 		// Range info
-		m_rangeInfo.primitiveCount = primitiveCount;
+		m_rangeInfo.primitiveCount = inf.m_bottomLevel.m_indexCount / 3;
+
+		// Track both geometry input buffers: the positions and the indices
+		m_blas.m_positionsBuffer.reset(&inf.m_bottomLevel.m_positionBuffer.getBuffer());
+		m_blas.m_indexBuffer.reset(&inf.m_bottomLevel.m_indexBuffer.getBuffer());
 	}
 	else
 	{
-		const Bool isIndirect = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount > 0;
-
-		if(!isIndirect)
-		{
-			// Create and populate the instances buffer
-			m_topLevelInfo.m_blases.resizeStorage(inf.m_topLevel.m_directArgs.m_instances.getSize());
-
-			BufferInitInfo buffInit("AS instances");
-			buffInit.m_size = sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_directArgs.m_instances.getSize();
-			buffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-			buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
-			m_topLevelInfo.m_instancesBuffer = getGrManagerImpl().newBuffer(buffInit);
-
-			VkAccelerationStructureInstanceKHR* instances =
-				static_cast<VkAccelerationStructureInstanceKHR*>(m_topLevelInfo.m_instancesBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
-			for(U32 i = 0; i < inf.m_topLevel.m_directArgs.m_instances.getSize(); ++i)
-			{
-				VkAccelerationStructureInstanceKHR& outInst = instances[i];
-				const AccelerationStructureInstanceInfo& inInst = inf.m_topLevel.m_directArgs.m_instances[i];
-				static_assert(sizeof(outInst.transform) == sizeof(inInst.m_transform), "See file");
-				memcpy(&outInst.transform, &inInst.m_transform, sizeof(inInst.m_transform));
-				outInst.instanceCustomIndex = i & 0xFFFFFF;
-				outInst.mask = inInst.m_mask;
-				outInst.instanceShaderBindingTableRecordOffset = inInst.m_hitgroupSbtRecordIndex & 0xFFFFFF;
-				outInst.flags =
-					VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
-				outInst.accelerationStructureReference = static_cast<const AccelerationStructureImpl&>(*inInst.m_bottomLevel).m_deviceAddress;
-				ANKI_ASSERT(outInst.accelerationStructureReference != 0);
-
-				// Hold the reference
-				m_topLevelInfo.m_blases.emplaceBack(inf.m_topLevel.m_directArgs.m_instances[i].m_bottomLevel);
-			}
-
-			m_topLevelInfo.m_instancesBuffer->flush(0, kMaxPtrSize);
-			m_topLevelInfo.m_instancesBuffer->unmap();
-		}
-		else
-		{
-			// Instances buffer already created
-			ANKI_ASSERT(sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_indirectArgs.m_maxInstanceCount
-						<= inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getRange());
-			m_topLevelInfo.m_instancesBuffer.reset(&inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getBuffer());
-
-			m_topLevelInfo.m_maxInstanceCount = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount;
-		}
+		ANKI_ASSERT(sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_instanceCount <= inf.m_topLevel.m_instancesBuffer.getRange());
+		m_tlas.m_instancesBuffer.reset(&inf.m_topLevel.m_instancesBuffer.getBuffer());
 
 		// Geom
 		VkAccelerationStructureGeometryKHR& geom = m_geometry;
 		geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
 		geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
 		geom.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
-		geom.geometry.instances.data.deviceAddress = m_topLevelInfo.m_instancesBuffer->getGpuAddress();
-		if(isIndirect)
-		{
-			geom.geometry.instances.data.deviceAddress += inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getOffset();
-		}
+		geom.geometry.instances.data.deviceAddress = m_tlas.m_instancesBuffer->getGpuAddress() + inf.m_topLevel.m_instancesBuffer.getOffset();
 		geom.geometry.instances.arrayOfPointers = false;
-		geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; // TODO
+		geom.flags = 0;
 
 		// Geom build info
 		VkAccelerationStructureBuildGeometryInfoKHR& buildInfo = m_buildInfo;
@@ -174,26 +142,13 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		buildInfo.geometryCount = 1;
 		buildInfo.pGeometries = &geom;
 
-		// Get memory info
-		VkAccelerationStructureBuildSizesInfoKHR buildSizes = {};
-		const U32 instanceCount = (isIndirect) ? inf.m_topLevel.m_indirectArgs.m_maxInstanceCount : inf.m_topLevel.m_directArgs.m_instances.getSize();
-		buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
-		vkGetAccelerationStructureBuildSizesKHR(vkdev, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &instanceCount, &buildSizes);
-		m_scratchBufferSize = buildSizes.buildScratchSize;
-
-		// Create the buffer that holds the AS memory
-		BufferInitInfo bufferInit(inf.getName());
-		bufferInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
-		bufferInit.m_size = buildSizes.accelerationStructureSize;
-		m_asBuffer = getGrManagerImpl().newBuffer(bufferInit);
-
 		// Create the AS
 		VkAccelerationStructureCreateInfoKHR asCi = {};
 		asCi.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
 		asCi.createFlags = 0;
 		asCi.buffer = static_cast<const BufferImpl&>(*m_asBuffer).getHandle();
-		asCi.offset = 0;
-		asCi.size = buildSizes.accelerationStructureSize;
+		asCi.offset = m_asBufferOffset;
+		asCi.size = asBufferSize;
 		asCi.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
 		ANKI_VK_CHECK(vkCreateAccelerationStructureKHR(vkdev, &asCi, nullptr, &m_handle));
 
@@ -201,7 +156,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		buildInfo.dstAccelerationStructure = m_handle;
 
 		// Range info
-		m_rangeInfo.primitiveCount = instanceCount;
+		m_rangeInfo.primitiveCount = inf.m_topLevel.m_instanceCount;
 	}
 
 	return Error::kNone;
@@ -307,4 +262,67 @@ VkMemoryBarrier AccelerationStructureImpl::computeBarrierInfo(AccelerationStruct
 	return barrier;
 }
 
+/// Copy the cached build and range info to the outputs and point the build at the scratch memory.
+/// The scratch address is rounded up to m_asBuildScratchAlignment before it is stored.
+void AccelerationStructureImpl::generateBuildInfo(U64 scratchBufferAddress, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo,
+												  VkAccelerationStructureBuildRangeInfoKHR& rangeInfo) const
+{
+	const auto scratchAlignment = getGrManagerImpl().getVulkanCapabilities().m_asBuildScratchAlignment;
+
+	rangeInfo = m_rangeInfo;
+
+	buildInfo = m_buildInfo;
+	buildInfo.scratchData.deviceAddress = getAlignedRoundUp(scratchAlignment, scratchBufferAddress);
+}
+
+/// Ask the device how much memory an AS described by @a inf needs, without creating it.
+/// @param inf The AS description. It has to pass isValidForGettingMemoryRequirements().
+/// @param[out] asBufferSize Set to the accelerationStructureSize of the build sizes.
+/// @param[out] buildScratchBufferSize Set to the buildScratchSize of the build sizes.
+void AccelerationStructureImpl::getMemoryRequirement(const AccelerationStructureInitInfo& inf, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize)
+{
+	ANKI_ASSERT(inf.isValidForGettingMemoryRequirements());
+
+	const Bool bottomLevel = inf.m_type == AccelerationStructureType::kBottomLevel;
+
+	// The single geometry. Only formats, strides and counts are filled in, no buffer addresses
+	VkAccelerationStructureGeometryKHR geom = {};
+	geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+	geom.flags = 0;
+
+	U32 primitiveCount;
+	if(bottomLevel)
+	{
+		const auto& blas = inf.m_bottomLevel;
+
+		geom.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
+		geom.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
+		geom.geometry.triangles.vertexFormat = convertFormat(blas.m_positionsFormat);
+		geom.geometry.triangles.vertexStride = blas.m_positionStride;
+		geom.geometry.triangles.maxVertex = blas.m_positionCount - 1;
+		geom.geometry.triangles.indexType = convertIndexType(blas.m_indexType);
+
+		// One primitive per 3 indices
+		primitiveCount = blas.m_indexCount / 3;
+	}
+	else
+	{
+		geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+		geom.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
+		geom.geometry.instances.arrayOfPointers = false;
+
+		// One primitive per instance
+		primitiveCount = inf.m_topLevel.m_instanceCount;
+	}
+
+	// The build info differs only in the type and the build preference
+	VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {};
+	buildInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
+	buildInfo.type = (bottomLevel) ? VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+	buildInfo.flags =
+		(bottomLevel) ? VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR;
+	buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
+	buildInfo.geometryCount = 1;
+	buildInfo.pGeometries = &geom;
+
+	VkAccelerationStructureBuildSizesInfoKHR buildSizes = {};
+	buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
+	vkGetAccelerationStructureBuildSizesKHR(getVkDevice(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveCount,
+											&buildSizes);
+
+	asBufferSize = buildSizes.accelerationStructureSize;
+	buildScratchBufferSize = buildSizes.buildScratchSize;
+}
+
 } // end namespace anki

+ 9 - 22
AnKi/Gr/Vulkan/VkAccelerationStructure.h

@@ -16,6 +16,8 @@ namespace anki {
 /// AccelerationStructure implementation.
 class AccelerationStructureImpl final : public AccelerationStructure
 {
+	friend class AccelerationStructure;
+
 public:
 	AccelerationStructureImpl(CString name)
 		: AccelerationStructure(name)
@@ -32,29 +34,14 @@ public:
 		return m_handle;
 	}
 
-	U32 getMaxInstanceCount() const
-	{
-		ANKI_ASSERT(m_topLevelInfo.m_maxInstanceCount);
-		return m_topLevelInfo.m_maxInstanceCount;
-	}
-
-	VkDeviceAddress getAsDeviceAddress() const
-	{
-		ANKI_ASSERT(m_deviceAddress);
-		return m_deviceAddress;
-	}
-
 	void generateBuildInfo(U64 scratchBufferAddress, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo,
-						   VkAccelerationStructureBuildRangeInfoKHR& rangeInfo) const
-	{
-		buildInfo = m_buildInfo;
-		buildInfo.scratchData.deviceAddress = scratchBufferAddress;
-		rangeInfo = m_rangeInfo;
-	}
+						   VkAccelerationStructureBuildRangeInfoKHR& rangeInfo) const;
 
 	static VkMemoryBarrier computeBarrierInfo(AccelerationStructureUsageBit before, AccelerationStructureUsageBit after,
 											  VkPipelineStageFlags& srcStages, VkPipelineStageFlags& dstStages);
 
+	static void getMemoryRequirement(const AccelerationStructureInitInfo& init, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize);
+
 private:
 	class ASBottomLevelInfo
 	{
@@ -67,16 +54,16 @@ private:
 	{
 	public:
 		BufferInternalPtr m_instancesBuffer;
-		GrDynamicArray<AccelerationStructureInternalPtr> m_blases;
-		U32 m_maxInstanceCount = 0; ///< Only for indirect.
 	};
 
 	BufferInternalPtr m_asBuffer;
+	PtrSize m_asBufferOffset = kMaxPtrSize;
+
 	VkAccelerationStructureKHR m_handle = VK_NULL_HANDLE;
 	VkDeviceAddress m_deviceAddress = 0;
 
-	ASBottomLevelInfo m_bottomLevelInfo;
-	ASTopLevelInfo m_topLevelInfo;
+	ASBottomLevelInfo m_blas;
+	ASTopLevelInfo m_tlas;
 
 	/// @name Build-time info
 	/// @{

+ 2 - 2
AnKi/Gr/Vulkan/VkBuffer.cpp

@@ -131,7 +131,7 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	ANKI_ASSERT(size > 0);
 	ANKI_ASSERT(usage != BufferUsageBit::kNone);
 
-	m_mappedMemoryRangeAlignment = getGrManagerImpl().getPhysicalDeviceProperties().limits.nonCoherentAtomSize;
+	m_mappedMemoryRangeAlignment = getGrManagerImpl().getVulkanCapabilities().m_nonCoherentAtomSize;
 
 	// Align the size to satisfy fill buffer
 	alignRoundUp(4, size);
@@ -446,7 +446,7 @@ VkBufferView BufferImpl::getOrCreateBufferView(Format fmt, PtrSize offset, PtrSi
 	ANKI_ASSERT((range % getFormatInfo(fmt).m_texelSize) == 0 && "Range doesn't align with the number of texel elements");
 
 	[[maybe_unused]] const PtrSize elementCount = range / getFormatInfo(fmt).m_texelSize;
-	ANKI_ASSERT(elementCount <= getGrManagerImpl().getPhysicalDeviceProperties().limits.maxTexelBufferElements);
+	ANKI_ASSERT(elementCount <= getGrManagerImpl().getVulkanCapabilities().m_maxTexelBufferElements);
 
 	// Hash
 	ANKI_BEGIN_PACKED_STRUCT

+ 1 - 1
AnKi/Gr/Vulkan/VkCommon.cpp

@@ -336,7 +336,7 @@ VkBufferUsageFlags convertBufferUsageBit(BufferUsageBit usageMask)
 		out |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; // Spec says that this will be enough
 	}
 
-	if(!!(usageMask & PrivateBufferUsageBit::kAccelerationStructure) && rt)
+	if(!!(usageMask & BufferUsageBit::kAccelerationStructure) && rt)
 	{
 		out |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
 	}

+ 0 - 11
AnKi/Gr/Vulkan/VkCommon.h

@@ -85,17 +85,6 @@ constexpr F32 kDescriptorPoolSizeScale = 2.0f;
 constexpr U32 kDescriptorBufferedFrameCount = 60 * 5; ///< How many frames worth of descriptors to buffer.
 /// @}
 
-/// Some internal buffer usage flags.
-class PrivateBufferUsageBit
-{
-public:
-	/// Buffer that holds the memory for the actual AS.
-	static constexpr BufferUsageBit kAccelerationStructure = BufferUsageBit(1ull << 30ull);
-
-	static constexpr BufferUsageBit kAllPrivate = kAccelerationStructure;
-};
-static_assert(!(BufferUsageBit::kAll & PrivateBufferUsageBit::kAllPrivate), "Update the bits in PrivateBufferUsageBit");
-
 /// Check if a vulkan function failed. It will abort on failure.
 #define ANKI_VK_CHECKF(x) \
 	do \

+ 29 - 16
AnKi/Gr/Vulkan/VkGrManager.cpp

@@ -149,6 +149,14 @@ void GrManager::submit(WeakArray<CommandBuffer*> cmdbs, WeakArray<Fence*> waitFe
 	self.submitInternal(cmdbs, waitFences, signalFence);
 }
 
+/// Get the amount of memory needed to hold the acceleration structure described by @a init.
+/// @param init Describes the acceleration structure that will be created.
+/// @return The AS storage size reported by AccelerationStructureImpl::getMemoryRequirement() plus
+///         VulkanCapabilities::m_asBufferAlignment bytes of slack (presumably so the AS can be placed at an aligned
+///         offset inside the allocation, TODO confirm against the callers).
+PtrSize GrManager::getAccelerationStructureMemoryRequirement(const AccelerationStructureInitInfo& init) const
+{
+	ANKI_VK_SELF_CONST(GrManagerImpl);
+	PtrSize asBufferSize = 0;
+	PtrSize unusedBuildScratchSize = 0;
+	// getMemoryRequirement() outputs (asBufferSize, buildScratchBufferSize) in that order. Only the AS storage size is
+	// relevant here, the build scratch size is queried separately through the AS object
+	AccelerationStructureImpl::getMemoryRequirement(init, asBufferSize, unusedBuildScratchSize);
+	return asBufferSize + self.m_caps.m_asBufferAlignment;
+}
+
 GrManagerImpl::~GrManagerImpl()
 {
 	ANKI_VK_LOGI("Destroying Vulkan backend");
@@ -576,11 +584,9 @@ Error GrManagerImpl::initInstance()
 		m_physicalDevice = devs[chosenPhysDevIdx].m_pdev;
 	}
 
-	m_rtPipelineProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
-	getPhysicalDeviceProperties2(m_rtPipelineProps);
-
-	m_devProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
-	vkGetPhysicalDeviceProperties2(m_physicalDevice, &m_devProps);
+	VkPhysicalDeviceProperties2 props2 = {};
+	props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	vkGetPhysicalDeviceProperties2(m_physicalDevice, &props2);
 
 	VkPhysicalDeviceVulkan12Properties props12 = {};
 	props12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES;
@@ -593,7 +599,7 @@ Error GrManagerImpl::initInstance()
 	m_capabilities.m_maxWaveSize = props13.maxSubgroupSize;
 
 	// Find vendor
-	switch(m_devProps.properties.vendorID)
+	switch(props2.properties.vendorID)
 	{
 	case 0x13B5:
 		m_capabilities.m_gpuVendor = GpuVendor::kArm;
@@ -614,24 +620,25 @@ Error GrManagerImpl::initInstance()
 	default:
 		m_capabilities.m_gpuVendor = GpuVendor::kUnknown;
 	}
-	ANKI_VK_LOGI("GPU is %s. Vendor identified as %s, Driver %s", m_devProps.properties.deviceName, &kGPUVendorStrings[m_capabilities.m_gpuVendor][0],
+	ANKI_VK_LOGI("GPU is %s. Vendor identified as %s, Driver %s", props2.properties.deviceName, &kGPUVendorStrings[m_capabilities.m_gpuVendor][0],
 				 props12.driverInfo);
 
 	// Set limits
 	m_capabilities.m_constantBufferBindOffsetAlignment =
-		computeCompoundAlignment<U32>(ANKI_SAFE_ALIGNMENT, U32(m_devProps.properties.limits.minUniformBufferOffsetAlignment));
+		computeCompoundAlignment<U32>(ANKI_SAFE_ALIGNMENT, U32(props2.properties.limits.minUniformBufferOffsetAlignment));
 	m_capabilities.m_structuredBufferBindOffsetAlignment =
-		computeCompoundAlignment<U32>(ANKI_SAFE_ALIGNMENT, U32(m_devProps.properties.limits.minStorageBufferOffsetAlignment));
+		computeCompoundAlignment<U32>(ANKI_SAFE_ALIGNMENT, U32(props2.properties.limits.minStorageBufferOffsetAlignment));
 	m_capabilities.m_structuredBufferNaturalAlignment = false;
-	m_capabilities.m_texelBufferBindOffsetAlignment = max<U32>(ANKI_SAFE_ALIGNMENT, U32(m_devProps.properties.limits.minTexelBufferOffsetAlignment));
-	m_capabilities.m_computeSharedMemorySize = m_devProps.properties.limits.maxComputeSharedMemorySize;
-	m_capabilities.m_maxDrawIndirectCount = m_devProps.properties.limits.maxDrawIndirectCount;
+	m_capabilities.m_texelBufferBindOffsetAlignment = max<U32>(ANKI_SAFE_ALIGNMENT, U32(props2.properties.limits.minTexelBufferOffsetAlignment));
+	m_capabilities.m_computeSharedMemorySize = props2.properties.limits.maxComputeSharedMemorySize;
+	m_capabilities.m_maxDrawIndirectCount = props2.properties.limits.maxDrawIndirectCount;
 
 	m_capabilities.m_majorApiVersion = vulkanMajor;
 	m_capabilities.m_minorApiVersion = vulkanMinor;
 
-	m_capabilities.m_shaderGroupHandleSize = m_rtPipelineProps.shaderGroupHandleSize;
-	m_capabilities.m_sbtRecordAlignment = m_rtPipelineProps.shaderGroupBaseAlignment;
+	m_caps.m_nonCoherentAtomSize = props2.properties.limits.nonCoherentAtomSize;
+	m_caps.m_maxTexelBufferElements = props2.properties.limits.maxTexelBufferElements;
+	m_caps.m_timestampPeriod = props2.properties.limits.timestampPeriod;
 
 	// DLSS checks
 	m_capabilities.m_dlss = ANKI_DLSS && m_capabilities.m_gpuVendor == GpuVendor::kNvidia;
@@ -988,7 +995,13 @@ Error GrManagerImpl::initDevice()
 		VkPhysicalDeviceAccelerationStructurePropertiesKHR props = {};
 		props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR;
 		getPhysicalDeviceProperties2(props);
-		m_capabilities.m_accelerationStructureBuildScratchOffsetAlignment = props.minAccelerationStructureScratchOffsetAlignment;
+		m_caps.m_asBuildScratchAlignment = props.minAccelerationStructureScratchOffsetAlignment;
+
+		VkPhysicalDeviceRayTracingPipelinePropertiesKHR rtprops = {};
+		rtprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
+		getPhysicalDeviceProperties2(rtprops);
+		m_capabilities.m_shaderGroupHandleSize = rtprops.shaderGroupHandleSize;
+		m_capabilities.m_sbtRecordAlignment = rtprops.shaderGroupBaseAlignment;
 	}
 
 	// Pipeline features
@@ -1246,7 +1259,7 @@ TexturePtr GrManagerImpl::acquireNextPresentableTexture()
 
 	// Create some objects outside the lock
 	Array<Char, 16> name;
-	snprintf(name.getBegin(), name.getSize(), "Acquire %lu", m_frame);
+	snprintf(name.getBegin(), name.getSize(), "Acquire %llu", m_frame);
 	MicroFencePtr fence = FenceFactory::getSingleton().newInstance(name.getBegin());
 
 	LockGuard<Mutex> lock(m_globalMtx);

+ 15 - 10
AnKi/Gr/Vulkan/VkGrManager.h

@@ -31,6 +31,17 @@ enum class AsyncComputeType
 	kDisabled
 };
 
+/// A small struct with all the Vulkan-specific caps the backend needs. Filled by GrManagerImpl while initializing the
+/// instance and the device.
+class VulkanCapabilities
+{
+public:
+	VkDeviceSize m_nonCoherentAtomSize = 0; ///< Copy of VkPhysicalDeviceLimits::nonCoherentAtomSize.
+	U64 m_maxTexelBufferElements = 0; ///< Copy of VkPhysicalDeviceLimits::maxTexelBufferElements.
+	F32 m_timestampPeriod = 0.0f; ///< Copy of VkPhysicalDeviceLimits::timestampPeriod.
+	U32 m_asBuildScratchAlignment = 0; ///< Copy of minAccelerationStructureScratchOffsetAlignment. Only set when RT is available.
+	U32 m_asBufferAlignment = 256; ///< Alignment of the AS inside its buffer. Spec says 256.
+};
+
 /// Vulkan implementation of GrManager.
 class GrManagerImpl : public GrManager
 {
@@ -67,14 +78,9 @@ public:
 		}
 	}
 
-	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
+	const VulkanCapabilities& getVulkanCapabilities() const
 	{
-		return m_devProps.properties;
-	}
-
-	const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& getPhysicalDeviceRayTracingProperties() const
-	{
-		return m_rtPipelineProps;
+		return m_caps;
 	}
 
 	TexturePtr acquireNextPresentableTexture();
@@ -201,9 +207,6 @@ private:
 	Array<VkQueue, U32(GpuQueueType::kCount)> m_queues = {nullptr, nullptr};
 	Mutex m_globalMtx;
 
-	VkPhysicalDeviceProperties2 m_devProps = {};
-	VkPhysicalDeviceRayTracingPipelinePropertiesKHR m_rtPipelineProps = {};
-
 	VkDebugUtilsMessengerEXT m_debugUtilsMessager = VK_NULL_HANDLE;
 
 	mutable SpinLock m_shaderStatsMtx;
@@ -215,6 +218,8 @@ private:
 	U8 m_acquiredImageIdx = kMaxU8;
 	FrameState m_frameState = kFrameEnded;
 
+	VulkanCapabilities m_caps;
+
 	Array<PerFrame, kMaxFramesInFlight> m_perFrame;
 
 	VkPhysicalDeviceMemoryProperties m_memoryProperties;

+ 1 - 1
AnKi/Gr/Vulkan/VkShaderProgram.cpp

@@ -410,7 +410,7 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 		}
 
 		// Get RT handles
-		const U32 handleArraySize = getGrManagerImpl().getPhysicalDeviceRayTracingProperties().shaderGroupHandleSize * groupCount;
+		const U32 handleArraySize = getGrManagerImpl().getDeviceCapabilities().m_shaderGroupHandleSize * groupCount;
 		m_rt.m_allHandles.resize(handleArraySize, 0_U8);
 		ANKI_VK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(getVkDevice(), m_rt.m_ppline, 0, groupCount, handleArraySize, &m_rt.m_allHandles[0]));
 

+ 1 - 1
AnKi/Gr/Vulkan/VkTimestampQuery.cpp

@@ -63,7 +63,7 @@ Error TimestampQueryImpl::init()
 {
 	ANKI_CHECK(TimestampQueryFactory::getSingleton().newQuery(m_handle));
 
-	m_timestampPeriod = U64(getGrManagerImpl().getPhysicalDeviceProperties().limits.timestampPeriod);
+	m_timestampPeriod = U64(getGrManagerImpl().getVulkanCapabilities().m_timestampPeriod);
 
 	return Error::kNone;
 }

+ 3 - 5
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -40,17 +40,15 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 	// Create the TLAS
 	AccelerationStructureInitInfo initInf("Main TLAS");
 	initInf.m_type = AccelerationStructureType::kTopLevel;
-	initInf.m_topLevel.m_indirectArgs.m_maxInstanceCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
-	initInf.m_topLevel.m_indirectArgs.m_instancesBuffer = visOut.m_instancesBuffer;
+	initInf.m_topLevel.m_instanceCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
+	initInf.m_topLevel.m_instancesBuffer = visOut.m_instancesBuffer;
 	m_runCtx.m_tlas = GrManager::getSingleton().newAccelerationStructure(initInf);
 
 	// Build the AS
 	{
 		RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
-		const BufferView scratchBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(
-			m_runCtx.m_tlas->getBuildScratchBufferSize(),
-			GrManager::getSingleton().getDeviceCapabilities().m_accelerationStructureBuildScratchOffsetAlignment);
+		const BufferView scratchBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(m_runCtx.m_tlas->getBuildScratchBufferSize(), 1);
 
 		m_runCtx.m_tlasHandle = rgraph.importAccelerationStructure(m_runCtx.m_tlas.get(), AccelerationStructureUsageBit::kNone);
 

+ 1 - 1
AnKi/Resource/AccelerationStructureScratchAllocator.h

@@ -57,7 +57,7 @@ public:
 		}
 
 		const BufferView view(m_buffer.get(), m_offset, size);
-		m_offset += getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_accelerationStructureBuildScratchOffsetAlignment, size);
+		m_offset += size;
 
 		return view;
 	}

+ 4 - 3
AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog

@@ -108,9 +108,10 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 			{
 				AccelerationStructureInstance instance;
 				instance.m_transform = finalTrf;
-				instance.m_mask8_instanceCustomIndex24 = (meshLod.m_tlasInstanceMask << 24u) | (instanceIdx & 0x00FFFFFFu);
-				const U32 instanceFlags = kAccellerationStructureFlagTriangleFrontCounterlockwise;
-				instance.m_flags8_instanceShaderBindingTableRecordOffset24 = (instanceFlags << 24u) | (instanceIdx & 0x00FFFFFFu);
+				instance.m_mask = meshLod.m_tlasInstanceMask;
+				instance.m_instanceCustomIndex = instanceIdx & 0x00FFFFFFu;
+				instance.m_flags = kAccellerationStructureFlagTriangleFrontCounterlockwise;
+				instance.m_instanceShaderBindingTableRecordOffset = instanceIdx & 0x00FFFFFFu;
 				instance.m_accelerationStructureAddress = meshLod.m_blasAddress;
 
 				SBUFF(g_visibleInstances, instanceIdx) = instance;

+ 8 - 2
AnKi/Shaders/Include/Common.h

@@ -546,9 +546,15 @@ enum AccellerationStructureFlag : U32
 struct AccelerationStructureInstance
 {
 	Mat3x4 m_transform; ///< Transform of the instance.
-	U32 m_mask8_instanceCustomIndex24;
-	U32 m_flags8_instanceShaderBindingTableRecordOffset24; ///< flags is AccellerationStructureFlag.
+	U32 m_instanceCustomIndex : 24; ///< 24bit custom index of the instance.
+	U32 m_mask : 8; ///< 8bit mask of the instance.
+	U32 m_instanceShaderBindingTableRecordOffset : 24; ///< 24bit offset of the instance's SBT record.
+	U32 m_flags : 8; ///< A combination of AccellerationStructureFlag values.
+#if defined(__cplusplus)
+	U64 m_accelerationStructureAddress; ///< GPU address of the bottom level AS this instance points to.
+#else
 	UVec2 m_accelerationStructureAddress; ///< Non-C++ variant of the same address. Presumably the 64bit value split in 2 U32s (TODO confirm).
+#endif
 };
 
 ANKI_END_NAMESPACE

+ 9 - 2
Tests/Gr/Gr.cpp

@@ -2299,10 +2299,17 @@ ANKI_TEST(Gr, RayQuery)
 		// TLAS
 		AccelerationStructurePtr tlas;
 		{
+			AccelerationStructureInstance inst = {};
+			inst.m_accelerationStructureAddress = blas->getGpuAddress();
+			inst.m_transform = Mat3x4::getIdentity();
+			inst.m_mask = 0xFF;
+			inst.m_flags = kAccellerationStructureFlagForceOpaque;
+			BufferPtr instBuff = createBuffer(BufferUsageBit::kAll, inst, 1);
+
 			AccelerationStructureInitInfo init;
 			init.m_type = AccelerationStructureType::kTopLevel;
-			Array<AccelerationStructureInstanceInfo, 1> instances = {{{blas.get(), Mat3x4::getIdentity()}}};
-			init.m_topLevel.m_directArgs.m_instances = instances;
+			init.m_topLevel.m_instancesBuffer = BufferView(instBuff.get());
+			init.m_topLevel.m_instanceCount = 1;
 
 			tlas = GrManager::getSingleton().newAccelerationStructure(init);
 		}