Ver Fonte

Add dispatch rays indirect on D3D

Panagiotis Christopoulos Charitos há 4 meses atrás
pai
commit
2bce20d630

+ 1 - 1
AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h

@@ -57,7 +57,7 @@ private:
 
 		BufferUsageBit buffUsage = BufferUsageBit::kAllConstant | BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kIndirectDraw
 								   | BufferUsageBit::kIndirectCompute | BufferUsageBit::kVertexOrIndex | BufferUsageBit::kAllCopy
-								   | BufferUsageBit::kIndirectTraceRays | BufferUsageBit::kShaderBindingTable;
+								   | BufferUsageBit::kIndirectDispatchRays | BufferUsageBit::kShaderBindingTable;
 		if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
 		{
 			buffUsage |= (BufferUsageBit::kAccelerationStructureBuildScratch | BufferUsageBit::kAccelerationStructureBuild);

+ 3 - 3
AnKi/Gr/CommandBuffer.h

@@ -304,10 +304,10 @@ public:
 	/// @param width Width.
 	/// @param height Height.
 	/// @param depth Depth.
-	void traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height, U32 depth);
+	void dispatchRays(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height, U32 depth);
 
-	/// Same as traceRays but indirect.
-	void traceRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount, BufferView argsBuffer);
+	/// Same as dispatchRays but indirect.
+	void dispatchRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount, BufferView argsBuffer);
 
 	/// Blit from surface to surface.
 	void blitTexture(const TextureView& srcView, const TextureView& destView);

+ 21 - 21
AnKi/Gr/Common.h

@@ -487,12 +487,12 @@ enum class TextureUsageBit : U32
 	kSrvGeometry = 1 << 0,
 	kSrvPixel = 1 << 1,
 	kSrvCompute = 1 << 2,
-	kSrvTraceRays = 1 << 3,
+	kSrvDispatchRays = 1 << 3,
 
 	kUavGeometry = 1 << 4,
 	kUavPixel = 1 << 5,
 	kUavCompute = 1 << 6,
-	kUavTraceRays = 1 << 7,
+	kUavDispatchRays = 1 << 7,
 
 	kRtvDsvRead = 1 << 8,
 	kRtvDsvWrite = 1 << 9,
@@ -503,8 +503,8 @@ enum class TextureUsageBit : U32
 	kPresent = 1 << 12,
 
 	// Derived
-	kAllSrv = kSrvGeometry | kSrvPixel | kSrvCompute | kSrvTraceRays,
-	kAllUav = kUavGeometry | kUavPixel | kUavCompute | kUavTraceRays,
+	kAllSrv = kSrvGeometry | kSrvPixel | kSrvCompute | kSrvDispatchRays,
+	kAllUav = kUavGeometry | kUavPixel | kUavCompute | kUavDispatchRays,
 	kAllRtvDsv = kRtvDsvRead | kRtvDsvWrite,
 
 	kAllGeometry = kSrvGeometry | kUavGeometry,
@@ -707,46 +707,46 @@ enum class BufferUsageBit : U64
 	kConstantGeometry = 1ull << 0ull,
 	kConstantPixel = 1ull << 1ull,
 	kConstantCompute = 1ull << 2ull,
-	kConstantTraceRays = 1ull << 3ull,
+	kConstantDispatchRays = 1ull << 3ull,
 
 	kSrvGeometry = 1ull << 4ull,
 	kSrvPixel = 1ull << 5ull,
 	kSrvCompute = 1ull << 6ull,
-	kSrvTraceRays = 1ull << 7ull,
+	kSrvDispatchRays = 1ull << 7ull,
 
 	kUavGeometry = 1ull << 8ull,
 	kUavPixel = 1ull << 9ull,
 	kUavCompute = 1ull << 10ull,
-	kUavTraceRays = 1ull << 11ull,
+	kUavDispatchRays = 1ull << 11ull,
 
 	kVertexOrIndex = 1ull << 12ull,
 
 	kIndirectCompute = 1ull << 14ll,
 	kIndirectDraw = 1ull << 15ull,
-	kIndirectTraceRays = 1ull << 16ull,
+	kIndirectDispatchRays = 1ull << 16ull,
 
 	kCopySource = 1ull << 17ull,
 	kCopyDestination = 1ull << 18ull,
 
 	kAccelerationStructureBuild = 1ull << 19ull, ///< Will be used as a position or index buffer in a BLAS build.
-	kShaderBindingTable = 1ull << 20ull, ///< Will be used as SBT in a traceRays() command.
+	kShaderBindingTable = 1ull << 20ull, ///< Will be used as SBT in a dispatchRays() command.
 	kAccelerationStructureBuildScratch = 1ull << 21ull, ///< Used in buildAccelerationStructureXXX commands.
 	kAccelerationStructure = 1ull << 22ull, ///< Will be used as AS.
 
 	// Derived
-	kAllConstant = kConstantGeometry | kConstantPixel | kConstantCompute | kConstantTraceRays,
-	kAllSrv = kSrvGeometry | kSrvPixel | kSrvCompute | kSrvTraceRays,
-	kAllUav = kUavGeometry | kUavPixel | kUavCompute | kUavTraceRays,
-	kAllIndirect = kIndirectCompute | kIndirectDraw | kIndirectTraceRays,
+	kAllConstant = kConstantGeometry | kConstantPixel | kConstantCompute | kConstantDispatchRays,
+	kAllSrv = kSrvGeometry | kSrvPixel | kSrvCompute | kSrvDispatchRays,
+	kAllUav = kUavGeometry | kUavPixel | kUavCompute | kUavDispatchRays,
+	kAllIndirect = kIndirectCompute | kIndirectDraw | kIndirectDispatchRays,
 	kAllCopy = kCopySource | kCopyDestination,
 
 	kAllGeometry = kConstantGeometry | kSrvGeometry | kUavGeometry | kVertexOrIndex,
 	kAllPixel = kConstantPixel | kSrvPixel | kUavPixel,
 	kAllGraphics = kAllGeometry | kAllPixel | kIndirectDraw,
 	kAllCompute = kConstantCompute | kSrvCompute | kUavCompute | kIndirectCompute,
-	kAllTraceRays = kConstantTraceRays | kSrvTraceRays | kUavTraceRays | kIndirectTraceRays | kShaderBindingTable,
+	kAllDispatchRays = kConstantDispatchRays | kSrvDispatchRays | kUavDispatchRays | kIndirectDispatchRays | kShaderBindingTable,
 
-	kAllRayTracing = kAllTraceRays | kAccelerationStructureBuild | kAccelerationStructureBuildScratch | kAccelerationStructure,
+	kAllRayTracing = kAllDispatchRays | kAccelerationStructureBuild | kAccelerationStructureBuildScratch | kAccelerationStructure,
 	kAllRead = kAllConstant | kAllSrv | kAllUav | kVertexOrIndex | kAllIndirect | kCopySource | kAccelerationStructureBuild | kShaderBindingTable
 			   | kAccelerationStructure,
 	kAllWrite = kAllUav | kCopyDestination | kAccelerationStructureBuildScratch | kAccelerationStructure,
@@ -794,14 +794,14 @@ enum class AccelerationStructureUsageBit : U8
 	kNone = 0,
 	kBuild = 1 << 0,
 	kAttach = 1 << 1, ///< Attached to a TLAS. Only for BLAS.
-	kGeometrySrv = 1 << 2,
-	kPixelSrv = 1 << 3,
-	kComputeSrv = 1 << 4,
-	kTraceRaysSrv = 1 << 5,
+	kSrvGeometry = 1 << 2,
+	kSrvPixel = 1 << 3,
+	kSrvCompute = 1 << 4,
+	kSrvDispatchRays = 1 << 5,
 
 	// Derived
-	kAllGraphics = kGeometrySrv | kPixelSrv,
-	kAllRead = kAttach | kGeometrySrv | kPixelSrv | kComputeSrv | kTraceRaysSrv,
+	kAllGraphics = kSrvGeometry | kSrvPixel,
+	kAllRead = kAttach | kSrvGeometry | kSrvPixel | kSrvCompute | kSrvDispatchRays,
 	kAllWrite = kBuild
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(AccelerationStructureUsageBit)

+ 8 - 8
AnKi/Gr/D3D/D3DAccelerationStructure.cpp

@@ -138,25 +138,25 @@ D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationS
 		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kGeometrySrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvGeometry))
 	{
 		barrier.SyncBefore |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
 		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; // READ_BIT is the only viable solution by elimination
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kPixelSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvPixel))
 	{
 		barrier.SyncBefore |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
 		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kComputeSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvCompute))
 	{
 		barrier.SyncBefore |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
 		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kTraceRaysSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvDispatchRays))
 	{
 		barrier.SyncBefore |= D3D12_BARRIER_SYNC_RAYTRACING;
 		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
@@ -175,25 +175,25 @@ D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationS
 		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kGeometrySrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvGeometry))
 	{
 		barrier.SyncAfter |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
 		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; // READ_BIT is the only viable solution by elimination
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kPixelSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvPixel))
 	{
 		barrier.SyncAfter |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
 		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kComputeSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvCompute))
 	{
 		barrier.SyncAfter |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
 		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kTraceRaysSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvDispatchRays))
 	{
 		// The consumer is a ray dispatch, so the barrier has to hold back the raytracing stage. This mirrors the SyncBefore handling of the same
 		// usage bit; syncing on the AS build stage would leave the following DispatchRays unsynchronized
 		barrier.SyncAfter |= D3D12_BARRIER_SYNC_RAYTRACING;
 		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;

+ 1 - 1
AnKi/Gr/D3D/D3DBuffer.cpp

@@ -229,7 +229,7 @@ D3D12_BARRIER_SYNC BufferImpl::computeSync(BufferUsageBit usage) const
 		sync |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
 	}
 
-	if(!!(usage & (BufferUsageBit::kAllTraceRays & ~BufferUsageBit::kIndirectTraceRays)) && rt)
+	if(!!(usage & (BufferUsageBit::kAllDispatchRays & ~BufferUsageBit::kIndirectDispatchRays)) && rt)
 	{
 		sync |= D3D12_BARRIER_SYNC_RAYTRACING;
 	}

+ 99 - 5
AnKi/Gr/D3D/D3DCommandBuffer.cpp

@@ -581,8 +581,8 @@ void CommandBuffer::dispatchComputeIndirect(const BufferView& argBuffer)
 	self.m_cmdList->ExecuteIndirect(signature, 1, &impl.getD3DResource(), argBuffer.getOffset(), nullptr, 0);
 }
 
-void CommandBuffer::traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, [[maybe_unused]] U32 rayTypeCount,
-							  U32 width, U32 height, U32 depth)
+void CommandBuffer::dispatchRays(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, [[maybe_unused]] U32 rayTypeCount,
+								 U32 width, U32 height, U32 depth)
 {
 	ANKI_ASSERT(rayTypeCount == 1 && "TODO");
 	ANKI_D3D_SELF(CommandBufferImpl);
@@ -601,10 +601,97 @@ void CommandBuffer::traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize, U3
 	self.m_cmdList->DispatchRays(&dispatchDesc);
 }
 
-void CommandBuffer::traceRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
-									  BufferView argsBuffer)
+/// D3D12 backend of the indirect ray dispatch.
+/// ExecuteIndirect consumes a whole D3D12_DISPATCH_RAYS_DESC as the indirect argument, so the descriptor is assembled in a buffer owned by this
+/// command buffer: the shader table addresses are written by the CPU at record time and the Width/Height/Depth tail is copied on the GPU from
+/// argsBuffer.
+/// @param sbtBuffer The SBT. Laid out as 1 ray gen record, 1 miss record and then hitGroupSbtRecordCount hit group records.
+/// @param sbtRecordSize Size in bytes of a single SBT record.
+/// @param hitGroupSbtRecordCount Number of hit group records in the SBT.
+/// @param rayTypeCount Only 1 is supported at the moment.
+/// @param argsBuffer Must cover exactly one DispatchIndirectArgs.
+void CommandBuffer::dispatchRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
+										 BufferView argsBuffer)
 {
-	ANKI_ASSERT(!"TODO");
+	ANKI_ASSERT(rayTypeCount == 1 && "TODO");
+	ANKI_ASSERT(sbtBuffer.getRange() == sbtRecordSize * (hitGroupSbtRecordCount + 2));
+	ANKI_ASSERT(argsBuffer.getRange() == sizeof(DispatchIndirectArgs));
+	ANKI_D3D_SELF(CommandBufferImpl);
+
+	// Every call consumes one slot of the descriptor buffer and slots are never recycled here. Catch the overflow before recording any CPU or GPU
+	// write past the end of the buffer
+	ANKI_ASSERT(self.m_indirectDispatchRays.m_crntDescriptor < self.m_indirectDispatchRays.kMaxDescriptorCount
+				&& "Too many dispatchRaysIndirect() calls in this command buffer");
+
+	self.dispatchCommon();
+
+	// Allocate the actual indirect buffer (lazily, on the first indirect ray dispatch of this command buffer)
+	if(!self.m_indirectDispatchRays.m_indirectBuff)
+	{
+		D3D12_HEAP_PROPERTIES heapProperties = {};
+		heapProperties.Type = D3D12_HEAP_TYPE_CUSTOM;
+		heapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
+
+		// Use the L1 pool only when rebar is available on a discrete GPU, otherwise fall back to L0
+		if(getGrManagerImpl().getD3DCapabilities().m_rebar && getGrManagerImpl().getDeviceCapabilities().m_discreteGpu)
+		{
+			heapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_L1;
+		}
+		else
+		{
+			heapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
+		}
+
+		const D3D12_HEAP_FLAGS heapFlags = {};
+
+		D3D12_RESOURCE_DESC resourceDesc = {};
+		resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+		resourceDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+		resourceDesc.Width = sizeof(D3D12_DISPATCH_RAYS_DESC) * self.m_indirectDispatchRays.kMaxDescriptorCount;
+		resourceDesc.Height = 1;
+		resourceDesc.DepthOrArraySize = 1;
+		resourceDesc.MipLevels = 1;
+		resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
+		resourceDesc.SampleDesc.Count = 1;
+		resourceDesc.SampleDesc.Quality = 0;
+		resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+		resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+		const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON;
+
+		ANKI_D3D_CHECKF(getDevice().CreateCommittedResource(&heapProperties, heapFlags, &resourceDesc, initialState, nullptr,
+															IID_PPV_ARGS(&self.m_indirectDispatchRays.m_indirectBuff)));
+
+		ANKI_D3D_CHECKF(self.m_indirectDispatchRays.m_indirectBuff->SetName(L"DispatchRaysIndirectBuff"));
+
+		// The buffer stays mapped until the command buffer is destroyed
+		const D3D12_RANGE d3dRange = {.Begin = 0, .End = resourceDesc.Width};
+		void* mem = nullptr;
+		ANKI_D3D_CHECKF(self.m_indirectDispatchRays.m_indirectBuff->Map(0, &d3dRange, &mem));
+		self.m_indirectDispatchRays.m_mappedMem = {static_cast<D3D12_DISPATCH_RAYS_DESC*>(mem), self.m_indirectDispatchRays.kMaxDescriptorCount};
+	}
+
+	// Byte offset of the slot this dispatch is going to use
+	const PtrSize indirectBuffOffset = self.m_indirectDispatchRays.m_crntDescriptor * sizeof(D3D12_DISPATCH_RAYS_DESC);
+
+	// Write a few things from the CPU
+	const U64 baseAddress = sbtBuffer.getBuffer().getGpuAddress() + sbtBuffer.getOffset();
+
+	D3D12_DISPATCH_RAYS_DESC dispatchDesc = {};
+	dispatchDesc.RayGenerationShaderRecord = {baseAddress, sbtRecordSize};
+	dispatchDesc.MissShaderTable = {baseAddress + sbtRecordSize, sbtRecordSize, sbtRecordSize};
+	dispatchDesc.HitGroupTable = {baseAddress + sbtRecordSize * 2, sbtRecordSize * hitGroupSbtRecordCount, sbtRecordSize};
+
+	self.m_indirectDispatchRays.m_mappedMem[self.m_indirectDispatchRays.m_crntDescriptor] = dispatchDesc;
+
+	// Copy the rest from the GPU (the dispatch dimensions, starting at the Width member of the slot)
+	self.m_cmdList->CopyBufferRegion(self.m_indirectDispatchRays.m_indirectBuff, indirectBuffOffset + offsetof(D3D12_DISPATCH_RAYS_DESC, Width),
+									 &static_cast<const BufferImpl&>(argsBuffer.getBuffer()).getD3DResource(), argsBuffer.getOffset(),
+									 sizeof(DispatchIndirectArgs));
+
+	// Barrier (make the copy above visible to the indirect argument fetch)
+	const D3D12_BUFFER_BARRIER barrier = {.SyncBefore = D3D12_BARRIER_SYNC_COPY,
+										  .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
+										  .AccessBefore = D3D12_BARRIER_ACCESS_COPY_DEST,
+										  .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT,
+										  .pResource = self.m_indirectDispatchRays.m_indirectBuff,
+										  .Offset = 0,
+										  .Size = self.m_indirectDispatchRays.m_indirectBuff->GetDesc().Width};
+
+	const D3D12_BARRIER_GROUP barrierGroup = {.Type = D3D12_BARRIER_TYPE_BUFFER, .NumBarriers = 1, .pBufferBarriers = &barrier};
+
+	self.m_cmdList->Barrier(1, &barrierGroup);
+
+	// Execute
+	ID3D12CommandSignature* signature;
+	ANKI_CHECKF(IndirectCommandSignatureFactory::getSingleton().getOrCreateSignature(D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS,
+																					 sizeof(D3D12_DISPATCH_RAYS_DESC), signature));
+
+	self.m_cmdList->ExecuteIndirect(signature, 1, self.m_indirectDispatchRays.m_indirectBuff, indirectBuffOffset, nullptr, 0);
+
+	// The next indirect ray dispatch gets the next slot
+	++self.m_indirectDispatchRays.m_crntDescriptor;
 }
 
 void CommandBuffer::blitTexture([[maybe_unused]] const TextureView& srcView, [[maybe_unused]] const TextureView& destView)
@@ -954,6 +1041,13 @@ void CommandBuffer::dispatchGraph(const BufferView& scratchBuffer, const void* r
 
 CommandBufferImpl::~CommandBufferImpl()
 {
+	if(m_indirectDispatchRays.m_indirectBuff) // Non-null only if dispatchRaysIndirect() created the descriptor buffer
+	{
+		const D3D12_RANGE d3dRange = {.Begin = 0, .End = m_indirectDispatchRays.m_indirectBuff->GetDesc().Width}; // The whole buffer
+		m_indirectDispatchRays.m_indirectBuff->Unmap(0, &d3dRange); // Pairs with the Map() done when the buffer was created
+
+		safeRelease(m_indirectDispatchRays.m_indirectBuff);
+	}
 }
 
 Error CommandBufferImpl::init(const CommandBufferInitInfo& init)

+ 9 - 0
AnKi/Gr/D3D/D3DCommandBuffer.h

@@ -55,6 +55,15 @@ private:
 
 	const ShaderProgramImpl* m_wgProg = nullptr;
 
+	class // State used by dispatchRaysIndirect() to build its D3D12_DISPATCH_RAYS_DESC indirect arguments
+	{
+	public:
+		ID3D12Resource* m_indirectBuff = nullptr; ///< Holds kMaxDescriptorCount D3D12_DISPATCH_RAYS_DESC. Null until dispatchRaysIndirect() creates it.
+		static constexpr U32 kMaxDescriptorCount = 8; ///< Number of D3D12_DISPATCH_RAYS_DESC slots in m_indirectBuff.
+		WeakArray<D3D12_DISPATCH_RAYS_DESC> m_mappedMem; ///< CPU view of the mapped m_indirectBuff.
+		U8 m_crntDescriptor = 0; ///< Slot the next dispatchRaysIndirect() will use. Incremented after every call.
+	} m_indirectDispatchRays;
+
 	Bool m_descriptorHeapsBound = false;
 	Bool m_debugMarkersEnabled = false;
 	Bool m_lineWidthWarningAlreadyShown = false;

+ 3 - 0
AnKi/Gr/D3D/D3DCommandBufferFactory.cpp

@@ -125,6 +125,9 @@ Error IndirectCommandSignatureFactory::getOrCreateSignatureInternal(Bool takeFas
 	case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH:
 		akType = IndirectCommandSignatureType::kDispatchMesh;
 		break;
+	case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
+		akType = IndirectCommandSignatureType::kDispatchRays;
+		break;
 	default:
 		ANKI_ASSERT(!"Unsupported");
 	}

+ 4 - 2
AnKi/Gr/D3D/D3DCommandBufferFactory.h

@@ -122,6 +122,7 @@ private:
 		kDrawIndexed,
 		kDispatch,
 		kDispatchMesh,
+		kDispatchRays,
 
 		kCount,
 		kFirst = 0
@@ -135,11 +136,12 @@ private:
 	};
 
 	static constexpr Array<U32, U32(IndirectCommandSignatureType::kCount)> kCommonStrides = {
-		sizeof(DrawIndirectArgs), sizeof(DrawIndexedIndirectArgs), sizeof(DispatchIndirectArgs), sizeof(DispatchIndirectArgs)};
+		sizeof(DrawIndirectArgs), sizeof(DrawIndexedIndirectArgs), sizeof(DispatchIndirectArgs), sizeof(DispatchIndirectArgs),
+		sizeof(D3D12_DISPATCH_RAYS_DESC)}; // kDispatchRays: the indirect argument is a whole D3D12_DISPATCH_RAYS_DESC
 
 	static constexpr Array<D3D12_INDIRECT_ARGUMENT_TYPE, U32(IndirectCommandSignatureType::kCount)> kAnkiToD3D = {
 		D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
-		D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH};
+		D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH, D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS}; // Presumably in IndirectCommandSignatureType order, same as kCommonStrides
 
 	Array<GrDynamicArray<Signature>, U32(IndirectCommandSignatureType::kCount)> m_arrays;
 

+ 44 - 11
AnKi/Gr/D3D/D3DShaderProgram.cpp

@@ -264,19 +264,41 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 
 		for(U32 i = 0; i < inf.m_rayTracingShaders.m_hitGroups.getSize(); ++i)
 		{
-			auto chit = rtp.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>();
-
 			const RayTracingHitGroup& hg = inf.m_rayTracingShaders.m_hitGroups[i];
-			ANKI_ASSERT(hg.m_anyHitShader == nullptr && "TODO");
-			const ShaderImpl& shaderImpl = static_cast<const ShaderImpl&>(*hg.m_closestHitShader);
-			const CD3DX12_SHADER_BYTECODE libCode(shaderImpl.m_binary.getBegin(), shaderImpl.m_binary.getSizeInBytes());
 
-			chit->SetDXILLibrary(&libCode);
-			const std::wstring exportName = std::wstring(L"chit") + std::to_wstring(i);
-			chit->DefineExport(exportName.c_str(), L"main");
+			std::wstring chitExportName;
+			if(hg.m_closestHitShader)
+			{
+				const ShaderImpl& shaderImpl = static_cast<const ShaderImpl&>(*hg.m_closestHitShader);
+				const CD3DX12_SHADER_BYTECODE libCode(shaderImpl.m_binary.getBegin(), shaderImpl.m_binary.getSizeInBytes());
+
+				auto subobj = rtp.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>();
+				subobj->SetDXILLibrary(&libCode);
+				chitExportName = std::wstring(L"chit") + std::to_wstring(i);
+				subobj->DefineExport(chitExportName.c_str(), L"main");
+			}
+
+			std::wstring ahitExportName;
+			if(hg.m_anyHitShader)
+			{
+				const ShaderImpl& shaderImpl = static_cast<const ShaderImpl&>(*hg.m_anyHitShader);
+				const CD3DX12_SHADER_BYTECODE libCode(shaderImpl.m_binary.getBegin(), shaderImpl.m_binary.getSizeInBytes());
+
+				auto subobj = rtp.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>();
+				subobj->SetDXILLibrary(&libCode);
+				ahitExportName = std::wstring(L"ahit") + std::to_wstring(i);
+				subobj->DefineExport(ahitExportName.c_str(), L"main");
+			}
 
 			auto hitGroup = rtp.CreateSubobject<CD3DX12_HIT_GROUP_SUBOBJECT>();
-			hitGroup->SetClosestHitShaderImport(exportName.c_str());
+			if(hg.m_closestHitShader)
+			{
+				hitGroup->SetClosestHitShaderImport(chitExportName.c_str());
+			}
+			if(hg.m_anyHitShader)
+			{
+				hitGroup->SetAnyHitShaderImport(ahitExportName.c_str());
+			}
 			hitGroup->SetHitGroupExport((std::wstring(L"hitgroup") + std::to_wstring(i)).c_str());
 			hitGroup->SetHitGroupType(D3D12_HIT_GROUP_TYPE_TRIANGLES);
 
@@ -301,8 +323,19 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 			rootSignatureAssociation->SetSubobjectToAssociate(*localRootSignature);
 			for(U32 i = 0; i < inf.m_rayTracingShaders.m_hitGroups.getSize(); ++i)
 			{
-				const std::wstring exportName = std::wstring(L"chit") + std::to_wstring(i);
-				rootSignatureAssociation->AddExport(exportName.c_str());
+				const RayTracingHitGroup& hg = inf.m_rayTracingShaders.m_hitGroups[i];
+
+				if(hg.m_closestHitShader)
+				{
+					const std::wstring exportName = std::wstring(L"chit") + std::to_wstring(i);
+					rootSignatureAssociation->AddExport(exportName.c_str());
+				}
+
+				if(hg.m_anyHitShader)
+				{
+					const std::wstring exportName = std::wstring(L"ahit") + std::to_wstring(i);
+					rootSignatureAssociation->AddExport(exportName.c_str());
+				}
 			}
 		}
 

+ 4 - 4
AnKi/Gr/D3D/D3DTexture.cpp

@@ -601,7 +601,7 @@ void TextureImpl::computeBarrierInfo(TextureUsageBit usage, D3D12_BARRIER_SYNC&
 				stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
 			}
 
-			if(!!(usage & TextureUsageBit::kSrvTraceRays) && rt)
+			if(!!(usage & TextureUsageBit::kSrvDispatchRays) && rt)
 			{
 				stages |= D3D12_BARRIER_SYNC_RAYTRACING;
 			}
@@ -627,7 +627,7 @@ void TextureImpl::computeBarrierInfo(TextureUsageBit usage, D3D12_BARRIER_SYNC&
 				stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
 			}
 
-			if(!!(usage & TextureUsageBit::kSrvTraceRays) && rt)
+			if(!!(usage & TextureUsageBit::kSrvDispatchRays) && rt)
 			{
 				stages |= D3D12_BARRIER_SYNC_RAYTRACING;
 			}
@@ -680,13 +680,13 @@ void TextureImpl::computeBarrierInfo(TextureUsageBit usage, D3D12_BARRIER_SYNC&
 			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
 		}
 
-		if(!!(usage & TextureUsageBit::kSrvTraceRays) && rt)
+		if(!!(usage & TextureUsageBit::kSrvDispatchRays) && rt)
 		{
 			stages |= D3D12_BARRIER_SYNC_RAYTRACING;
 			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
 		}
 
-		if(!!(usage & TextureUsageBit::kUavTraceRays) && rt)
+		if(!!(usage & TextureUsageBit::kUavDispatchRays) && rt)
 		{
 			// UAV access from a ray dispatch happens in the raytracing stage, same stage the SRV case above syncs on
 			stages |= D3D12_BARRIER_SYNC_RAYTRACING;
 			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;

+ 2 - 2
AnKi/Gr/RenderGraph.cpp

@@ -1427,7 +1427,7 @@ StringRaii RenderGraph::bufferUsageToStr(StackMemoryPool& pool, BufferUsageBit u
 	ANKI_BUFF_USAGE(kConstantGeometry);
 	ANKI_BUFF_USAGE(kConstantPixel);
 	ANKI_BUFF_USAGE(kConstantCompute);
-	ANKI_BUFF_USAGE(kConstantTraceRays);
+	ANKI_BUFF_USAGE(kConstantDispatchRays);
 	ANKI_BUFF_USAGE(kStorageGeometryRead);
 	ANKI_BUFF_USAGE(kStorageGeometryWrite);
 	ANKI_BUFF_USAGE(kStorageFragmentRead);
@@ -1448,7 +1448,7 @@ StringRaii RenderGraph::bufferUsageToStr(StackMemoryPool& pool, BufferUsageBit u
 	ANKI_BUFF_USAGE(kVertex);
 	ANKI_BUFF_USAGE(kIndirectCompute);
 	ANKI_BUFF_USAGE(kIndirectDraw);
-	ANKI_BUFF_USAGE(kIndirectTraceRays);
+	ANKI_BUFF_USAGE(kIndirectDispatchRays);
 	ANKI_BUFF_USAGE(kTransferSource);
 	ANKI_BUFF_USAGE(kTransferDestination);
 	ANKI_BUFF_USAGE(kAccelerationStructureBuild);

+ 8 - 8
AnKi/Gr/Vulkan/VkAccelerationStructure.cpp

@@ -189,26 +189,26 @@ VkMemoryBarrier AccelerationStructureImpl::computeBarrierInfo(AccelerationStruct
 		barrier.srcAccessMask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kGeometrySrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvGeometry))
 	{
 		srcStages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
 					 | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
 		barrier.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT; // READ_BIT is the only viable solution by elimination
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kPixelSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvPixel))
 	{
 		srcStages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
 		barrier.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kComputeSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvCompute))
 	{
 		srcStages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
 		barrier.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kTraceRaysSrv))
+	if(!!(before & AccelerationStructureUsageBit::kSrvDispatchRays))
 	{
 		srcStages |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
 		barrier.srcAccessMask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
@@ -229,26 +229,26 @@ VkMemoryBarrier AccelerationStructureImpl::computeBarrierInfo(AccelerationStruct
 		barrier.dstAccessMask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kGeometrySrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvGeometry))
 	{
 		dstStages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
 					 | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
 		barrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT; // READ_BIT is the only viable solution by elimination
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kPixelSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvPixel))
 	{
 		dstStages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
 		barrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kComputeSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvCompute))
 	{
 		dstStages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
 		barrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kTraceRaysSrv))
+	if(!!(after & AccelerationStructureUsageBit::kSrvDispatchRays))
 	{
 		dstStages |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
 		barrier.dstAccessMask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;

+ 1 - 1
AnKi/Gr/Vulkan/VkBuffer.cpp

@@ -333,7 +333,7 @@ VkPipelineStageFlags BufferImpl::computePplineStage(BufferUsageBit usage)
 		stageMask |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
 	}
 
-	if(!!(usage & (BufferUsageBit::kAllTraceRays & ~BufferUsageBit::kIndirectTraceRays)) && rt)
+	if(!!(usage & (BufferUsageBit::kAllDispatchRays & ~BufferUsageBit::kIndirectDispatchRays)) && rt)
 	{
 		stageMask |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
 	}

+ 4 - 5
AnKi/Gr/Vulkan/VkCommandBuffer.cpp

@@ -702,16 +702,16 @@ void CommandBuffer::dispatchGraph([[maybe_unused]] const BufferView& scratchBuff
 	ANKI_ASSERT(!"Not supported");
 }
 
-void CommandBuffer::traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height,
-							  U32 depth)
+void CommandBuffer::dispatchRays(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width,
+								 U32 height, U32 depth)
 {
 	// Vulkan backend of the direct ray dispatch. All the work is done by CommandBufferImpl::traceRaysInternal
 	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	// NOTE(review): the trailing {} is presumably an empty indirect args buffer, which selects the direct path. Confirm against traceRaysInternal
 	self.traceRaysInternal(sbtBuffer, sbtRecordSize32, hitGroupSbtRecordCount, rayTypeCount, width, height, depth, {});
 }
 
-void CommandBuffer::traceRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
-									  BufferView argsBuffer)
+void CommandBuffer::dispatchRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
+										 BufferView argsBuffer)
 {
 	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -1297,7 +1297,6 @@ void CommandBufferImpl::traceRaysInternal(const BufferView& sbtBuffer, U32 sbtRe
 	const PtrSize sbtRecordSize = sbtRecordSize32;
 	ANKI_ASSERT(hitGroupSbtRecordCount > 0);
 	ANKI_ASSERT(m_rtProg);
-	[[maybe_unused]] const ShaderProgramImpl& sprog = static_cast<const ShaderProgramImpl&>(*m_rtProg);
 
 	ANKI_ASSERT((hitGroupSbtRecordCount % rayTypeCount) == 0);
 	const PtrSize sbtRecordCount = 1 + rayTypeCount + hitGroupSbtRecordCount;

+ 1 - 1
AnKi/Gr/Vulkan/VkSwapchainFactory.cpp

@@ -226,7 +226,7 @@ Error MicroSwapchain::initInternal()
 			init.m_width = surfaceWidth;
 			init.m_height = surfaceHeight;
 			init.m_format = Format(surfaceFormat); // anki::Format is compatible with VkFormat
-			init.m_usage = TextureUsageBit::kUavCompute | TextureUsageBit::kUavTraceRays | TextureUsageBit::kRtvDsvRead
+			init.m_usage = TextureUsageBit::kUavCompute | TextureUsageBit::kUavDispatchRays | TextureUsageBit::kRtvDsvRead
 						   | TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kPresent;
 			init.m_type = TextureType::k2D;
 

+ 2 - 2
AnKi/Gr/Vulkan/VkTexture.cpp

@@ -334,13 +334,13 @@ void TextureImpl::computeBarrierInfo(TextureUsageBit usage, VkPipelineStageFlags
 		accesses |= VK_ACCESS_SHADER_WRITE_BIT;
 	}
 
-	if(!!(usage & TextureUsageBit::kSrvTraceRays) && rt)
+	if(!!(usage & TextureUsageBit::kSrvDispatchRays) && rt)
 	{
 		stages |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
 		accesses |= VK_ACCESS_SHADER_READ_BIT;
 	}
 
-	if(!!(usage & TextureUsageBit::kUavTraceRays) && rt)
+	if(!!(usage & TextureUsageBit::kUavDispatchRays) && rt)
 	{
 		stages |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
 		accesses |= VK_ACCESS_SHADER_WRITE_BIT;

+ 9 - 9
AnKi/Renderer/IndirectDiffuse.cpp

@@ -123,17 +123,17 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtIndirectDiffuse");
 
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavTraceRays);
-		rpass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvTraceRays);
+		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
-			rpass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+			rpass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvDispatchRays);
 		}
-		rpass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		rpass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvDispatchRays);
 		rpass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												 AccelerationStructureUsageBit::kTraceRaysSrv);
+												 AccelerationStructureUsageBit::kSrvDispatchRays);
 
 		rpass.setWork([this, sbtBuffer, &ctx, transientRt1](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(IndirectDiffuseRayGen);
@@ -192,8 +192,8 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 			UVec4 dummyConsts;
 			cmdb.setFastConstants(&dummyConsts, sizeof(dummyConsts));
 
-			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
+			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							  getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
 		});
 	}
 

+ 17 - 17
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -276,11 +276,11 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
-			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvDispatchRays);
 		}
-		pass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		pass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvDispatchRays);
 		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												AccelerationStructureUsageBit::kTraceRaysSrv);
+												AccelerationStructureUsageBit::kSrvDispatchRays);
 
 		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 		{
@@ -352,8 +352,8 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 				const UVec4 consts(clipmap, g_indirectDiffuseClipmapRadianceOctMapSize, 0, 0);
 				cmdb.setFastConstants(&consts, sizeof(consts));
 
-				cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-							   m_consts.m_totalProbeCount * raysPerProbePerFrame, 1, 1);
+				cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+								  m_consts.m_totalProbeCount * raysPerProbePerFrame, 1, 1);
 			}
 		});
 	}
@@ -442,22 +442,22 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
-			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvDispatchRays);
 		}
-		pass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		pass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvDispatchRays);
 		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												AccelerationStructureUsageBit::kTraceRaysSrv);
-		pass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvTraceRays);
-		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvTraceRays);
+												AccelerationStructureUsageBit::kSrvDispatchRays);
+		pass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
+		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
 
 		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 		{
-			pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
-			pass.newTextureDependency(probeValidityVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
-			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
+			pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kSrvDispatchRays);
+			pass.newTextureDependency(probeValidityVolumes[clipmap], TextureUsageBit::kSrvDispatchRays);
+			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kSrvDispatchRays);
 		}
 
-		pass.newTextureDependency(lowRezRt, TextureUsageBit::kUavTraceRays);
+		pass.newTextureDependency(lowRezRt, TextureUsageBit::kUavDispatchRays);
 
 		pass.setWork([this, &ctx, sbtBuffer, lowRezRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -519,9 +519,9 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			const Vec4 consts(g_indirectDiffuseClipmapFirstBounceRayDistance);
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
-			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   getRenderer().getInternalResolution().x() / 2,
-						   getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1), 1);
+			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							  getRenderer().getInternalResolution().x() / 2,
+							  getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1), 1);
 		});
 	}
 	else

+ 11 - 11
AnKi/Renderer/Reflections.cpp

@@ -269,19 +269,19 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections");
 
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavTraceRays);
-		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavTraceRays);
-		rpass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvTraceRays);
+		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavDispatchRays);
+		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
-			rpass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+			rpass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvDispatchRays);
 		}
-		rpass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		rpass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvDispatchRays);
 		rpass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												 AccelerationStructureUsageBit::kTraceRaysSrv);
-		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectTraceRays);
+												 AccelerationStructureUsageBit::kSrvDispatchRays);
+		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectDispatchRays);
 
 		rpass.setWork([this, sbtBuffer, &ctx, transientRt1, hitPosAndDepthRt, pixelsFailedSsrBuff](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(ReflectionsRayGen);
@@ -353,8 +353,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
-			cmdb.traceRaysIndirect(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-								   BufferView(m_indirectArgsBuffer.get()).setRange(sizeof(DispatchIndirectArgs)));
+			cmdb.dispatchRaysIndirect(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+									  BufferView(m_indirectArgsBuffer.get()).setRange(sizeof(DispatchIndirectArgs)));
 		});
 	}
 	else

+ 4 - 4
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -71,9 +71,9 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch");
 
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		rpass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kUavTraceRays);
+		rpass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kUavDispatchRays);
 		rpass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												 AccelerationStructureUsageBit::kTraceRaysSrv);
+												 AccelerationStructureUsageBit::kSrvDispatchRays);
 
 		rpass.setWork([this, sbtBuffer, &ctx](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetchRayGen);
@@ -126,8 +126,8 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 			Vec4 dummy;
 			cmdb.setFastConstants(&dummy, sizeof(dummy));
 
-			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
+			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							  getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
 		});
 	}
 }

+ 14 - 14
AnKi/Renderer/RtShadows.cpp

@@ -80,7 +80,7 @@ Error RtShadows::init()
 	{
 		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
 			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR8_Unorm,
-			TextureUsageBit::kAllSrv | TextureUsageBit::kUavTraceRays | TextureUsageBit::kUavCompute, "RtShadows History");
+			TextureUsageBit::kAllSrv | TextureUsageBit::kUavDispatchRays | TextureUsageBit::kUavCompute, "RtShadows History");
 		m_historyRt = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSrvPixel);
 	}
 
@@ -95,7 +95,7 @@ Error RtShadows::init()
 	{
 		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
 			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR32G32_Sfloat,
-			TextureUsageBit::kAllSrv | TextureUsageBit::kUavTraceRays | TextureUsageBit::kUavCompute, "RtShadows Moments #1");
+			TextureUsageBit::kAllSrv | TextureUsageBit::kUavDispatchRays | TextureUsageBit::kUavCompute, "RtShadows Moments #1");
 		m_momentsRts[0] = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSrvPixel);
 
 		texinit.setName("RtShadows Moments #2");
@@ -120,7 +120,7 @@ Error RtShadows::init()
 	{
 		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
 			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR32_Sfloat,
-			TextureUsageBit::kAllSrv | TextureUsageBit::kUavTraceRays | TextureUsageBit::kUavCompute, "RtShadows history len");
+			TextureUsageBit::kAllSrv | TextureUsageBit::kUavDispatchRays | TextureUsageBit::kUavCompute, "RtShadows history len");
 		ClearValue clear;
 		clear.m_colorf[0] = 1.0f;
 		m_dummyHistoryLenTex = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSrvPixel, clear);
@@ -137,7 +137,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 	ANKI_TRACE_SCOPED_EVENT(RtShadows);
 
 #define ANKI_DEPTH_DEP \
-	getDepthDownscale().getRt(), TextureUsageBit::kSrvTraceRays | TextureUsageBit::kSrvCompute, DepthDownscale::kQuarterInternalResolution
+	getDepthDownscale().getRt(), TextureUsageBit::kSrvDispatchRays | TextureUsageBit::kSrvCompute, DepthDownscale::kQuarterInternalResolution
 
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
@@ -263,18 +263,18 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 	{
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtShadows");
 
-		rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kUavTraceRays);
+		rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kUavDispatchRays);
 		rpass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												 AccelerationStructureUsageBit::kTraceRaysSrv);
+												 AccelerationStructureUsageBit::kSrvDispatchRays);
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
-		rpass.newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvTraceRays);
+		rpass.newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
 
-		rpass.newTextureDependency(m_runCtx.m_prevMomentsRt, TextureUsageBit::kSrvTraceRays);
-		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kUavTraceRays);
+		rpass.newTextureDependency(m_runCtx.m_prevMomentsRt, TextureUsageBit::kSrvDispatchRays);
+		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kUavDispatchRays);
 
-		rpass.newBufferDependency(getClusterBinning().getDependency(), BufferUsageBit::kSrvTraceRays);
+		rpass.newBufferDependency(getClusterBinning().getDependency(), BufferUsageBit::kSrvDispatchRays);
 
 		rpass.setWork([this, sbtBuffer, &ctx](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
@@ -320,8 +320,8 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindUav(1, 2, m_runCtx.m_currentMomentsRt);
 			cmdb.bindSrv(7, 2, TextureView(&m_blueNoiseImage->getTexture(), TextureSubresourceDesc::all()));
 
-			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, 1);
+			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							  getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, 1);
 		});
 	}
 

+ 1 - 1
AnKi/Renderer/ShadowMapping.cpp

@@ -69,7 +69,7 @@ Error ShadowMapping::init()
 		m_tileCountBothAxis = g_shadowMappingTileCountPerRowOrColumnCVar;
 
 		const TextureUsageBit usage =
-			TextureUsageBit::kSrvPixel | TextureUsageBit::kSrvCompute | TextureUsageBit::kSrvTraceRays | TextureUsageBit::kAllRtvDsv;
+			TextureUsageBit::kSrvPixel | TextureUsageBit::kSrvCompute | TextureUsageBit::kSrvDispatchRays | TextureUsageBit::kAllRtvDsv;
 		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
 			m_tileResolution * m_tileCountBothAxis, m_tileResolution * m_tileCountBothAxis, Format::kD32_Sfloat, usage, "ShadowAtlas");
 		ClearValue clearVal;

+ 1 - 1
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -624,7 +624,7 @@ GBufferPixelOut main(
 #	if ANKI_ANY_HIT_SHADER
 
 #		if REALLY_ALPHA_TEST
-[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableInstance> g_gpuSceneRenderable : register(b0); // TODO that won't work on D3D
+ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 #		endif
 
 [shader("anyhit")] void main(inout RtShadowsRayPayload payload, in Barycentrics barycentrics)

+ 14 - 7
Tests/Gr/Gr.cpp

@@ -2435,7 +2435,7 @@ float4 main(VertOut input) : SV_TARGET0
 			AccelerationStructureBarrierInfo barr3;
 			barr3.m_as = tlas.get();
 			barr3.m_previousUsage = AccelerationStructureUsageBit::kBuild;
-			barr3.m_nextUsage = AccelerationStructureUsageBit::kComputeSrv;
+			barr3.m_nextUsage = AccelerationStructureUsageBit::kSrvCompute;
 
 			cmdb->setPipelineBarrier({}, {}, {&barr3, 1});
 
@@ -2763,7 +2763,7 @@ RWTexture2D<float4> g_uav : register(u0);
 			AccelerationStructureBarrierInfo barr3;
 			barr3.m_as = tlas.get();
 			barr3.m_previousUsage = AccelerationStructureUsageBit::kBuild;
-			barr3.m_nextUsage = AccelerationStructureUsageBit::kComputeSrv;
+			barr3.m_nextUsage = AccelerationStructureUsageBit::kSrvCompute;
 
 			cmdb->setPipelineBarrier({}, {}, {&barr3, 1});
 
@@ -2803,6 +2803,13 @@ float4 main(float4 svPosition : SV_POSITION) : SV_TARGET0
 			blitProg = createVertFragProg(kVertSrc, kPixelSrc);
 		}
 
+		// Build indirect args
+		BufferPtr indirectArgs;
+		{
+			const DispatchIndirectArgs args = {kWidth, kHeight, 1};
+			indirectArgs = createBuffer(BufferUsageBit::kIndirectDispatchRays, args, 1, "IndirectArgs");
+		}
+
 		// Draw
 		constexpr U32 kIterations = 200;
 		for(U i = 0; i < kIterations; ++i)
@@ -2823,7 +2830,7 @@ float4 main(float4 svPosition : SV_POSITION) : SV_TARGET0
 			TextureBarrierInfo barr;
 			barr.m_textureView = TextureView(uav.get());
 			barr.m_previousUsage = (i == 0) ? TextureUsageBit::kNone : TextureUsageBit::kSrvPixel;
-			barr.m_nextUsage = TextureUsageBit::kUavTraceRays;
+			barr.m_nextUsage = TextureUsageBit::kUavDispatchRays;
 			cmdb->setPipelineBarrier({&barr, 1}, {}, {});
 
 			cmdb->bindShaderProgram(prog.get());
@@ -2843,9 +2850,9 @@ float4 main(float4 svPosition : SV_POSITION) : SV_TARGET0
 			cmdb->bindSrv(0, 0, tlas.get());
 			cmdb->bindUav(0, 0, TextureView(uav.get()));
 
-			cmdb->traceRays(BufferView(sbt.get()), sbtRecordSize, 2, 1, kWidth, kHeight, 1);
+			cmdb->dispatchRaysIndirect(BufferView(sbt.get()), sbtRecordSize, 2, 1, BufferView(indirectArgs.get()));
 
-			barr.m_previousUsage = TextureUsageBit::kUavTraceRays;
+			barr.m_previousUsage = TextureUsageBit::kUavDispatchRays;
 			barr.m_nextUsage = TextureUsageBit::kSrvPixel;
 			cmdb->setPipelineBarrier({&barr, 1}, {}, {});
 
@@ -2891,7 +2898,7 @@ static void createCubeBuffers(GrManager& gr, Vec3 min, Vec3 max, BufferPtr& inde
 {
 	BufferInitInfo inf;
 	inf.m_mapAccess = BufferMapAccessBit::kWrite;
-	inf.m_usage = BufferUsageBit::kVertexOrIndex | BufferUsageBit::kSrvTraceRays;
+	inf.m_usage = BufferUsageBit::kVertexOrIndex | BufferUsageBit::kSrvDispatchRays;
 	inf.m_size = sizeof(Vec3) * 8;
 	vertBuffer = gr.newBuffer(inf);
 	WeakArray<Vec3, PtrSize> positions = vertBuffer->map<Vec3>(0, 8, BufferMapAccessBit::kWrite);
@@ -3705,7 +3712,7 @@ void main()
 		cmdb->setFastConstants(&pc, sizeof(pc));
 
 		const U32 sbtRecordSize = g_gr->getDeviceCapabilities().m_sbtRecordAlignment;
-		cmdb->traceRays(BufferView(sbt.get()), sbtRecordSize, U32(GeomWhat::kCount) * 2, 2, WIDTH, HEIGHT, 1);
+		cmdb->dispatchRays(BufferView(sbt.get()), sbtRecordSize, U32(GeomWhat::kCount) * 2, 2, WIDTH, HEIGHT, 1);
 
 		// Copy to present
 		setTextureBarrier(cmdb, offscreenRts[i & 1], TextureUsageBit::kUavTraceRaysWrite, TextureUsageBit::kUavComputeRead,