Browse Source

Add acceleration structure support in D3D

Panagiotis Christopoulos Charitos 4 months ago
parent
commit
b3f2994fbb

+ 223 - 0
AnKi/Gr/D3D/D3DAccelerationStructure.cpp

@@ -4,6 +4,8 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #include <AnKi/Gr/D3D/D3DAccelerationStructure.h>
 #include <AnKi/Gr/D3D/D3DAccelerationStructure.h>
+#include <AnKi/Gr/D3D/D3DBuffer.h>
+#include <AnKi/Gr/D3D/D3DGrManager.h>
 
 
 namespace anki {
 namespace anki {
 
 
@@ -29,4 +31,225 @@ AccelerationStructureImpl::~AccelerationStructureImpl()
 {
 {
 }
 }
 
 
+/// Initialize the acceleration structure.
+/// - Bottom level: fills the triangle geometry description from the init info, queries the prebuild sizes and creates the buffer that backs the AS.
+/// - Top level: queries the prebuild sizes, creates the buffer that backs the AS and then either creates and fills a CPU-written instances buffer
+///   (direct args) or references the instances buffer supplied by the caller (indirect args).
+/// In both cases the scratch size reported by the driver is stored in m_scratchBufferSize.
+/// @param inf The init info. Asserted to be valid.
+/// @return Always Error::kNone.
+Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
+{
+	ANKI_ASSERT(inf.isValid());
+
+	m_type = inf.m_type;
+
+	if(inf.m_type == AccelerationStructureType::kBottomLevel)
+	{
+		// Setup the geom descr
+		m_blas.m_geometryDesc = {};
+		m_blas.m_geometryDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;
+		m_blas.m_geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; // TODO
+		m_blas.m_geometryDesc.Triangles.Transform3x4 = 0;
+		m_blas.m_geometryDesc.Triangles.IndexFormat = convertIndexType(inf.m_bottomLevel.m_indexType);
+		m_blas.m_geometryDesc.Triangles.VertexFormat = convertFormat(inf.m_bottomLevel.m_positionsFormat);
+		m_blas.m_geometryDesc.Triangles.IndexCount = inf.m_bottomLevel.m_indexCount;
+		m_blas.m_geometryDesc.Triangles.VertexCount = inf.m_bottomLevel.m_positionCount;
+		m_blas.m_geometryDesc.Triangles.IndexBuffer =
+			inf.m_bottomLevel.m_indexBuffer.getBuffer().getGpuAddress() + inf.m_bottomLevel.m_indexBuffer.getOffset();
+		m_blas.m_geometryDesc.Triangles.VertexBuffer.StartAddress =
+			inf.m_bottomLevel.m_positionBuffer.getBuffer().getGpuAddress() + inf.m_bottomLevel.m_positionBuffer.getOffset();
+		m_blas.m_geometryDesc.Triangles.VertexBuffer.StrideInBytes = inf.m_bottomLevel.m_positionStride;
+
+		// Get sizes. Keep these inputs in sync with what fillBuildInfo() passes to the actual build
+		D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
+		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
+		inputs.NumDescs = 1;
+		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+		inputs.pGeometryDescs = &m_blas.m_geometryDesc;
+
+		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
+		getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
+		// The call returns nothing. A zero size is the only indication that the inputs were rejected
+		ANKI_ASSERT(prebuildInfo.ResultDataMaxSizeInBytes > 0 && "Invalid BLAS build inputs");
+		m_scratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
+
+		// Create the AS buffer
+		BufferInitInfo asBuffInit(inf.getName());
+		asBuffInit.m_size = prebuildInfo.ResultDataMaxSizeInBytes;
+		asBuffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
+		m_asBuffer.reset(GrManager::getSingleton().newBuffer(asBuffInit).get());
+	}
+	else
+	{
+		// A non-zero max instance count selects the indirect path where the caller owns the instances buffer
+		const Bool isIndirect = inf.m_topLevel.m_indirectArgs.m_maxInstanceCount > 0;
+		const U32 instanceCount = (isIndirect) ? inf.m_topLevel.m_indirectArgs.m_maxInstanceCount : inf.m_topLevel.m_directArgs.m_instances.getSize();
+
+		// Get sizes. Only the instance count matters here, the instances themselves are not read
+		D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
+		inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+		inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD;
+		inputs.NumDescs = instanceCount;
+		inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+
+		D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
+		getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
+		// The call returns nothing. A zero size is the only indication that the inputs were rejected
+		ANKI_ASSERT(prebuildInfo.ResultDataMaxSizeInBytes > 0 && "Invalid TLAS build inputs");
+		m_scratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
+
+		// Create the AS buffer
+		BufferInitInfo asBuffInit(inf.getName());
+		asBuffInit.m_size = prebuildInfo.ResultDataMaxSizeInBytes;
+		asBuffInit.m_usage = PrivateBufferUsageBit::kAccelerationStructure;
+		m_asBuffer.reset(GrManager::getSingleton().newBuffer(asBuffInit).get());
+
+		// Create instances buffer
+		if(!isIndirect)
+		{
+			BufferInitInfo buffInit("AS instances");
+			buffInit.m_size = inf.m_topLevel.m_directArgs.m_instances.getSize() * sizeof(AccelerationStructureInstance);
+			buffInit.m_usage = BufferUsageBit::kAllUav;
+			buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
+			m_tlas.m_instancesBuff.reset(GrManager::getSingleton().newBuffer(buffInit).get());
+
+			WeakArray<AccelerationStructureInstance> mapped(
+				static_cast<AccelerationStructureInstance*>(m_tlas.m_instancesBuff->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite)),
+				inf.m_topLevel.m_directArgs.m_instances.getSize());
+
+			// Low 24 bits of both packed words hold an index, the high 8 bits hold the mask/flags
+			constexpr U32 kLow24BitsMask = 0xFFFFFFu;
+
+			for(U32 i = 0; i < inf.m_topLevel.m_directArgs.m_instances.getSize(); ++i)
+			{
+				const AccelerationStructureInstanceInfo& in = inf.m_topLevel.m_directArgs.m_instances[i];
+				AccelerationStructureInstance& out = mapped[i];
+
+				const AccelerationStructureImpl& blas = static_cast<const AccelerationStructureImpl&>(*in.m_bottomLevel);
+				const U64 blasAddr = blas.m_asBuffer->getGpuAddress();
+
+				const U32 flags =
+					D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE | D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE;
+
+				// Widen the mask before shifting. A narrower type would be promoted to a signed int and shifting a set top bit into the
+				// sign bit is not something to rely on
+				const U32 mask = U32(in.m_mask);
+
+				// An index that doesn't fit in 24 bits would silently corrupt the flags that live in the top 8 bits
+				ANKI_ASSERT(in.m_hitgroupSbtRecordIndex <= kLow24BitsMask && "SBT record index doesn't fit in 24 bits");
+
+				out.m_transform = in.m_transform;
+				out.m_mask8_instanceCustomIndex24 = (mask << 24u) | (i & kLow24BitsMask);
+				out.m_flags8_instanceShaderBindingTableRecordOffset24 = (flags << 24u) | (in.m_hitgroupSbtRecordIndex & kLow24BitsMask);
+				// memcpy instead of an assignment because the destination member may not be a plain U64
+				memcpy(&out.m_accelerationStructureAddress, &blasAddr, sizeof(blasAddr));
+			}
+
+			m_tlas.m_instancesBuff->unmap();
+		}
+		else
+		{
+			m_tlas.m_instancesBuff.reset(&inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getBuffer());
+			m_tlas.m_instancesBuffOffset = inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getOffset();
+		}
+
+		m_tlas.m_instanceCount = instanceCount;
+	}
+
+	return Error::kNone;
+}
+
+/// Populate the description that BuildRaytracingAccelerationStructure consumes.
+/// @param scratchBuff The scratch memory the build will use.
+/// @param[out] buildDesc Reset to zero and then filled with the destination, the scratch address and the build inputs of this AS.
+void AccelerationStructureImpl::fillBuildInfo(BufferView scratchBuff, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& buildDesc) const
+{
+	const Bool bottomLevel = (m_type == AccelerationStructureType::kBottomLevel);
+
+	// Start from a clean slate and set the addresses that don't depend on the AS type
+	buildDesc = {};
+	buildDesc.ScratchAccelerationStructureData = scratchBuff.getBuffer().getGpuAddress() + scratchBuff.getOffset();
+	buildDesc.DestAccelerationStructureData = m_asBuffer->getGpuAddress();
+
+	// Inputs. The layout is the same for both types, the rest mirrors what init() used for the prebuild query
+	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS& asInputs = buildDesc.Inputs;
+	asInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+
+	if(bottomLevel)
+	{
+		// A single triangle geometry that was described in init()
+		asInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+		asInputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE;
+		asInputs.pGeometryDescs = &m_blas.m_geometryDesc;
+		asInputs.NumDescs = 1;
+	}
+	else
+	{
+		// The instances live in a GPU buffer at the offset recorded in init()
+		asInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+		asInputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD;
+		asInputs.InstanceDescs = m_tlas.m_instancesBuff->getGpuAddress() + m_tlas.m_instancesBuffOffset;
+		asInputs.NumDescs = m_tlas.m_instanceCount;
+	}
+}
+
+/// Translate a pair of acceleration structure usages to an enhanced global barrier.
+/// @param before The usage before the barrier. kNone maps to D3D12_BARRIER_SYNC_NONE/D3D12_BARRIER_ACCESS_NO_ACCESS.
+/// @param after The usage after the barrier.
+/// @return The barrier with the sync and access masks of both sides filled in.
+D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationStructureUsageBit before, AccelerationStructureUsageBit after) const
+{
+	// Single translation that both sides of the barrier share so the before and after masks can't drift apart
+	const auto appendUsage = [](AccelerationStructureUsageBit usage, D3D12_BARRIER_SYNC& sync, D3D12_BARRIER_ACCESS& access) {
+		if(!!(usage & AccelerationStructureUsageBit::kBuild))
+		{
+			sync |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
+		}
+
+		// Attached means that it's read by the build of another AS
+		if(!!(usage & AccelerationStructureUsageBit::kAttach))
+		{
+			sync |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		}
+
+		if(!!(usage & AccelerationStructureUsageBit::kGeometrySrv))
+		{
+			sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; // READ_BIT is the only viable solution by elimination
+		}
+
+		if(!!(usage & AccelerationStructureUsageBit::kPixelSrv))
+		{
+			sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		}
+
+		if(!!(usage & AccelerationStructureUsageBit::kComputeSrv))
+		{
+			sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		}
+
+		// Consumed by ray tracing shaders, so sync against the ray tracing work and not against AS builds
+		if(!!(usage & AccelerationStructureUsageBit::kTraceRaysSrv))
+		{
+			sync |= D3D12_BARRIER_SYNC_RAYTRACING;
+			access |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		}
+	};
+
+	D3D12_GLOBAL_BARRIER barrier = {};
+
+	// Before
+	if(before == AccelerationStructureUsageBit::kNone)
+	{
+		barrier.SyncBefore |= D3D12_BARRIER_SYNC_NONE;
+		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_NO_ACCESS;
+	}
+	appendUsage(before, barrier.SyncBefore, barrier.AccessBefore);
+
+	// After
+	appendUsage(after, barrier.SyncAfter, barrier.AccessAfter);
+
+	ANKI_ASSERT(barrier.SyncBefore || barrier.SyncAfter);
+
+	return barrier;
+}
+
 } // end namespace anki
 } // end namespace anki

+ 26 - 3
AnKi/Gr/D3D/D3DAccelerationStructure.h

@@ -6,6 +6,7 @@
 #pragma once
 #pragma once
 
 
 #include <AnKi/Gr/AccelerationStructure.h>
 #include <AnKi/Gr/AccelerationStructure.h>
+#include <AnKi/Gr/D3D/D3DCommon.h>
 
 
 namespace anki {
 namespace anki {
 
 
@@ -23,11 +24,33 @@ public:
 
 
 	~AccelerationStructureImpl();
 	~AccelerationStructureImpl();
 
 
-	Error init(const AccelerationStructureInitInfo& inf)
+	Error init(const AccelerationStructureInitInfo& inf);
+
+	void fillBuildInfo(BufferView scratchBuff, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& buildDesc) const;
+
+	D3D12_GLOBAL_BARRIER computeBarrierInfo(AccelerationStructureUsageBit before, AccelerationStructureUsageBit after) const;
+
+	const Buffer& getAsBuffer() const
 	{
 	{
-		ANKI_ASSERT(!"TODO");
-		return Error::kNone;
+		return *m_asBuffer;
 	}
 	}
+
+private:
+	BufferInternalPtr m_asBuffer;
+
+	class
+	{
+	public:
+		BufferInternalPtr m_instancesBuff;
+		PtrSize m_instancesBuffOffset = 0;
+		U32 m_instanceCount = 0;
+	} m_tlas;
+
+	class
+	{
+	public:
+		D3D12_RAYTRACING_GEOMETRY_DESC m_geometryDesc;
+	} m_blas;
 };
 };
 /// @}
 /// @}
 
 

+ 11 - 3
AnKi/Gr/D3D/D3DBuffer.cpp

@@ -142,17 +142,25 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	resourceDesc.SampleDesc.Quality = 0;
 	resourceDesc.SampleDesc.Quality = 0;
 	resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
 	resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
 	resourceDesc.Flags = {};
 	resourceDesc.Flags = {};
-	if(!!(m_usage & BufferUsageBit::kAllUav))
+	if(!!(m_usage & BufferUsageBit::kAllUav) || !!(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
 	{
 	{
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 	}
 	}
-	if(!(m_usage & BufferUsageBit::kAllShaderResource))
+	if(!(m_usage & BufferUsageBit::kAllShaderResource) && !(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
 	{
 	{
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
 		resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
 	}
 	}
 
 
 	// Create resource
 	// Create resource
-	const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON;
+	D3D12_RESOURCE_STATES initialState;
+	if(!!(m_usage & PrivateBufferUsageBit::kAccelerationStructure))
+	{
+		initialState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
+	}
+	else
+	{
+		initialState = D3D12_RESOURCE_STATE_COMMON;
+	}
 	ANKI_D3D_CHECK(getDevice().CreateCommittedResource(&heapProperties, heapFlags, &resourceDesc, initialState, nullptr, IID_PPV_ARGS(&m_resource)));
 	ANKI_D3D_CHECK(getDevice().CreateCommittedResource(&heapProperties, heapFlags, &resourceDesc, initialState, nullptr, IID_PPV_ARGS(&m_resource)));
 
 
 	GrDynamicArray<WChar> wstr;
 	GrDynamicArray<WChar> wstr;

+ 35 - 6
AnKi/Gr/D3D/D3DCommandBuffer.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Gr/D3D/D3DShaderProgram.h>
 #include <AnKi/Gr/D3D/D3DShaderProgram.h>
 #include <AnKi/Gr/D3D/D3DTimestampQuery.h>
 #include <AnKi/Gr/D3D/D3DTimestampQuery.h>
 #include <AnKi/Gr/D3D/D3DPipelineQuery.h>
 #include <AnKi/Gr/D3D/D3DPipelineQuery.h>
+#include <AnKi/Gr/D3D/D3DAccelerationStructure.h>
 #include <AnKi/Gr/D3D/D3DGrManager.h>
 #include <AnKi/Gr/D3D/D3DGrManager.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/Tracer.h>
 
 
@@ -87,7 +88,7 @@ void CommandBuffer::bindIndexBuffer(const BufferView& buff, IndexType type)
 
 
 	const D3D12_INDEX_BUFFER_VIEW view = {.BufferLocation = impl.getGpuAddress() + buff.getOffset(),
 	const D3D12_INDEX_BUFFER_VIEW view = {.BufferLocation = impl.getGpuAddress() + buff.getOffset(),
 										  .SizeInBytes = U32(buff.getRange()),
 										  .SizeInBytes = U32(buff.getRange()),
-										  .Format = (type == IndexType::kU16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT};
+										  .Format = convertIndexType(type)};
 
 
 	self.m_cmdList->IASetIndexBuffer(&view);
 	self.m_cmdList->IASetIndexBuffer(&view);
 }
 }
@@ -259,9 +260,12 @@ void CommandBuffer::bindUav(U32 reg, U32 space, const BufferView& buff, Format f
 	self.m_descriptors.bindUav(space, reg, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), fmt);
 	self.m_descriptors.bindUav(space, reg, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), fmt);
 }
 }
 
 
-void CommandBuffer::bindSrv([[maybe_unused]] U32 reg, [[maybe_unused]] U32 space, [[maybe_unused]] AccelerationStructure* as)
+void CommandBuffer::bindSrv(U32 reg, U32 space, AccelerationStructure* as)
 {
 {
-	ANKI_ASSERT(!"TODO");
+	ANKI_D3D_SELF(CommandBufferImpl);
+	const AccelerationStructureImpl& impl = static_cast<const AccelerationStructureImpl&>(*as);
+	const BufferImpl& asBuff = static_cast<const BufferImpl&>(impl.getAsBuffer());
+	self.m_descriptors.bindSrv(space, reg, asBuff.getGpuAddress());
 }
 }
 
 
 void CommandBuffer::bindShaderProgram(ShaderProgram* prog)
 void CommandBuffer::bindShaderProgram(ShaderProgram* prog)
@@ -688,9 +692,18 @@ void CommandBuffer::copyBufferToBuffer(Buffer* src, Buffer* dst, ConstWeakArray<
 	}
 	}
 }
 }
 
 
-void CommandBuffer::buildAccelerationStructure([[maybe_unused]] AccelerationStructure* as, [[maybe_unused]] const BufferView& scratchBuffer)
+void CommandBuffer::buildAccelerationStructure(AccelerationStructure* as, const BufferView& scratchBuffer)
 {
 {
-	ANKI_ASSERT(!"TODO");
+	ANKI_ASSERT(as);
+	ANKI_D3D_SELF(CommandBufferImpl);
+
+	self.commandCommon();
+
+	const AccelerationStructureImpl& impl = static_cast<AccelerationStructureImpl&>(*as);
+	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc;
+	impl.fillBuildInfo(scratchBuffer, buildDesc);
+
+	self.m_cmdList->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr);
 }
 }
 
 
 void CommandBuffer::upscale([[maybe_unused]] GrUpscaler* upscaler, [[maybe_unused]] const TextureView& inColor,
 void CommandBuffer::upscale([[maybe_unused]] GrUpscaler* upscaler, [[maybe_unused]] const TextureView& inColor,
@@ -718,6 +731,7 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 
 
 	DynamicArray<D3D12_TEXTURE_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(&self.m_fastPool);
 	DynamicArray<D3D12_TEXTURE_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(&self.m_fastPool);
 	DynamicArray<D3D12_BUFFER_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> bufferBarriers(&self.m_fastPool);
 	DynamicArray<D3D12_BUFFER_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> bufferBarriers(&self.m_fastPool);
+	D3D12_GLOBAL_BARRIER globalBarrier = {};
 
 
 	for(const TextureBarrierInfo& barrier : textures)
 	for(const TextureBarrierInfo& barrier : textures)
 	{
 	{
@@ -754,7 +768,17 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 		sanitizeAccess(bufferBarriers.getBack().AccessAfter);
 		sanitizeAccess(bufferBarriers.getBack().AccessAfter);
 	}
 	}
 
 
-	ANKI_ASSERT(accelerationStructures.getSize() == 0 && "TODO");
+	for(const AccelerationStructureBarrierInfo& barrier : accelerationStructures)
+	{
+		const D3D12_GLOBAL_BARRIER barr =
+			static_cast<const AccelerationStructureImpl&>(*barrier.m_as).computeBarrierInfo(barrier.m_previousUsage, barrier.m_nextUsage);
+		globalBarrier.SyncBefore |= barr.SyncBefore;
+		globalBarrier.SyncAfter |= barr.SyncAfter;
+		globalBarrier.AccessBefore |= barr.AccessBefore;
+		globalBarrier.AccessAfter |= barr.AccessAfter;
+	}
+	sanitizeAccess(globalBarrier.AccessBefore);
+	sanitizeAccess(globalBarrier.AccessAfter);
 
 
 	Array<D3D12_BARRIER_GROUP, 3> barrierGroups;
 	Array<D3D12_BARRIER_GROUP, 3> barrierGroups;
 	U32 barrierGroupCount = 0;
 	U32 barrierGroupCount = 0;
@@ -773,6 +797,11 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 											  .pBufferBarriers = bufferBarriers.getBegin()};
 											  .pBufferBarriers = bufferBarriers.getBegin()};
 	}
 	}
 
 
+	if(accelerationStructures.getSize())
+	{
+		barrierGroups[barrierGroupCount++] = {.Type = D3D12_BARRIER_TYPE_GLOBAL, .NumBarriers = 1, .pGlobalBarriers = &globalBarrier};
+	}
+
 	ANKI_ASSERT(barrierGroupCount > 0);
 	ANKI_ASSERT(barrierGroupCount > 0);
 	self.m_cmdList->Barrier(barrierGroupCount, barrierGroups.getBegin());
 	self.m_cmdList->Barrier(barrierGroupCount, barrierGroups.getBegin());
 }
 }

+ 31 - 1
AnKi/Gr/D3D/D3DCommon.h

@@ -81,6 +81,17 @@ void invokeDred();
 using D3D12GraphicsCommandListX = ID3D12GraphicsCommandList10;
 using D3D12GraphicsCommandListX = ID3D12GraphicsCommandList10;
 using ID3D12DeviceX = ID3D12Device14;
 using ID3D12DeviceX = ID3D12Device14;
 
 
+/// Some internal buffer usage flags. They are expressed as BufferUsageBit values but must not overlap with BufferUsageBit::kAll.
+class PrivateBufferUsageBit
+{
+public:
+	/// Buffer that holds the memory for the actual AS. Used by AccelerationStructureImpl::init when it creates the AS buffer.
+	static constexpr BufferUsageBit kAccelerationStructure = BufferUsageBit(1ull << 30ull); // Bit 30
+
+	static constexpr BufferUsageBit kAllPrivate = kAccelerationStructure; // Union of the private bits, there is only one at the moment
+};
+static_assert(!(BufferUsageBit::kAll & PrivateBufferUsageBit::kAllPrivate), "Update the bits in PrivateBufferUsageBit"); // Trips if a public bit collides with a private one
+
 enum class D3DTextureViewType : U8
 enum class D3DTextureViewType : U8
 {
 {
 	kSrv,
 	kSrv,
@@ -476,7 +487,7 @@ inline [[nodiscard]] D3D12_FILL_MODE convertFillMode(FillMode f)
 	return out;
 	return out;
 }
 }
 
 
-inline [[nodiscard]] D3D12_CULL_MODE convertCullMode(FaceSelectionBit c)
+[[nodiscard]] inline D3D12_CULL_MODE convertCullMode(FaceSelectionBit c)
 {
 {
 	ANKI_ASSERT(c != FaceSelectionBit::kFrontAndBack);
 	ANKI_ASSERT(c != FaceSelectionBit::kFrontAndBack);
 	D3D12_CULL_MODE out = {};
 	D3D12_CULL_MODE out = {};
@@ -498,6 +509,25 @@ inline [[nodiscard]] D3D12_CULL_MODE convertCullMode(FaceSelectionBit c)
 }
 }
 
 
 [[nodiscard]] DXGI_FORMAT convertFormat(Format fmt);
 [[nodiscard]] DXGI_FORMAT convertFormat(Format fmt);
+
+/// Map an index type to the DXGI format that describes a single index.
+/// @param ak The index type.
+/// @return DXGI_FORMAT_R16_UINT or DXGI_FORMAT_R32_UINT. Anything else asserts and gives DXGI_FORMAT_UNKNOWN.
+[[nodiscard]] inline DXGI_FORMAT convertIndexType(IndexType ak)
+{
+	if(ak == IndexType::kU16)
+	{
+		return DXGI_FORMAT_R16_UINT;
+	}
+
+	if(ak == IndexType::kU32)
+	{
+		return DXGI_FORMAT_R32_UINT;
+	}
+
+	// Unknown index type
+	ANKI_ASSERT(0);
+	return DXGI_FORMAT_UNKNOWN;
+}
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 13 - 0
AnKi/Gr/D3D/D3DDescriptor.cpp

@@ -717,6 +717,19 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 
 
 					getDevice().CreateShaderResourceView(view.m_resource, &srvDesc, cbvSrvUavHeapOffset.getCpuOffset());
 					getDevice().CreateShaderResourceView(view.m_resource, &srvDesc, cbvSrvUavHeapOffset.getCpuOffset());
 				}
 				}
+				else if(inDescriptor.m_type == DescriptorType::kAccelerationStructure)
+				{
+					// AS
+
+					ANKI_ASSERT(!outDescriptor.m_isHandle);
+					D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+
+					srvDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE;
+					srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+					srvDesc.RaytracingAccelerationStructure.Location = outDescriptor.m_asAddress;
+
+					getDevice().CreateShaderResourceView(nullptr, &srvDesc, cbvSrvUavHeapOffset.getCpuOffset());
+				}
 				else
 				else
 				{
 				{
 					ANKI_ASSERT(!"TODO");
 					ANKI_ASSERT(!"TODO");

+ 13 - 0
AnKi/Gr/D3D/D3DDescriptor.h

@@ -397,6 +397,18 @@ public:
 		m_spaces[space].m_cbvSrvUavDirty = true;
 		m_spaces[space].m_cbvSrvUavDirty = true;
 	}
 	}
 
 
+	/// Bind an acceleration structure as an SRV. Only the GPU address is recorded and the SRV range of the space is marked dirty.
+	void bindSrv(U32 space, U32 registerBinding, D3D12_GPU_VIRTUAL_ADDRESS asAddress)
+	{
+		Descriptor& slot = getDescriptor(HlslResourceType::kSrv, space, registerBinding);
+
+#if ANKI_ASSERTIONS_ENABLED
+		// The type is only tracked when assertions are enabled
+		slot.m_type = DescriptorType::kAccelerationStructure;
+#endif
+		slot.m_isHandle = false; // It's an address, not a descriptor handle
+		slot.m_asAddress = asAddress;
+
+		m_spaces[space].m_cbvSrvUavDirty = true;
+	}
+
 	void bindCbv(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range)
 	void bindCbv(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range)
 	{
 	{
 		Descriptor& descriptor = getDescriptor(HlslResourceType::kCbv, space, registerBinding);
 		Descriptor& descriptor = getDescriptor(HlslResourceType::kCbv, space, registerBinding);
@@ -451,6 +463,7 @@ private:
 		{
 		{
 			BufferView m_bufferView; ///< For buffers
 			BufferView m_bufferView; ///< For buffers
 			D3D12_CPU_DESCRIPTOR_HANDLE m_heapOffset; ///< For samplers and texture SRVs/UAVs
 			D3D12_CPU_DESCRIPTOR_HANDLE m_heapOffset; ///< For samplers and texture SRVs/UAVs
+			D3D12_GPU_VIRTUAL_ADDRESS m_asAddress;
 		};
 		};
 
 
 		Bool m_isHandle;
 		Bool m_isHandle;

+ 21 - 14
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -396,35 +396,22 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	{
 	{
 	case 0x13B5:
 	case 0x13B5:
 		m_capabilities.m_gpuVendor = GpuVendor::kArm;
 		m_capabilities.m_gpuVendor = GpuVendor::kArm;
-		m_capabilities.m_minWaveSize = 16;
-		m_capabilities.m_maxWaveSize = 16;
 		break;
 		break;
 	case 0x10DE:
 	case 0x10DE:
 		m_capabilities.m_gpuVendor = GpuVendor::kNvidia;
 		m_capabilities.m_gpuVendor = GpuVendor::kNvidia;
-		m_capabilities.m_minWaveSize = 32;
-		m_capabilities.m_maxWaveSize = 32;
 		break;
 		break;
 	case 0x1002:
 	case 0x1002:
 	case 0x1022:
 	case 0x1022:
 		m_capabilities.m_gpuVendor = GpuVendor::kAMD;
 		m_capabilities.m_gpuVendor = GpuVendor::kAMD;
-		m_capabilities.m_minWaveSize = 32;
-		m_capabilities.m_maxWaveSize = 64;
 		break;
 		break;
 	case 0x8086:
 	case 0x8086:
 		m_capabilities.m_gpuVendor = GpuVendor::kIntel;
 		m_capabilities.m_gpuVendor = GpuVendor::kIntel;
-		m_capabilities.m_minWaveSize = 8;
-		m_capabilities.m_maxWaveSize = 32;
 		break;
 		break;
 	case 0x5143:
 	case 0x5143:
 		m_capabilities.m_gpuVendor = GpuVendor::kQualcomm;
 		m_capabilities.m_gpuVendor = GpuVendor::kQualcomm;
-		m_capabilities.m_minWaveSize = 64;
-		m_capabilities.m_maxWaveSize = 128;
 		break;
 		break;
 	default:
 	default:
 		m_capabilities.m_gpuVendor = GpuVendor::kUnknown;
 		m_capabilities.m_gpuVendor = GpuVendor::kUnknown;
-		// Choose something really low
-		m_capabilities.m_minWaveSize = 8;
-		m_capabilities.m_maxWaveSize = 8;
 	}
 	}
 	ANKI_D3D_LOGI("Vendor identified as %s", &kGPUVendorStrings[m_capabilities.m_gpuVendor][0]);
 	ANKI_D3D_LOGI("Vendor identified as %s", &kGPUVendorStrings[m_capabilities.m_gpuVendor][0]);
 
 
@@ -489,6 +476,10 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 
 
 	// Set device capabilities (taken from mesa's dozen driver)
 	// Set device capabilities (taken from mesa's dozen driver)
 	{
 	{
+		D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1;
+		ANKI_D3D_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1)));
+		D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5;
+		ANKI_D3D_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5)));
 		D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16;
 		D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16;
 		ANKI_D3D_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16)));
 		ANKI_D3D_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16)));
 		D3D12_FEATURE_DATA_ARCHITECTURE architecture = {.NodeIndex = 0};
 		D3D12_FEATURE_DATA_ARCHITECTURE architecture = {.NodeIndex = 0};
@@ -512,6 +503,23 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 			ANKI_D3D_LOGW("ReBAR not supported");
 			ANKI_D3D_LOGW("ReBAR not supported");
 		}
 		}
 
 
+		if(g_rayTracingCVar && options5.RaytracingTier != D3D12_RAYTRACING_TIER_1_1)
+		{
+			ANKI_D3D_LOGW("Raytracing can't be enabled. Not supported");
+			m_capabilities.m_rayTracingEnabled = false;
+		}
+		else if(g_rayTracingCVar && options5.RaytracingTier == D3D12_RAYTRACING_TIER_1_1)
+		{
+			ANKI_D3D_LOGV("Raytracing supported");
+			m_capabilities.m_rayTracingEnabled = true;
+		}
+		else
+		{
+			m_capabilities.m_rayTracingEnabled = false;
+		}
+
+		m_capabilities.m_minWaveSize = options1.WaveLaneCountMin;
+		m_capabilities.m_maxWaveSize = options1.WaveLaneCountMax;
 		m_capabilities.m_constantBufferBindOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT;
 		m_capabilities.m_constantBufferBindOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT;
 		m_capabilities.m_structuredBufferBindOffsetAlignment = 0; // Not for DX
 		m_capabilities.m_structuredBufferBindOffsetAlignment = 0; // Not for DX
 		m_capabilities.m_structuredBufferNaturalAlignment = true;
 		m_capabilities.m_structuredBufferNaturalAlignment = true;
@@ -523,7 +531,6 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 		m_capabilities.m_maxDrawIndirectCount = kMaxU32;
 		m_capabilities.m_maxDrawIndirectCount = kMaxU32;
 		m_capabilities.m_discreteGpu = !architecture.UMA;
 		m_capabilities.m_discreteGpu = !architecture.UMA;
 		m_capabilities.m_majorApiVersion = 12;
 		m_capabilities.m_majorApiVersion = 12;
-		m_capabilities.m_rayTracingEnabled = g_rayTracingCVar && false; // TODO: Support RT
 		m_capabilities.m_vrs = g_vrsCVar;
 		m_capabilities.m_vrs = g_vrsCVar;
 		m_capabilities.m_unalignedBbpTextureFormats = false;
 		m_capabilities.m_unalignedBbpTextureFormats = false;
 		m_capabilities.m_dlss = false;
 		m_capabilities.m_dlss = false;

+ 6 - 0
AnKi/Gr/D3D/D3DSwapchainFactory.cpp

@@ -79,7 +79,13 @@ Error MicroSwapchain::initInternal()
 		init.m_width = window.getWidth();
 		init.m_width = window.getWidth();
 		init.m_height = window.getHeight();
 		init.m_height = window.getHeight();
 		init.m_format = Format::kR8G8B8A8_Unorm;
 		init.m_format = Format::kR8G8B8A8_Unorm;
+
 		init.m_usage = TextureUsageBit::kRtvDsvRead | TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kPresent;
 		init.m_usage = TextureUsageBit::kRtvDsvRead | TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kPresent;
+		if(m_rtvResources[i]->GetDesc().Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
+		{
+			init.m_usage |= TextureUsageBit::kAllUav;
+		}
+
 		init.m_type = TextureType::k2D;
 		init.m_type = TextureType::k2D;
 
 
 		TextureImpl* tex = newInstance<TextureImpl>(GrMemoryPool::getSingleton(), init.getName());
 		TextureImpl* tex = newInstance<TextureImpl>(GrMemoryPool::getSingleton(), init.getName());

+ 4 - 4
AnKi/Shaders/Common.hlsl

@@ -243,16 +243,16 @@ CHECK_TEXTURE_3()
 template<typename T>
 template<typename T>
 T uvToNdc(T uv)
 T uvToNdc(T uv)
 {
 {
-	T ndc = uv * 2.0f - 1.0f;
-	ndc.y *= -1.0f;
+	T ndc = uv * 2.0 - 1.0;
+	ndc.y *= -1.0;
 	return ndc;
 	return ndc;
 }
 }
 
 
 template<typename T>
 template<typename T>
 T ndcToUv(T ndc)
 T ndcToUv(T ndc)
 {
 {
-	T uv = ndc * 0.5f + 0.5f;
-	uv.y = 1.0f - uv.y;
+	T uv = ndc * 0.5 + 0.5;
+	uv.y = 1.0 - uv.y;
 	return uv;
 	return uv;
 }
 }
 
 

+ 1 - 1
AnKi/Shaders/Include/Common.h

@@ -542,7 +542,7 @@ enum AccellerationStructureFlag : U32
 	kAccellerationStructureFlagTriangleFrontCounterlockwise = kAccellerationStructureFlagFlipFacing
 	kAccellerationStructureFlagTriangleFrontCounterlockwise = kAccellerationStructureFlagFlipFacing
 };
 };
 
 
-/// Mirrors VkAccelerationStructureInstanceKHR.
+/// Mirrors VkAccelerationStructureInstanceKHR and D3D12_RAYTRACING_INSTANCE_DESC.
 struct AccelerationStructureInstance
 struct AccelerationStructureInstance
 {
 {
 	Mat3x4 m_transform;
 	Mat3x4 m_transform;

+ 210 - 219
Tests/Gr/Gr.cpp

@@ -2240,277 +2240,268 @@ void main()
 
 
 ANKI_TEST(Gr, RayQuery)
 ANKI_TEST(Gr, RayQuery)
 {
 {
-#if 0
-	COMMON_BEGIN();
+	g_rayTracingCVar = true;
+	commonInit();
 
 
-	const Bool useRayTracing = g_gr->getDeviceCapabilities().m_rayTracingEnabled;
-	if(!useRayTracing)
 	{
 	{
-		ANKI_TEST_LOGW("Test will run without using ray tracing");
-	}
+		if(!GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
+		{
+			ANKI_TEST_LOGF("Test can't run without ray tracing");
+		}
 
 
-	// Index buffer
-	BufferPtr idxBuffer;
-	if(useRayTracing)
-	{
-		Array<U16, 3> indices = {0, 1, 2};
-		BufferInitInfo init;
-		init.m_mapAccess = BufferMapAccessBit::kWrite;
-		init.m_usage = BufferUsageBit::kIndex;
-		init.m_size = sizeof(indices);
-		idxBuffer = g_gr->newBuffer(init);
+		// Index buffer
+		BufferPtr idxBuffer;
+		{
+			Array<U16, 3> indices = {0, 1, 2};
+			BufferInitInfo init("IdxBuffer");
+			init.m_mapAccess = BufferMapAccessBit::kWrite;
+			init.m_usage = BufferUsageBit::kVertexOrIndex | BufferUsageBit::kAccelerationStructureBuild;
+			init.m_size = sizeof(indices);
+			idxBuffer = GrManager::getSingleton().newBuffer(init);
+
+			void* addr = idxBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
+			memcpy(addr, &indices[0], sizeof(indices));
+			idxBuffer->unmap();
+		}
 
 
-		void* addr = idxBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
-		memcpy(addr, &indices[0], sizeof(indices));
-		idxBuffer->unmap();
-	}
+		// Position buffer (add some padding to complicate things a bit)
+		BufferPtr vertBuffer;
+		{
+			Array<Vec4, 3> verts = {{{-1.0f, 0.0f, 0.0f, 100.0f}, {1.0f, 0.0f, 0.0f, 100.0f}, {0.0f, 2.0f, 0.0f, 100.0f}}};
 
 
-	// Position buffer (add some padding to complicate things a bit)
-	BufferPtr vertBuffer;
-	if(useRayTracing)
-	{
-		Array<Vec4, 3> verts = {{{-1.0f, 0.0f, 0.0f, 100.0f}, {1.0f, 0.0f, 0.0f, 100.0f}, {0.0f, 2.0f, 0.0f, 100.0f}}};
+			BufferInitInfo init("VertBuffer");
+			init.m_mapAccess = BufferMapAccessBit::kWrite;
+			init.m_usage = BufferUsageBit::kVertexOrIndex | BufferUsageBit::kAccelerationStructureBuild;
+			init.m_size = sizeof(verts);
+			vertBuffer = GrManager::getSingleton().newBuffer(init);
 
 
-		BufferInitInfo init;
-		init.m_mapAccess = BufferMapAccessBit::kWrite;
-		init.m_usage = BufferUsageBit::kVertex;
-		init.m_size = sizeof(verts);
-		vertBuffer = g_gr->newBuffer(init);
+			void* addr = vertBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
+			memcpy(addr, &verts[0], sizeof(verts));
+			vertBuffer->unmap();
+		}
 
 
-		void* addr = vertBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
-		memcpy(addr, &verts[0], sizeof(verts));
-		vertBuffer->unmap();
-	}
+		// BLAS
+		AccelerationStructurePtr blas;
+		{
+			AccelerationStructureInitInfo init;
+			init.m_type = AccelerationStructureType::kBottomLevel;
+			init.m_bottomLevel.m_indexBuffer = BufferView(idxBuffer.get());
+			init.m_bottomLevel.m_indexCount = 3;
+			init.m_bottomLevel.m_indexType = IndexType::kU16;
+			init.m_bottomLevel.m_positionBuffer = BufferView(vertBuffer.get());
+			init.m_bottomLevel.m_positionCount = 3;
+			init.m_bottomLevel.m_positionsFormat = Format::kR32G32B32_Sfloat;
+			init.m_bottomLevel.m_positionStride = 4 * 4;
+
+			blas = GrManager::getSingleton().newAccelerationStructure(init);
+		}
 
 
-	// BLAS
-	AccelerationStructurePtr blas;
-	if(useRayTracing)
-	{
-		AccelerationStructureInitInfo init;
-		init.m_type = AccelerationStructureType::kBottomLevel;
-		init.m_bottomLevel.m_indexBuffer = idxBuffer.get();
-		init.m_bottomLevel.m_indexCount = 3;
-		init.m_bottomLevel.m_indexType = IndexType::kU16;
-		init.m_bottomLevel.m_positionBuffer = vertBuffer.get();
-		init.m_bottomLevel.m_positionCount = 3;
-		init.m_bottomLevel.m_positionsFormat = Format::kR32G32B32_Sfloat;
-		init.m_bottomLevel.m_positionStride = 4 * 4;
+		// TLAS
+		AccelerationStructurePtr tlas;
+		{
+			AccelerationStructureInitInfo init;
+			init.m_type = AccelerationStructureType::kTopLevel;
+			Array<AccelerationStructureInstanceInfo, 1> instances = {{{blas.get(), Mat3x4::getIdentity()}}};
+			init.m_topLevel.m_directArgs.m_instances = instances;
 
 
-		blas = g_gr->newAccelerationStructure(init);
-	}
+			tlas = GrManager::getSingleton().newAccelerationStructure(init);
+		}
 
 
-	// TLAS
-	AccelerationStructurePtr tlas;
-	if(useRayTracing)
-	{
-		AccelerationStructureInitInfo init;
-		init.m_type = AccelerationStructureType::kTopLevel;
-		Array<AccelerationStructureInstanceInfo, 1> instances = {{{blas, Mat3x4::getIdentity()}}};
-		init.m_topLevel.m_directArgs.m_instances = instances;
+		// Program
+		ShaderProgramPtr prog;
+		{
+			constexpr const Char* kVertSrc = R"(
+struct VertOut
+{
+	float4 m_svPosition : SV_POSITION;
+	float2 m_uv : TEXCOORDS;
+};
 
 
-		tlas = g_gr->newAccelerationStructure(init);
-	}
+VertOut main(uint svVertexId : SV_VERTEXID)
+{
+	const float2 coord = float2(svVertexId >> 1, svVertexId & 1);
 
 
-	// Program
-	ShaderProgramPtr prog;
-	{
-		CString src = R"(
+	VertOut output;
+	output.m_svPosition = float4(coord * float2(4.0, -4.0) + float2(-1.0, 1.0), 0.0, 1.0);
+	output.m_uv = coord * 2.0;
 
 
-#if USE_RAY_TRACING
-#extension GL_EXT_ray_query : enable
-#endif
+	return output;
+})";
 
 
-layout(push_constant, std140, row_major) uniform b_pc
+			CString kPixelSrc = R"(
+struct Consts
 {
 {
-	Mat4 u_vp;
-	Vec3 u_cameraPos;
-	F32 u_padding0;
+	float4x4 m_invViewProj;
+	float3 m_cameraPos;
+	float m_padding0;
+	float2 m_viewport;
+	float2 m_padding1;
 };
 };
 
 
-#if USE_RAY_TRACING
-layout(set = 0, binding = 0) uniform accelerationStructureEXT u_tlas;
+#if defined(__spirv__)
+[[vk::push_constant]] ConstantBuffer<Consts> g_consts;
+#else
+ConstantBuffer<Consts> g_consts : register(b0, space3000);
 #endif
 #endif
 
 
-layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec3 out_color;
+RaytracingAccelerationStructure g_tlas : register(t0);
 
 
-Bool rayTriangleIntersect(Vec3 orig, Vec3 dir, Vec3 v0, Vec3 v1, Vec3 v2, out F32 t, out F32 u, out F32 v)
+struct VertOut
 {
 {
-	const Vec3 v0v1 = v1 - v0;
-	const Vec3 v0v2 = v2 - v0;
-	const Vec3 pvec = cross(dir, v0v2);
-	const F32 det = dot(v0v1, pvec);
+	float4 m_svPosition : SV_POSITION;
+	float2 m_uv : TEXCOORDS;
+};
 
 
-	if(det < 0.00001)
+float4 main(VertOut input) : SV_TARGET0
+{
+	// Unproject
+	const float2 uv = input.m_uv;
+	float2 ndc = uv * 2.0 - 1.0;
+	ndc.y *= -1;
+	const float4 p4 = mul(g_consts.m_invViewProj, float4(ndc, 1.0, 1.0));
+	const float3 p3 = p4.xyz / p4.w;
+
+	const float3 rayDir = normalize(p3 - g_consts.m_cameraPos);
+	const float3 rayOrigin = g_consts.m_cameraPos;
+
+	RayQuery<RAY_FLAG_NONE> q;
+	const uint cullMask = 0xFFu;
+	const uint traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH;
+	RayDesc ray;
+	ray.Origin = rayOrigin;
+	ray.TMin = 0.01;
+	ray.Direction = rayDir;
+	ray.TMax = 100.0;
+	q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
+	q.Proceed();
+	bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
+
+	float u = 0;
+    float v = 0;
+    float w = 0;
+	if(hit)
 	{
 	{
-		return false;
+		float2 bary = q.CandidateTriangleBarycentrics();
+		u = 1.0 - bary.x - bary.y;
+		v = bary.x;
+		w = bary.y;
 	}
 	}
 
 
-	const F32 invDet = 1.0 / det;
-
-	const Vec3 tvec = orig - v0;
-	u = dot(tvec, pvec) * invDet;
-	if(u < 0.0 || u > 1.0)
-	{
-		return false;
-	}
+	return float4(u, v, w, 0.0);
+})";
 
 
-	const Vec3 qvec = cross(tvec, v0v1);
-	v = dot(dir, qvec) * invDet;
-	if(v < 0.0 || u + v > 1.0)
-	{
-		return false;
-	}
+			prog = createVertFragProg(kVertSrc, kPixelSrc);
+		}
 
 
-	t = dot(v0v2, qvec) * invDet;
-	return true;
-}
+		// Build AS
+		{
+			CommandBufferInitInfo cinit;
+			cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
+			CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cinit);
 
 
-void main()
-{
-	// Unproject
-	const Vec2 ndc = in_uv * 2.0 - 1.0;
-	const Vec4 p4 = inverse(u_vp) * Vec4(ndc, 1.0, 1.0);
-	const Vec3 p3 = p4.xyz / p4.w;
+			AccelerationStructureBarrierInfo barr;
+			barr.m_as = blas.get();
+			barr.m_previousUsage = AccelerationStructureUsageBit::kNone;
+			barr.m_nextUsage = AccelerationStructureUsageBit::kBuild;
 
 
-	const Vec3 rayDir = normalize(p3 - u_cameraPos);
-	const Vec3 rayOrigin = u_cameraPos;
+			cmdb->setPipelineBarrier({}, {}, {&barr, 1});
+			BufferInitInfo scratchInit;
+			scratchInit.m_size = blas->getBuildScratchBufferSize();
+			scratchInit.m_usage = BufferUsageBit::kAccelerationStructureBuildScratch;
+			BufferPtr scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
+			cmdb->buildAccelerationStructure(blas.get(), BufferView(scratchBuff.get()));
 
 
-#if USE_RAY_TRACING
-	Bool hit = false;
-	F32 u = 0.0;
-	F32 v = 0.0;
+			Array<AccelerationStructureBarrierInfo, 2> barr2;
+			barr2[0].m_as = blas.get();
+			barr2[0].m_previousUsage = AccelerationStructureUsageBit::kBuild;
+			barr2[0].m_nextUsage = AccelerationStructureUsageBit::kAttach;
+			barr2[1].m_as = tlas.get();
+			barr2[1].m_previousUsage = AccelerationStructureUsageBit::kNone;
+			barr2[1].m_nextUsage = AccelerationStructureUsageBit::kBuild;
 
 
-	rayQueryEXT rayQuery;
-	rayQueryInitializeEXT(rayQuery, u_tlas, gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT, 0xFFu, rayOrigin,
-		0.01, rayDir, 1000.0);
+			cmdb->setPipelineBarrier({}, {}, barr2);
 
 
-	rayQueryProceedEXT(rayQuery);
+			scratchInit.m_size = tlas->getBuildScratchBufferSize();
+			scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
+			cmdb->buildAccelerationStructure(tlas.get(), BufferView(scratchBuff.get()));
 
 
-	const U32 committedStatus = rayQueryGetIntersectionTypeEXT(rayQuery, true);
-	if(committedStatus == gl_RayQueryCommittedIntersectionTriangleEXT)
-	{
-		const Vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true);
-		u = bary.x;
-		v = bary.y;
-		hit = true;
-	}
-#else
-	// Manual trace
-	Vec3 arr[3] = Vec3[](Vec3(-1.0f, 0.0f, 0.0f), Vec3(1.0f, 0.0f, 0.0f), Vec3(0.0f, 2.0f, 0.0f));
-	F32 t;
-	F32 u;
-	F32 v;
-	const Bool hit = rayTriangleIntersect(rayOrigin, rayDir, arr[0], arr[1], arr[2], t, u, v);
-#endif
+			AccelerationStructureBarrierInfo barr3;
+			barr3.m_as = tlas.get();
+			barr3.m_previousUsage = AccelerationStructureUsageBit::kBuild;
+			barr3.m_nextUsage = AccelerationStructureUsageBit::kComputeSrv;
 
 
-	if(hit)
-	{
-		out_color = Vec3(u, v, 1.0 - (u + v));
-	}
-	else
-	{
-		out_color = Vec3(mix(0.5, 0.2, in_uv.x));
-	}
-}
-		)";
+			cmdb->setPipelineBarrier({}, {}, {&barr3, 1});
 
 
-		String fragSrc;
-		if(useRayTracing)
-		{
-			fragSrc += "#define USE_RAY_TRACING 1\n";
-		}
-		else
-		{
-			fragSrc += "#define USE_RAY_TRACING 0\n";
+			cmdb->endRecording();
+			GrManager::getSingleton().submit(cmdb.get());
 		}
 		}
-		fragSrc += src;
-		prog = createProgram(VERT_QUAD_STRIP_SRC, fragSrc, *g_gr);
-	}
-
-	// Build AS
-	if(useRayTracing)
-	{
-		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
-		CommandBufferPtr cmdb = g_gr->newCommandBuffer(cinit);
-
-		setAccelerationStructureBarrier(cmdb, blas, AccelerationStructureUsageBit::kNone, AccelerationStructureUsageBit::kBuild);
-		BufferInitInfo scratchInit;
-		scratchInit.m_size = blas->getBuildScratchBufferSize();
-		scratchInit.m_usage = BufferUsageBit::kAccelerationStructureBuildScratch;
-		BufferPtr scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
-		cmdb->buildAccelerationStructure(blas.get(), scratchBuff.get(), 0);
-		setAccelerationStructureBarrier(cmdb, blas, AccelerationStructureUsageBit::kBuild, AccelerationStructureUsageBit::kAttach);
-
-		setAccelerationStructureBarrier(cmdb, tlas, AccelerationStructureUsageBit::kNone, AccelerationStructureUsageBit::kBuild);
-		scratchInit.m_size = tlas->getBuildScratchBufferSize();
-		scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
-		cmdb->buildAccelerationStructure(tlas.get(), scratchBuff.get(), 0);
-		setAccelerationStructureBarrier(cmdb, tlas, AccelerationStructureUsageBit::kBuild, AccelerationStructureUsageBit::kFragmentRead);
 
 
-		cmdb->endRecording();
-		GrManager::getSingleton().submit(cmdb.get());
-	}
-
-	// Draw
-	constexpr U32 ITERATIONS = 200;
-	for(U i = 0; i < ITERATIONS; ++i)
-	{
-		HighRezTimer timer;
-		timer.start();
+		// Draw
+		constexpr U32 kIterations = 200;
+		for(U i = 0; i < kIterations; ++i)
+		{
+			HighRezTimer timer;
+			timer.start();
 
 
-		const Vec4 cameraPos(0.0f, 0.0f, 3.0f, 0.0f);
-		const Mat4 viewMat = Mat4(cameraPos.xyz(), Mat3::getIdentity(), Vec3(1.0f)).getInverse();
-		const Mat4 projMat = Mat4::calculatePerspectiveProjectionMatrix(toRad(90.0f), toRad(90.0f), 0.01f, 1000.0f);
+			GrManager::getSingleton().beginFrame();
 
 
-		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
-		CommandBufferPtr cmdb = g_gr->newCommandBuffer(cinit);
+			const Vec4 cameraPos(0.0f, 0.0f, 3.0f, 0.0f);
+			const Mat4 viewMat = Mat4(cameraPos.xyz(), Mat3::getIdentity(), Vec3(1.0f)).invert();
+			const Mat4 projMat = Mat4::calculatePerspectiveProjectionMatrix(toRad(90.0f), toRad(90.0f), 0.01f, 1000.0f);
 
 
-		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
+			CommandBufferInitInfo cinit;
+			cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
+			CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cinit);
 
 
-		cmdb->bindShaderProgram(prog.get());
-		struct PC
-		{
-			Mat4 m_vp;
-			Vec4 m_cameraPos;
-		} pc;
-		pc.m_vp = projMat * viewMat;
-		pc.m_cameraPos = cameraPos;
-		cmdb->setFastConstants(&pc, sizeof(pc));
+			cmdb->bindShaderProgram(prog.get());
+			struct Consts
+			{
+				Mat4 m_invViewProj;
+				Vec3 m_cameraPos;
+				F32 m_padding0;
+				Vec2 m_viewport;
+				Vec2 m_padding1;
+			} consts;
+			consts.m_invViewProj = (projMat * viewMat).invert().transpose();
+			consts.m_cameraPos = cameraPos.xyz();
+			consts.m_viewport = Vec2(kWidth, kHeight);
+			cmdb->setFastConstants(&consts, sizeof(consts));
+
+			cmdb->bindSrv(0, 0, tlas.get());
 
 
-		if(useRayTracing)
-		{
-			cmdb->bindAccelerationStructure(0, 0, tlas.get());
-		}
+			TexturePtr presentTex = GrManager::getSingleton().acquireNextPresentableTexture();
 
 
-		TexturePtr presentTex = g_gr->acquireNextPresentableTexture();
-		FramebufferPtr fb = createColorFb(*g_gr, presentTex);
+			TextureBarrierInfo barr;
+			barr.m_textureView = TextureView(presentTex.get(), TextureSubresourceDesc::all());
+			barr.m_previousUsage = TextureUsageBit::kNone;
+			barr.m_nextUsage = TextureUsageBit::kRtvDsvWrite;
+			cmdb->setPipelineBarrier({&barr, 1}, {}, {});
 
 
-		setTextureBarrier(cmdb, presentTex, TextureUsageBit::kNone, TextureUsageBit::kRtvDsvWrite, TextureSubresourceInfo{});
+			cmdb->beginRenderPass({TextureView(presentTex.get(), TextureSubresourceDesc::firstSurface())});
+			cmdb->setViewport(0, 0, kWidth, kHeight);
+			cmdb->draw(PrimitiveTopology::kTriangles, 3);
 
 
-		cmdb->beginRenderPass(fb.get(), {TextureUsageBit::kRtvDsvWrite}, {});
-		cmdb->draw(PrimitiveTopology::kTriangleStrip, 4);
-		cmdb->endRenderPass();
+			cmdb->endRenderPass();
 
 
-		setTextureBarrier(cmdb, presentTex, TextureUsageBit::kRtvDsvWrite, TextureUsageBit::kPresent, TextureSubresourceInfo{});
+			barr.m_previousUsage = TextureUsageBit::kRtvDsvWrite;
+			barr.m_nextUsage = TextureUsageBit::kPresent;
+			cmdb->setPipelineBarrier({&barr, 1}, {}, {});
 
 
-		cmdb->endRecording();
-		GrManager::getSingleton().submit(cmdb.get());
+			cmdb->endRecording();
+			GrManager::getSingleton().submit(cmdb.get());
 
 
-		g_gr->swapBuffers();
+			GrManager::getSingleton().endFrame();
 
 
-		timer.stop();
-		const F32 TICK = 1.0f / 30.0f;
-		if(timer.getElapsedTime() < TICK)
-		{
-			HighRezTimer::sleep(TICK - timer.getElapsedTime());
+			timer.stop();
+			const F32 TICK = 1.0f / 30.0f;
+			if(timer.getElapsedTime() < TICK)
+			{
+				HighRezTimer::sleep(TICK - timer.getElapsedTime());
+			}
 		}
 		}
 	}
 	}
 
 
-	COMMON_END();
-#endif
+	commonDestroy();
 }
 }
 
 
 static void createCubeBuffers(GrManager& gr, Vec3 min, Vec3 max, BufferPtr& indexBuffer, BufferPtr& vertBuffer, Bool turnInsideOut = false)
 static void createCubeBuffers(GrManager& gr, Vec3 min, Vec3 max, BufferPtr& indexBuffer, BufferPtr& vertBuffer, Bool turnInsideOut = false)

+ 14 - 0
Tests/Gr/GrCommon.h

@@ -7,6 +7,8 @@
 #include <AnKi/ShaderCompiler.h>
 #include <AnKi/ShaderCompiler.h>
 #include <AnKi/ShaderCompiler/ShaderParser.h>
 #include <AnKi/ShaderCompiler/ShaderParser.h>
 #include <AnKi/ShaderCompiler/Dxc.h>
 #include <AnKi/ShaderCompiler/Dxc.h>
+#include <AnKi/Util/Filesystem.h>
+#include <AnKi/Core/CoreTracer.h>
 #include <Tests/Framework/Framework.h>
 #include <Tests/Framework/Framework.h>
 
 
 namespace anki {
 namespace anki {
@@ -90,6 +92,7 @@ inline void commonInit(Bool validation = true)
 {
 {
 	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
 	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
 	ShaderCompilerMemoryPool::allocateSingleton(allocAligned, nullptr);
 	ShaderCompilerMemoryPool::allocateSingleton(allocAligned, nullptr);
+	CoreMemoryPool::allocateSingleton(allocAligned, nullptr);
 	g_windowWidthCVar = kWidth;
 	g_windowWidthCVar = kWidth;
 	g_windowHeightCVar = kHeight;
 	g_windowHeightCVar = kHeight;
 	g_vsyncCVar = false;
 	g_vsyncCVar = false;
@@ -98,6 +101,13 @@ inline void commonInit(Bool validation = true)
 	{
 	{
 		[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 4>{"Validation", "1", "DebugMarkers", "1"});
 		[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 4>{"Validation", "1", "DebugMarkers", "1"});
 	}
 	}
+#if ANKI_TRACING_ENABLED
+	{
+		String tmpDir;
+		[[maybe_unused]] Error err = getTempDirectory(tmpDir);
+		[[maybe_unused]] Error err2 = CoreTracer::allocateSingleton().init(tmpDir);
+	}
+#endif
 
 
 	initWindow();
 	initWindow();
 	ANKI_TEST_EXPECT_NO_ERR(Input::allocateSingleton().init());
 	ANKI_TEST_EXPECT_NO_ERR(Input::allocateSingleton().init());
@@ -111,6 +121,10 @@ inline void commonDestroy()
 	Input::freeSingleton();
 	Input::freeSingleton();
 	NativeWindow::freeSingleton();
 	NativeWindow::freeSingleton();
 	Input::freeSingleton();
 	Input::freeSingleton();
+#if ANKI_TRACING_ENABLED
+	CoreTracer::freeSingleton();
+#endif
+	CoreMemoryPool::freeSingleton();
 	ShaderCompilerMemoryPool::freeSingleton();
 	ShaderCompilerMemoryPool::freeSingleton();
 	DefaultMemoryPool::freeSingleton();
 	DefaultMemoryPool::freeSingleton();
 }
 }