Panagiotis Christopoulos Charitos 4 months ago
parent
commit
0f16d55000

+ 1 - 1
AnKi/Gr/AccelerationStructure.h

@@ -133,7 +133,7 @@ public:
 	U64 getGpuAddress() const;
 
 protected:
-	PtrSize m_scratchBufferSize = 0;
+	PtrSize m_scratchBufferSize = 0; ///< Contains more bytes than what the APIs report. This is done to avoid exposing the alignment.
 	AccelerationStructureType m_type = AccelerationStructureType::kCount;
 
 	/// Construct.

+ 1 - 1
AnKi/Gr/BackendCommon/Format.def.h

@@ -138,7 +138,7 @@ ANKI_FORMAT_DEF(R64G64B64_Sfloat,                    118, 1072, 3, 24,  0,  0,
 ANKI_FORMAT_DEF(R64G64B64A64_Uint,                   119, 1073, 4, 32,  0,  0,  0,  1,         None)
 ANKI_FORMAT_DEF(R64G64B64A64_Sint,                   120, 1074, 4, 32,  0,  0,  0,  2,         None)
 ANKI_FORMAT_DEF(R64G64B64A64_Sfloat,                 121, 1075, 4, 32,  0,  0,  0,  0,         None)
-ANKI_FORMAT_DEF(B10G11R11_Ufloat_Pack32,             122, 1076, 3,  4,  0,  0,  0,  0,         None)
+ANKI_FORMAT_DEF(B10G11R11_Ufloat_Pack32,             122,   26, 3,  4,  0,  0,  0,  0,         None)
 ANKI_FORMAT_DEF(E5B9G9R9_Ufloat_Pack32,              123, 1077, 3,  4,  0,  0,  0,  0,         None)
 ANKI_FORMAT_DEF(D16_Unorm,                           124,   55, 0,  2,  0,  0,  0,  0,        Depth)
 ANKI_FORMAT_DEF(X8D24_Unorm_Pack32,                  125, 1078, 0,  4,  0,  0,  0,  0,        Depth)

+ 1 - 0
AnKi/Gr/Common.h

@@ -801,6 +801,7 @@ enum class AccelerationStructureUsageBit : U8
 
 	// Derived
 	kAllGraphics = kSrvGeometry | kSrvPixel,
+	kAllSrv = kSrvGeometry | kSrvPixel | kSrvCompute | kSrvDispatchRays,
 	kAllRead = kAttach | kSrvGeometry | kSrvPixel | kSrvCompute | kSrvDispatchRays,
 	kAllWrite = kBuild
 };

+ 41 - 62
AnKi/Gr/D3D/D3DAccelerationStructure.cpp

@@ -38,7 +38,8 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 	m_type = inf.m_type;
 
 	PtrSize asBufferSize;
-	getMemoryRequirement(inf, asBufferSize, m_scratchBufferSize);
+	getMemoryRequirement(inf, asBufferSize, m_scratchBufferSize, false);
+	m_scratchBufferSize += kScratchBufferAlignment;
 
 	// Allocate AS buffer
 	BufferView asBuff = inf.m_accelerationStructureBuffer;
@@ -92,8 +93,12 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 
 void AccelerationStructureImpl::fillBuildInfo(BufferView scratchBuff, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC& buildDesc) const
 {
+	ANKI_ASSERT(scratchBuff.getRange() == m_scratchBufferSize);
 	buildDesc = {};
 	buildDesc.DestAccelerationStructureData = m_asBuffer->getGpuAddress() + m_asBufferOffset;
+
+	const PtrSize offsetDiff = getAlignedRoundUp(kScratchBufferAlignment, scratchBuff.getOffset()) - scratchBuff.getOffset();
+	scratchBuff = scratchBuff.incrementOffset(offsetDiff);
 	buildDesc.ScratchAccelerationStructureData = scratchBuff.getBuffer().getGpuAddress() + scratchBuff.getOffset();
 
 	if(m_type == AccelerationStructureType::kBottomLevel)
@@ -116,95 +121,63 @@ void AccelerationStructureImpl::fillBuildInfo(BufferView scratchBuff, D3D12_BUIL
 	}
 }
 
-D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationStructureUsageBit before, AccelerationStructureUsageBit after) const
+D3D12_BARRIER_SYNC AccelerationStructureImpl::computeBarrierSync(AccelerationStructureUsageBit usage)
 {
-	D3D12_GLOBAL_BARRIER barrier = {};
-
-	if(before == AccelerationStructureUsageBit::kNone)
-	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_NONE;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_NO_ACCESS;
-	}
-
-	if(!!(before & AccelerationStructureUsageBit::kBuild))
-	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
-	}
-
-	if(!!(before & AccelerationStructureUsageBit::kAttach))
-	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
-	}
+	D3D12_BARRIER_SYNC out = {};
 
-	if(!!(before & AccelerationStructureUsageBit::kSrvGeometry))
+	if(usage == AccelerationStructureUsageBit::kNone)
 	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; // READ_BIT is the only viable solution by elimination
+		out |= D3D12_BARRIER_SYNC_NONE;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kSrvPixel))
+	if(!!(usage & (AccelerationStructureUsageBit::kBuild | AccelerationStructureUsageBit::kAttach)))
 	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		out |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kSrvCompute))
+	if(!!(usage & (AccelerationStructureUsageBit::kAllSrv)))
 	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		out |= D3D12_BARRIER_SYNC_ALL_SHADING;
 	}
 
-	if(!!(before & AccelerationStructureUsageBit::kSrvDispatchRays))
-	{
-		barrier.SyncBefore |= D3D12_BARRIER_SYNC_RAYTRACING;
-		barrier.AccessBefore |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
-	}
+	return out;
+}
 
-	// After
-	if(!!(after & AccelerationStructureUsageBit::kBuild))
-	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
-	}
+D3D12_BARRIER_ACCESS AccelerationStructureImpl::computeBarrierAccess(AccelerationStructureUsageBit usage)
+{
+	D3D12_BARRIER_ACCESS out = {};
 
-	if(!!(after & AccelerationStructureUsageBit::kAttach))
+	if(usage == AccelerationStructureUsageBit::kNone)
 	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		return D3D12_BARRIER_ACCESS_NO_ACCESS;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kSrvGeometry))
+	if(!!(usage & AccelerationStructureUsageBit::kAllRead))
 	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; // READ_BIT is the only viable solution by elimination
+		out |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kSrvPixel))
+	if(!!(usage & AccelerationStructureUsageBit::kAllWrite))
 	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
+		out |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
 	}
 
-	if(!!(after & AccelerationStructureUsageBit::kSrvCompute))
-	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
-	}
+	return out;
+}
 
-	if(!!(after & AccelerationStructureUsageBit::kSrvDispatchRays))
-	{
-		barrier.SyncAfter |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
-		barrier.AccessAfter |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
-	}
+D3D12_GLOBAL_BARRIER AccelerationStructureImpl::computeBarrierInfo(AccelerationStructureUsageBit before, AccelerationStructureUsageBit after) const
+{
+	const D3D12_GLOBAL_BARRIER barrier = {.SyncBefore = computeBarrierSync(before),
+										  .SyncAfter = computeBarrierSync(after),
+										  .AccessBefore = computeBarrierAccess(before),
+										  .AccessAfter = computeBarrierAccess(after)};
 
 	ANKI_ASSERT(barrier.SyncBefore || barrier.SyncAfter);
-
 	return barrier;
 }
 
-void AccelerationStructureImpl::getMemoryRequirement(const AccelerationStructureInitInfo& inf, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize)
+void AccelerationStructureImpl::getMemoryRequirement(const AccelerationStructureInitInfo& inf, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize,
+													 Bool alignSizes)
 {
 	ANKI_ASSERT(inf.isValidForGettingMemoryRequirements());
 
@@ -243,6 +216,12 @@ void AccelerationStructureImpl::getMemoryRequirement(const AccelerationStructure
 	getDevice().GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo);
 	asBufferSize = prebuildInfo.ResultDataMaxSizeInBytes;
 	buildScratchBufferSize = prebuildInfo.ScratchDataSizeInBytes;
+
+	if(alignSizes)
+	{
+		asBufferSize += D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT;
+		buildScratchBufferSize += kScratchBufferAlignment;
+	}
 }
 
 } // end namespace anki

+ 9 - 1
AnKi/Gr/D3D/D3DAccelerationStructure.h

@@ -37,9 +37,13 @@ public:
 		return *m_asBuffer;
 	}
 
-	static void getMemoryRequirement(const AccelerationStructureInitInfo& init, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize);
+	static void getMemoryRequirement(const AccelerationStructureInitInfo& init, PtrSize& asBufferSize, PtrSize& buildScratchBufferSize,
+									 Bool alignSizes = true);
 
 private:
+	/// Spec doesn't say anything about scratch buffer alignment but validation complains.
+	static constexpr U32 kScratchBufferAlignment = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT;
+
 	BufferInternalPtr m_asBuffer;
 	PtrSize m_asBufferOffset = kMaxPtrSize;
 
@@ -59,6 +63,10 @@ private:
 		BufferInternalPtr m_positionsBuff;
 		BufferInternalPtr m_indexBuff;
 	} m_blas;
+
+	static D3D12_BARRIER_SYNC computeBarrierSync(AccelerationStructureUsageBit usage);
+
+	static D3D12_BARRIER_ACCESS computeBarrierAccess(AccelerationStructureUsageBit usage);
 };
 /// @}
 

+ 1 - 1
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -105,7 +105,7 @@ PtrSize GrManager::getAccelerationStructureMemoryRequirement(const AccelerationS
 {
 	PtrSize asSize, unused;
 	AccelerationStructureImpl::getMemoryRequirement(init, asSize, unused);
-	return asSize + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT;
+	return asSize;
 }
 
 Error GrManager::init(GrManagerInitInfo& inf)

+ 1 - 0
AnKi/Gr/D3D/D3DShaderProgram.cpp

@@ -130,6 +130,7 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 	for(ShaderInternalPtr& shader : m_shaders)
 	{
 		const ShaderImpl& simpl = static_cast<const ShaderImpl&>(*shader);
+
 		if(firstLink)
 		{
 			refl = simpl.m_reflection;

+ 4 - 4
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -330,8 +330,8 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 			rgraphCtx.bindSrv(srv++, 2, getShadowMapping().getShadowmapRt());
 
-			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
-			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(GpuSceneGlobalIlluminationProbe)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
 
 			for(U32 i = 0; i < 3; ++i)
 			{
@@ -502,8 +502,8 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 			rgraphCtx.bindSrv(srv++, 2, getShadowMapping().getShadowmapRt());
 
-			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
-			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(GpuSceneGlobalIlluminationProbe)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
 
 			rgraphCtx.bindSrv(srv++, 2, getGBuffer().getDepthRt());
 			cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));

+ 14 - 7
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -10,13 +10,20 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/LightFunctions.hlsl>
 
-struct [raypayload] RtMaterialFetchRayPayload // TODO make it FP16 when you change the GBufferGeneric.ankiprog
+struct [raypayload] RtMaterialFetchRayPayload
 {
-	HVec3 m_diffuseColor : write(closesthit, miss): read(caller);
-	HVec3 m_worldNormal : write(closesthit, miss): read(caller);
-	HVec3 m_emission : write(closesthit, miss): read(caller);
-	F16 m_textureLod : write(caller): read(closesthit);
+	// Use FP32 on D3D because FP16 crashes at least on nVidia
+#if ANKI_GR_BACKEND_VULKAN
+#	define PAYLOAD_SCALAR F16
+#else
+#	define PAYLOAD_SCALAR F32
+#endif
+	vector<PAYLOAD_SCALAR, 3> m_diffuseColor : write(closesthit, miss): read(caller);
+	vector<PAYLOAD_SCALAR, 3> m_worldNormal : write(closesthit, miss): read(caller);
+	vector<PAYLOAD_SCALAR, 3> m_emission : write(closesthit, miss): read(caller);
+	PAYLOAD_SCALAR m_textureLod : write(caller): read(closesthit);
 	F32 m_rayT : write(closesthit, miss): read(caller);
+#undef PAYLOAD_SCALAR
 };
 
 // Have a common resource interface for all shaders. It should be compatible between all ray shaders in DX and VK
@@ -32,8 +39,8 @@ Texture2D<Vec4> g_envMap : register(t1, SPACE);
 Texture2D<Vec4> g_shadowAtlasTex : register(t2, SPACE);
 
 #	if defined(CLIPMAP_VOLUME)
-StructuredBuffer<U32> g_dummyBuff1 : register(t3, SPACE);
-StructuredBuffer<U32> g_dummyBuff2 : register(t4, SPACE);
+StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_dummyBuff1 : register(t3, SPACE);
+StructuredBuffer<PixelFailedSsr> g_dummyBuff2 : register(t4, SPACE);
 #	else
 StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t3, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t4, SPACE);