Browse Source

Remove one more byte address buffer load

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
f2ab329078

+ 1 - 2
AnKi/Renderer/Sky.cpp

@@ -181,8 +181,7 @@ void Sky::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindShaderProgram(m_computeSunColorGrProg.get());
 
 			rgraphCtx.bindColorTexture(0, 0, transmittanceLutRt);
-			cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-			cmdb.bindUavBuffer(0, 2, ctx.m_globalRenderingConstsBuffer);
+			cmdb.bindUavBuffer(0, 1, ctx.m_globalRenderingConstsBuffer);
 
 			cmdb.dispatchCompute(1, 1, 1);
 		});

+ 0 - 1
AnKi/Renderer/Ssao.cpp

@@ -78,7 +78,6 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 
 	RenderTargetHandle historyRt;
 	RenderTargetHandle finalRt;
-	RenderTargetHandle bentNormalsFinalRt;
 
 	if(m_texImportedOnce) [[likely]]
 	{

+ 2 - 0
AnKi/Renderer/Utils/Drawer.cpp

@@ -70,6 +70,8 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kRenderables),
 					   GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kMeshLods), GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
+	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTransforms),
+					   GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
 	cmdb.bindTexture(U32(MaterialSet::kGlobal), U32(MaterialBinding::kHzbTexture),
 					 (args.m_hzbTexture) ? args.m_hzbTexture : &getRenderer().getDummyTextureView2d());
 	cmdb.bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kNearestClampSampler), getRenderer().getSamplers().m_nearestNearestClamp.get());

+ 14 - 13
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -422,20 +422,21 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 		cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
 		cmdb.bindUavBuffer(0, 3, GpuSceneBuffer::getSingleton().getBufferOffsetRange());
+		cmdb.bindUavBuffer(0, 4, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
 		if(gatherType & 1u)
 		{
-			cmdb.bindUavBuffer(0, 4, out.m_legacy.m_renderableInstancesBuffer);
-			cmdb.bindUavBuffer(0, 5, out.m_legacy.m_drawIndexedIndirectArgsBuffer);
-			cmdb.bindUavBuffer(0, 6, out.m_legacy.m_mdiDrawCountsBuffer);
+			cmdb.bindUavBuffer(0, 5, out.m_legacy.m_renderableInstancesBuffer);
+			cmdb.bindUavBuffer(0, 6, out.m_legacy.m_drawIndexedIndirectArgsBuffer);
+			cmdb.bindUavBuffer(0, 7, out.m_legacy.m_mdiDrawCountsBuffer);
 		}
 		if(gatherType & 2u)
 		{
-			cmdb.bindUavBuffer(0, 7, out.m_mesh.m_taskShaderIndirectArgsBuffer);
-			cmdb.bindUavBuffer(0, 8, out.m_mesh.m_meshletGroupInstancesBuffer);
+			cmdb.bindUavBuffer(0, 8, out.m_mesh.m_taskShaderIndirectArgsBuffer);
+			cmdb.bindUavBuffer(0, 9, out.m_mesh.m_meshletGroupInstancesBuffer);
 		}
 
 		const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
-		UVec2* instanceRanges = allocateAndBindUav<UVec2>(cmdb, 0, 9, bucketCount);
+		UVec2* instanceRanges = allocateAndBindUav<UVec2>(cmdb, 0, 10, bucketCount);
 		for(U32 i = 0; i < bucketCount; ++i)
 		{
 			const Bool legacyBucket = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount > 0;
@@ -454,7 +455,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 		if(frustumTestData)
 		{
-			FrustumGpuVisibilityConstants* unis = allocateAndBindConstants<FrustumGpuVisibilityConstants>(cmdb, 0, 10);
+			FrustumGpuVisibilityConstants* unis = allocateAndBindConstants<FrustumGpuVisibilityConstants>(cmdb, 0, 11);
 
 			Array<Plane, 6> planes;
 			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
@@ -475,8 +476,8 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 			if(frustumTestData->m_hzbRt.isValid())
 			{
-				rpass.bindColorTexture(0, 11, frustumTestData->m_hzbRt);
-				cmdb.bindSampler(0, 12, getRenderer().getSamplers().m_nearestNearestClamp.get());
+				rpass.bindColorTexture(0, 12, frustumTestData->m_hzbRt);
+				cmdb.bindSampler(0, 13, getRenderer().getSamplers().m_nearestNearestClamp.get());
 			}
 		}
 		else
@@ -497,12 +498,12 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 		if(gatherAabbIndices)
 		{
-			cmdb.bindUavBuffer(0, 13, out.m_visibleAaabbIndicesBuffer);
+			cmdb.bindUavBuffer(0, 14, out.m_visibleAaabbIndicesBuffer);
 		}
 
 		if(genHash)
 		{
-			cmdb.bindUavBuffer(0, 14, out.m_visiblesHashBuffer);
+			cmdb.bindUavBuffer(0, 15, out.m_visiblesHashBuffer);
 		}
 
 		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
@@ -607,7 +608,7 @@ void GpuVisibility::populateRenderGraphMeshletInternal(Bool passthrough, BaseGpu
 			cmdb.bindUavBuffer(0, 0, meshletGroupInstancesBuffer);
 			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
-			cmdb.bindUavBuffer(0, 3, GpuSceneBuffer::getSingleton().getBufferOffsetRange());
+			cmdb.bindUavBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 4, UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 5, out.m_drawIndirectArgsBuffer);
 			cmdb.bindUavBuffer(0, 6, out.m_meshletInstancesBuffer);
@@ -895,7 +896,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 			cmdb.bindUavBuffer(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
-			cmdb.bindUavBuffer(0, 3, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
+			cmdb.bindUavBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 4, instancesBuff);
 			cmdb.bindUavBuffer(0, 5, indicesBuff);
 			cmdb.bindUavBuffer(0, 6, m_counterBuffer.get(), 0, sizeof(U32) * 2);

+ 1 - 1
AnKi/Scene/Components/ModelComponent.cpp

@@ -205,7 +205,7 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 			// Upload the GpuSceneRenderable
 			GpuSceneRenderable gpuRenderable = {};
-			gpuRenderable.m_worldTransformsOffset = m_gpuSceneTransforms.getGpuSceneOffset();
+			gpuRenderable.m_worldTransformsIndex = m_gpuSceneTransforms.getIndex() * 2;
 			gpuRenderable.m_constantsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
 			gpuRenderable.m_meshLodsIndex = m_patchInfos[i].m_gpuSceneMeshLods.getIndex() * kMaxLodCount;
 			gpuRenderable.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;

+ 1 - 1
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -402,7 +402,7 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 		renderable.m_constantsOffset = m_gpuSceneUniforms.getOffset();
 		renderable.m_meshLodsIndex = m_gpuSceneMeshLods.getIndex() * kMaxLodCount;
 		renderable.m_particleEmitterOffset = m_gpuSceneParticleEmitter.getGpuSceneOffset();
-		renderable.m_worldTransformsOffset = 0;
+		renderable.m_worldTransformsIndex = 0;
 		renderable.m_uuid = SceneGraph::getSingleton().getNewUuid();
 		if(!m_gpuSceneRenderable.isValid())
 		{

+ 1 - 1
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -33,7 +33,7 @@ VertOut main(VertIn input)
 
 	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
-	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+	const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 
 	const Vec3 worldPos = mul(worldTransform, Vec4(vertex.m_position, 1.0));

+ 1 - 1
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -36,7 +36,7 @@ VertOut main(VertIn input)
 
 	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
-	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+	const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 
 	output.m_worldPosition = mul(worldTransform, Vec4(vertex.m_position, 1.0));

+ 10 - 10
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -165,7 +165,7 @@ struct TaskOut
 {
 	U32 m_firstMeshletGeometryDescriptor;
 	U32 m_visibleMeshletsRelativeIndices[kMeshletGroupSize / sizeof(U32)];
-	U32 m_worldTransformsOffset;
+	U32 m_worldTransformsIndex;
 	U32 m_constantsOffset;
 	U32 m_boneTransformsOrParticleEmitterOffset;
 };
@@ -245,7 +245,7 @@ VertOut main(VertIn input)
 #	if SW_MESHLETS
 	const GpuSceneMeshletInstance instance = unpackGpuSceneMeshletInstance(input.m_instanceData);
 
-	if(input.m_svVertexId >= (instance.m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit & ((1u << 7u) - 1u)) * 3u)
+	if(input.m_svVertexId >= (instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit & ((1u << 7u) - 1u)) * 3u)
 	{
 		// Discard the primitive
 		output = (VertOut)0;
@@ -264,7 +264,7 @@ VertOut main(VertIn input)
 	UnpackedMeshVertex vert = loadVertex(meshlet, localIdx, ANKI_BONES);
 
 	const U32 constantsOffset = instance.m_constantsOffset;
-	const U32 worldTransformsOffset = instance.m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit >> 7u;
+	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
 	const U32 boneTransformsOrParticleEmitterOffset = instance.m_boneTransformsOrParticleEmitterOffset;
 
 #		if VISUALIZE_MESHLETS
@@ -277,13 +277,13 @@ VertOut main(VertIn input)
 	UnpackedMeshVertex vert = loadVertex(mesh, input.m_svVertexId, ANKI_BONES);
 
 	const U32 constantsOffset = instance.m_constantsOffset;
-	const U32 worldTransformsOffset = instance.m_worldTransformsOffset;
+	const U32 worldTransformsIndex = instance.m_worldTransformsIndex;
 	const U32 boneTransformsOrParticleEmitterOffset = instance.m_boneTransformsOrParticleEmitterOffset;
 #	endif // SW_MESHLETS
 	ANKI_MAYBE_UNUSED(boneTransformsOrParticleEmitterOffset);
 
-	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(worldTransformsOffset);
-	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(worldTransformsOffset + sizeof(Mat3x4));
+	const Mat3x4 worldTransform = g_transforms[worldTransformsIndex];
+	const Mat3x4 prevWorldTransform = g_transforms[worldTransformsIndex + 1u];
 	ANKI_MAYBE_UNUSED(prevWorldTransform);
 
 #	if UVS
@@ -349,7 +349,7 @@ struct FirstPayload
 	if(svGroupIndex == 0u)
 	{
 		s_payload.m_firstMeshletGeometryDescriptor = firstMeshletGeometryDescriptor;
-		s_payload.m_worldTransformsOffset = renderable.m_worldTransformsOffset;
+		s_payload.m_worldTransformsIndex = renderable.m_worldTransformsIndex;
 		s_payload.m_constantsOffset = renderable.m_constantsOffset;
 		s_payload.m_boneTransformsOrParticleEmitterOffset = renderable.m_boneTransformsOffset;
 
@@ -368,7 +368,7 @@ struct FirstPayload
 		Bool cull = false;
 
 		const MeshletBoundingVolume meshletBoundingVol = g_meshletBoundingVolumes[firstMeshletBoundingVolume + svGroupIndex];
-		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+		const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
 
 #	if MESHLET_BACKFACE_CULLING
 		cull = cullBackfaceMeshlet(meshletBoundingVol, worldTransform, g_globalConstants.m_cameraTransform.getTranslationPart());
@@ -452,8 +452,8 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, in payload Ta
 
 			UnpackedMeshVertex vert = loadVertex(meshlet, idx, ANKI_BONES);
 
-			const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset);
-			const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset + sizeof(Mat3x4));
+			const Mat3x4 worldTransform = g_transforms[payload.m_worldTransformsIndex];
+			const Mat3x4 prevWorldTransform = g_transforms[payload.m_worldTransformsIndex + 1u]; // Previous transform is stored right after the current
 			ANKI_MAYBE_UNUSED(prevWorldTransform);
 
 #	if UVS

+ 17 - 16
AnKi/Shaders/GpuVisibility.ankiprog

@@ -34,44 +34,45 @@ struct DrawIndirectArgsWithPadding
 [[vk::binding(0)]] StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes;
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(2)]] StructuredBuffer<GpuSceneMeshLod> g_meshLods;
-[[vk::binding(3)]] ByteAddressBuffer g_gpuScene;
+[[vk::binding(3)]] StructuredBuffer<Mat3x4> g_transforms;
+[[vk::binding(4)]] ByteAddressBuffer g_gpuScene;
 
 #if GATHER_MDI
 // These 3 have the same size
-[[vk::binding(4)]] RWStructuredBuffer<UVec4> g_instanceRateRenderables;
-[[vk::binding(5)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
-[[vk::binding(5)]] RWStructuredBuffer<DrawIndirectArgsWithPadding> g_drawIndirectArgs;
+[[vk::binding(5)]] RWStructuredBuffer<UVec4> g_instanceRateRenderables;
+[[vk::binding(6)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
+[[vk::binding(6)]] RWStructuredBuffer<DrawIndirectArgsWithPadding> g_drawIndirectArgs;
 
 // The MDI counts. One for each render state bucket
-[[vk::binding(6)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
+[[vk::binding(7)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
 #endif
 
 #if GATHER_MESHLET_GROUPS
 // For mesh shading
-[[vk::binding(7)]] RWStructuredBuffer<DispatchIndirectArgs> g_taskShaderIndirectArgs;
-[[vk::binding(8)]] RWStructuredBuffer<GpuSceneMeshletGroupInstance> g_meshletGroupInstances;
+[[vk::binding(8)]] RWStructuredBuffer<DispatchIndirectArgs> g_taskShaderIndirectArgs;
+[[vk::binding(9)]] RWStructuredBuffer<GpuSceneMeshletGroupInstance> g_meshletGroupInstances;
 #endif
 
 // One for each render state bucket. It's either the index of the next indirect args or the index to the next task payload
-[[vk::binding(9)]] StructuredBuffer<UVec2> g_instanceRanges;
+[[vk::binding(10)]] StructuredBuffer<UVec2> g_instanceRanges;
 
 #if DISTANCE_TEST == 0
-[[vk::binding(10)]] ConstantBuffer<FrustumGpuVisibilityConstants> g_consts;
+[[vk::binding(11)]] ConstantBuffer<FrustumGpuVisibilityConstants> g_consts;
 #else
 [[vk::push_constant]] ConstantBuffer<DistanceGpuVisibilityConstants> g_consts;
 #endif
 
 #if HZB_TEST
-[[vk::binding(11)]] Texture2D<Vec4> g_hzbTex;
-[[vk::binding(12)]] SamplerState g_nearestAnyClampSampler;
+[[vk::binding(12)]] Texture2D<Vec4> g_hzbTex;
+[[vk::binding(13)]] SamplerState g_nearestAnyClampSampler;
 #endif
 
 #if GATHER_AABBS
-[[vk::binding(13)]] RWStructuredBuffer<U32> g_visibleAabbIndices; ///< Indices of the visible AABBs. The 1st element is the count.
+[[vk::binding(14)]] RWStructuredBuffer<U32> g_visibleAabbIndices; ///< Indices of the visible AABBs. The 1st element is the count.
 #endif
 
 #if HASH_VISIBLES
-[[vk::binding(14)]] RWStructuredBuffer<GpuVisibilityHash> g_hash;
+[[vk::binding(15)]] RWStructuredBuffer<GpuVisibilityHash> g_hash;
 #endif
 
 [numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
@@ -229,7 +230,7 @@ struct DrawIndirectArgsWithPadding
 				g_drawIndexedIndirectArgs[indirectIdx] = indirect;
 
 				UVec4 instanceVertex;
-				instanceVertex.x = renderable.m_worldTransformsOffset;
+				instanceVertex.x = renderable.m_worldTransformsIndex;
 				instanceVertex.y = renderable.m_constantsOffset;
 				instanceVertex.z = meshLodIndex;
 				instanceVertex.w = renderable.m_boneTransformsOffset;
@@ -247,7 +248,7 @@ struct DrawIndirectArgsWithPadding
 				g_drawIndirectArgs[indirectIdx] = indirect;
 
 				UVec4 instanceVertex;
-				instanceVertex.x = renderable.m_worldTransformsOffset;
+				instanceVertex.x = renderable.m_worldTransformsIndex;
 				instanceVertex.y = renderable.m_constantsOffset;
 				instanceVertex.z = meshLodIndex;
 				instanceVertex.w = renderable.m_particleEmitterOffset;
@@ -261,7 +262,7 @@ struct DrawIndirectArgsWithPadding
 	// Update the renderables hash
 	{
 		// Transform a random point as a way to get a feel for the transform
-		const Mat3x4 trf = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+		const Mat3x4 trf = g_transforms[renderable.m_worldTransformsIndex];
 		const Vec3 pt = mul(trf, Vec4(1503.98f, 2006.8f, -1400.16f, 1.0f));
 		const UVec3 ptu = UVec3(asuint(pt.x), asuint(pt.y), asuint(pt.z));
 

+ 2 - 2
AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog

@@ -14,7 +14,7 @@
 [[vk::binding(0)]] StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes;
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(2)]] StructuredBuffer<GpuSceneMeshLod> g_meshLods;
-[[vk::binding(3)]] ByteAddressBuffer g_gpuScene;
+[[vk::binding(3)]] StructuredBuffer<Mat3x4> g_transforms;
 
 [[vk::binding(4)]] RWStructuredBuffer<AccelerationStructureInstance> g_visibleInstances;
 [[vk::binding(5)]] RWStructuredBuffer<U32> g_visibleRenderableIndices; // 1st element is the count
@@ -86,7 +86,7 @@
 		{
 			// It has a BLAS, write what is to write
 
-			const Mat3x4 transform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+			const Mat3x4 transform = g_transforms[renderable.m_worldTransformsIndex];
 			Mat3x4 meshQuantizationTransform;
 			meshQuantizationTransform.m_row0 = Vec4(meshLod.m_positionScale, 0.0f, 0.0f, meshLod.m_positionTranslation.x);
 			meshQuantizationTransform.m_row1 = Vec4(0.0f, meshLod.m_positionScale, 0.0f, meshLod.m_positionTranslation.y);

+ 4 - 4
AnKi/Shaders/GpuVisibilityMeshlet.ankiprog

@@ -23,7 +23,7 @@
 [[vk::binding(0)]] StructuredBuffer<GpuSceneMeshletGroupInstance> g_meshletGroupInstances;
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(2)]] StructuredBuffer<GpuSceneMeshLod> g_meshLods;
-[[vk::binding(3)]] ByteAddressBuffer g_gpuScene;
+[[vk::binding(3)]] StructuredBuffer<Mat3x4> g_transforms;
 [[vk::binding(4)]] StructuredBuffer<MeshletBoundingVolume> g_meshletBoundingVolumes;
 [[vk::binding(5)]] RWStructuredBuffer<DrawIndirectArgs> g_indirectDrawArgs;
 [[vk::binding(6)]] RWStructuredBuffer<GpuSceneMeshletInstance> g_meshletInstances;
@@ -73,7 +73,7 @@ struct Consts
 
 #if !PASSTHROUGH
 
-		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+		const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
 
 #	if MESHLET_BACKFACE_CULLING
 		const Vec4 coneDirAndAng = unpackSnorm4x8(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
@@ -122,8 +122,8 @@ struct Consts
 
 				GpuSceneMeshletInstance instance;
 				instance.m_meshletGeometryDescriptorIndex = firstMeshletGeometryDescriptor + svGroupIndex;
-				instance.m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit = renderable.m_worldTransformsOffset << 7u;
-				instance.m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit |= meshletBoundingVol.m_primitiveCount;
+				instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit = renderable.m_worldTransformsIndex << 7u;
+				instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit |= meshletBoundingVol.m_primitiveCount;
 				instance.m_constantsOffset = renderable.m_constantsOffset;
 				instance.m_boneTransformsOrParticleEmitterOffset =
 					(renderable.m_boneTransformsOffset) ? renderable.m_boneTransformsOffset : renderable.m_particleEmitterOffset;

+ 2 - 2
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -12,7 +12,7 @@ ANKI_BEGIN_NAMESPACE
 inline GpuSceneRenderableInstance unpackGpuSceneRenderableVertex(UVec4 x)
 {
 	GpuSceneRenderableInstance o;
-	o.m_worldTransformsOffset = x[0];
+	o.m_worldTransformsIndex = x[0];
 	o.m_constantsOffset = x[1];
 	o.m_meshLodIndex = x[2];
 	o.m_boneTransformsOrParticleEmitterOffset = x[3];
@@ -22,7 +22,7 @@ inline GpuSceneRenderableInstance unpackGpuSceneRenderableVertex(UVec4 x)
 inline GpuSceneMeshletInstance unpackGpuSceneMeshletInstance(UVec4 x)
 {
 	GpuSceneMeshletInstance o;
-	o.m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit = x[0];
+	o.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit = x[0];
 	o.m_constantsOffset = x[1];
 	o.m_meshletGeometryDescriptorIndex = x[2];
 	o.m_boneTransformsOrParticleEmitterOffset = x[3];

+ 3 - 3
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -19,7 +19,7 @@ constexpr F32 kSomeFarDistance = 100000.0f;
 /// @note All offsets in bytes
 struct GpuSceneRenderable
 {
-	U32 m_worldTransformsOffset; ///< First is the crnt transform and the 2nd the previous
+	U32 m_worldTransformsIndex; ///< First index points to the crnt transform and the 2nd to the previous.
 	U32 m_constantsOffset;
 	U32 m_meshLodsIndex; ///< Points to the array of GpuSceneMeshLod. kMaxLodCount are reserved for each renderable.
 	U32 m_boneTransformsOffset; ///< Array of Mat3x4 or 0 if its not a skin.
@@ -31,7 +31,7 @@ struct GpuSceneRenderable
 /// Almost similar to GpuSceneRenderable but with only what the material shaders need. Needs to fit in a UVec4 vertex attribute.
 struct GpuSceneRenderableInstance
 {
-	U32 m_worldTransformsOffset;
+	U32 m_worldTransformsIndex;
 	U32 m_constantsOffset;
 	U32 m_meshLodIndex; ///< Points to a single GpuSceneMeshLod in the mesh lods.
 	U32 m_boneTransformsOrParticleEmitterOffset;
@@ -48,7 +48,7 @@ static_assert(kMaxLodCount == 3);
 /// Minimal data passed to the vertex shaders in the case of meshlet rendering.
 struct GpuSceneMeshletInstance
 {
-	U32 m_worldTransformsOffset_25bit_meshletPrimitiveCount_7bit;
+	U32 m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit;
 	U32 m_constantsOffset;
 	U32 m_meshletGeometryDescriptorIndex; ///< Index in the UGB.
 	U32 m_boneTransformsOrParticleEmitterOffset;

+ 1 - 1
AnKi/Shaders/Include/MaterialTypes.h

@@ -44,12 +44,12 @@ enum class MaterialBinding : U32
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) kUnifiedGeometry_##fmt,
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 
-	// For mesh shading
 	kMeshletBoundingVolumes, ///< Points to the unified geom buffer
 	kMeshletGeometryDescriptors, ///< Points to the unified geom buffer
 	kMeshletGroups,
 	kRenderables,
 	kMeshLods,
+	kTransforms,
 	kHzbTexture,
 	kNearestClampSampler,
 

+ 1 - 0
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -30,6 +30,7 @@ ANKI_BINDLESS_SET(MaterialSet::kBindless)
 [[vk::binding(MaterialBinding::kMeshletGroups, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneMeshletGroupInstance> g_meshletGroups;
 [[vk::binding(MaterialBinding::kRenderables, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(MaterialBinding::kMeshLods, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneMeshLod> g_meshLods;
+[[vk::binding(MaterialBinding::kTransforms, MaterialSet::kGlobal)]] StructuredBuffer<Mat3x4> g_transforms;
 [[vk::binding(MaterialBinding::kHzbTexture, MaterialSet::kGlobal)]] Texture2D<Vec4> g_hzbTexture;
 [[vk::binding(MaterialBinding::kNearestClampSampler, MaterialSet::kGlobal)]] SamplerState g_nearestClampSampler;
 

+ 1 - 1
AnKi/Shaders/RtShadowsSbtBuild.ankiprog

@@ -42,7 +42,7 @@
 	}
 
 	// Copy the GpuSceneRenderableInstance
-	g_sbtBuffer[sbtDwordOffset++] = renderable.m_worldTransformsOffset;
+	g_sbtBuffer[sbtDwordOffset++] = renderable.m_worldTransformsIndex;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_constantsOffset;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_meshLodsIndex;
 	g_sbtBuffer[sbtDwordOffset] = 0;

+ 18 - 9
AnKi/Shaders/Sky.ankiprog

@@ -78,6 +78,22 @@ Vec3 getValFromTLut(Texture2D<Vec4> tex, SamplerState linearAnyClampSampler, Vec
 	return tex.SampleLevel(linearAnyClampSampler, uv, 0.0f).xyz;
 }
 
+/// Get value from transmittance LUT without a sampler. Reads the single texel the computed UV maps to (no filtering).
+Vec3 getValFromTLut(Texture2D<Vec4> tex, Vec3 pos, Vec3 dirToSun)
+{
+	const F32 height = length(pos);
+	const Vec3 up = pos / height;
+	const F32 sunCosZenithAngle = dot(dirToSun, up);
+
+	const Vec2 uv = Vec2(0.5f + 0.5f * sunCosZenithAngle, (height - kGroundRadiusMM) / (kAtmosphereRadiusMM - kGroundRadiusMM));
+
+	Vec2 texSize;
+	tex.GetDimensions(texSize.x, texSize.y);
+	const Vec2 coord = clamp(uv * texSize, Vec2(0.0f, 0.0f), texSize - 1.0f); // Clamp to [0, size - 1] so the texel read stays in bounds
+
+	return tex[IVec2(coord)].xyz;
+}
+
 Vec3 getValFromMultiScattLut(Texture2D<Vec4> tex, SamplerState linearAnyClampSampler, Vec3 pos, Vec3 dirToSun)
 {
 	const F32 height = length(pos);
@@ -406,18 +422,11 @@ Vec3 raymarchScattering(Vec3 pos, Vec3 rayDir, Vec3 dirToSun, F32 tMax, F32 numS
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
 [[vk::binding(0)]] Texture2D<Vec4> g_tLutTex;
-[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(2)]] RWStructuredBuffer<GlobalRendererConstants> g_globalConsts;
+[[vk::binding(1)]] globallycoherent RWStructuredBuffer<GlobalRendererConstants> g_globalConsts;
 
 [numthreads(1, 1, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
-	const F32 height = length(kViewPos);
-	const Vec3 up = kViewPos / height;
-
-	const F32 sunAltitude = (0.5f * kPi) - acos(dot(-g_globalConsts[0].m_directionalLight.m_direction, up));
-	const Vec3 dirToSun = Vec3(0.0f, sin(sunAltitude), -cos(sunAltitude));
-
-	const Vec3 sunTransmittance = getValFromTLut(g_tLutTex, g_linearAnyClampSampler, kViewPos, dirToSun);
+	const Vec3 sunTransmittance = getValFromTLut(g_tLutTex, kViewPos, -g_globalConsts[0].m_directionalLight.m_direction);
 	const F32 sunPower = g_globalConsts[0].m_directionalLight.m_power;
 
 	g_globalConsts[0].m_directionalLight.m_diffuseColor = Vec4(sunPower * sunTransmittance, 0.0f);