Browse Source

Decrease the GPU visibility memory usage a bit, at the cost of additional loads in the vertex shaders

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
b7ef6e7141

+ 3 - 3
AnKi/Renderer/Utils/Drawer.cpp

@@ -214,7 +214,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 				cmd.m_legacyDraw.m_mdiDrawCountsBufferOffset = args.m_legacy.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount;
 				cmd.m_legacyDraw.m_instancesBuffer = args.m_legacy.m_renderableInstancesBuffer.m_buffer;
 				cmd.m_legacyDraw.m_instancesBufferOffset =
-					args.m_legacy.m_renderableInstancesBuffer.m_offset + legacyGeometryFlowUserCount * sizeof(GpuSceneRenderableVertex);
+					args.m_legacy.m_renderableInstancesBuffer.m_offset + legacyGeometryFlowUserCount * sizeof(GpuSceneRenderableInstance);
 
 				legacyGeometryFlowUserCount += userCount;
 			}
@@ -245,7 +245,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 
 		if(it->m_drawType == 0)
 		{
-			cmdb.bindVertexBuffer(0, it->m_legacyDraw.m_instancesBuffer, it->m_legacyDraw.m_instancesBufferOffset, sizeof(GpuSceneRenderableVertex),
+			cmdb.bindVertexBuffer(0, it->m_legacyDraw.m_instancesBuffer, it->m_legacyDraw.m_instancesBufferOffset, sizeof(GpuSceneRenderableInstance),
 								  VertexStepRate::kInstance);
 
 			cmdb.drawIndexedIndirectCount(it->m_legacyDraw.m_primitiveTopology, it->m_legacyDraw.m_drawIndirectArgsBuffer,
@@ -255,7 +255,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 		}
 		else if(it->m_drawType == 1)
 		{
-			cmdb.bindVertexBuffer(0, it->m_legacyDraw.m_instancesBuffer, it->m_legacyDraw.m_instancesBufferOffset, sizeof(GpuSceneRenderableVertex),
+			cmdb.bindVertexBuffer(0, it->m_legacyDraw.m_instancesBuffer, it->m_legacyDraw.m_instancesBufferOffset, sizeof(GpuSceneRenderableInstance),
 								  VertexStepRate::kInstance);
 
 			// Yes, the DrawIndexedIndirectArgs is intentional

+ 2 - 2
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -163,7 +163,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 			mem.m_drawIndexedIndirectArgsBuffer =
 				allocateTransientGpuMem(max(1u, maxCounts.m_legacyGeometryFlowUserCount) * sizeof(DrawIndexedIndirectArgs));
 			mem.m_renderableInstancesBuffer =
-				allocateTransientGpuMem(max(1u, maxCounts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableVertex));
+				allocateTransientGpuMem(max(1u, maxCounts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableInstance));
 
 			mem.m_taskShaderPayloadBuffer = allocateTransientGpuMem(max(1u, maxCounts.m_meshletGroupCount) * sizeof(GpuSceneTaskShaderPayload));
 
@@ -178,7 +178,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	out.m_legacy.m_drawIndexedIndirectArgsBuffer.m_range = max(1u, counts.m_legacyGeometryFlowUserCount) * sizeof(DrawIndexedIndirectArgs);
 
 	out.m_legacy.m_renderableInstancesBuffer = mem.m_renderableInstancesBuffer;
-	out.m_legacy.m_renderableInstancesBuffer.m_range = max(1u, counts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableVertex);
+	out.m_legacy.m_renderableInstancesBuffer.m_range = max(1u, counts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableInstance);
 
 	out.m_legacy.m_mdiDrawCountsBuffer = allocateTransientGpuMem(sizeof(U32) * counts.m_bucketCount);
 

+ 1 - 1
AnKi/Renderer/Utils/GpuVisibility.h

@@ -92,7 +92,7 @@ public:
 	class
 	{
 	public:
-		BufferOffsetRange m_renderableInstancesBuffer; ///< An array of GpuSceneRenderableVertex.
+		BufferOffsetRange m_renderableInstancesBuffer; ///< An array of GpuSceneRenderableInstance.
 		BufferOffsetRange m_drawIndexedIndirectArgsBuffer; ///< An array of DrawIndexedIndirectArgs.
 		BufferOffsetRange m_mdiDrawCountsBuffer; ///< An array of U32, one for each render state bucket (even those that use task/mesh flow).
 	} m_legacy; ///< Legacy vertex shading.

+ 3 - 0
AnKi/Scene/Components/ModelComponent.cpp

@@ -190,6 +190,9 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 					meshLod.m_firstMeshletGeometryDescriptor = U32(inf.m_meshletGometryDescriptorsUgbOffset / sizeof(MeshletGeometryDescriptor));
 					meshLod.m_meshletCount = inf.m_meshletCount;
 				}
+
+				meshLod.m_renderableIndex = m_patchInfos[i].m_gpuSceneRenderable.getIndex();
+				meshLod.m_lod = l;
 			}
 
 			// Copy the last LOD to the rest just in case

+ 1 - 1
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -31,7 +31,7 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);

+ 1 - 1
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -34,7 +34,7 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);

+ 1 - 1
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -34,7 +34,7 @@ struct VertOut
 
 VertOut main(VertIn input)
 {
-	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_boneTransformsOrParticleEmitterOffset);
 	const GpuSceneMeshLod meshLod = g_meshLods[renderable.m_meshLodIndex];
 

+ 26 - 11
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -56,6 +56,7 @@
 #else
 #	define NORMAL_MAPPING 0
 #endif
+#define SW_MESHLETS (ANKI_TECHNIQUE_GBufferSwMeshletRendering || ANKI_TECHNIQUE_ShadowsSwMeshletRendering)
 
 #define VISUALIZE_MESHLETS (0 && GBUFFER)
 #define MESHLET_BACKFACE_CULLING 0
@@ -88,7 +89,11 @@
 struct VertIn
 {
 	U32 m_svVertexId : SV_VERTEXID;
+#if SW_MESHLETS
+	[[vk::location(0)]] U32 m_instanceData : INSTANCE_DATA;
+#else
 	[[vk::location(0)]] UVec4 m_instanceData : INSTANCE_DATA;
+#endif
 };
 
 struct VertOut
@@ -233,21 +238,20 @@ void velocity(Mat3x4 worldTransform, Mat3x4 prevWorldTransform, Vec3 prevLocalPo
 // ===========================================================================
 #if ANKI_VERTEX_SHADER
 
-#	define SW_MESHLETS (ANKI_TECHNIQUE_GBufferSwMeshletRendering || ANKI_TECHNIQUE_ShadowsSwMeshletRendering)
-
 VertOut main(VertIn input)
 {
 	VertOut output;
 
 #	if SW_MESHLETS
-	const GpuSceneMeshletInstance instance = unpackGpuSceneMeshletInstance(input.m_instanceData);
-	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
+	const GpuSceneMeshletInstance instance = {input.m_instanceData};
+	const GpuSceneMeshLod meshLod = g_meshLods[instance.m_meshLodIndex_21bit_meshletIdx_11bit >> 11u];
+	const U32 meshletGlobIndex = meshLod.m_firstMeshletGeometryDescriptor + (instance.m_meshLodIndex_21bit_meshletIdx_11bit & ((1u << 11u) - 1u));
+	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[meshletGlobIndex];
 	if(input.m_svVertexId >= (meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u) * 3u)
 	{
 		// Discard the primitive
 		output = (VertOut)0;
 		output.m_svPosition = kNaN;
-		output.m_constantsOffset = instance.m_constantsOffset;
 		return output;
 	}
 
@@ -256,14 +260,25 @@ VertOut main(VertIn input)
 	const U32 localIdx = localIndices[input.m_svVertexId % 3u];
 
 	UnpackedMeshVertex vert = loadVertex(meshlet, localIdx, ANKI_BONES);
+
+	const GpuSceneRenderable renderable = g_renderables[meshLod.m_renderableIndex];
+	const U32 constantsOffset = renderable.m_constantsOffset;
+	const U32 worldTransformsOffset = renderable.m_worldTransformsOffset;
+	const U32 boneTransformsOrParticleEmitterOffset =
+		(renderable.m_boneTransformsOffset) ? renderable.m_boneTransformsOffset : renderable.m_particleEmitterOffset;
 #	else
-	const GpuSceneRenderableVertex instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
+	const GpuSceneRenderableInstance instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
 	const GpuSceneMeshLod mesh = g_meshLods[instance.m_meshLodIndex];
 	UnpackedMeshVertex vert = loadVertex(mesh, input.m_svVertexId, ANKI_BONES);
+
+	const U32 constantsOffset = instance.m_constantsOffset;
+	const U32 worldTransformsOffset = instance.m_worldTransformsOffset;
+	const U32 boneTransformsOrParticleEmitterOffset = instance.m_boneTransformsOrParticleEmitterOffset;
 #	endif
+	ANKI_MAYBE_UNUSED(boneTransformsOrParticleEmitterOffset);
 
-	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(instance.m_worldTransformsOffset);
-	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(instance.m_worldTransformsOffset + sizeof(Mat3x4));
+	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(worldTransformsOffset);
+	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(worldTransformsOffset + sizeof(Mat3x4));
 	ANKI_MAYBE_UNUSED(prevWorldTransform);
 
 #	if UVS
@@ -271,11 +286,11 @@ VertOut main(VertIn input)
 #	endif
 	Vec3 prevPos = vert.m_position;
 	ANKI_MAYBE_UNUSED(prevPos);
-	output.m_constantsOffset = instance.m_constantsOffset;
+	output.m_constantsOffset = constantsOffset;
 
 	// Do stuff
 #	if ANKI_BONES
-	skinning(vert, instance.m_boneTransformsOrParticleEmitterOffset, vert.m_position, prevPos, vert.m_normal);
+	skinning(vert, boneTransformsOrParticleEmitterOffset, vert.m_position, prevPos, vert.m_normal);
 #	endif
 
 	const Vec3 worldPos = mul(worldTransform, Vec4(vert.m_position, 1.0));
@@ -684,7 +699,7 @@ FragOut main(
 #if ANKI_ANY_HIT_SHADER
 
 #	if REALLY_ALPHA_TEST
-[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableVertex> g_gpuSceneRenderable;
+[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableInstance> g_gpuSceneRenderable;
 #	endif
 
 [shader("anyhit")] void main(inout RayPayload payload, in Barycentrics barycentrics)

+ 1 - 1
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -42,7 +42,7 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
 	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_boneTransformsOrParticleEmitterOffset);
 
 	// Read vertex

+ 2 - 6
AnKi/Shaders/GpuVisibilityMeshlet.ankiprog

@@ -54,7 +54,6 @@ struct MaterialGlobalConstants
 	U32 firstMeshletBoundingVolume = meshletGroup * kMeshletGroupSize;
 	const U32 meshletCount = min(kMeshletGroupSize, meshLod.m_meshletCount - firstMeshletBoundingVolume);
 	firstMeshletBoundingVolume += meshLod.m_firstMeshletBoundingVolume;
-	const U32 firstMeshletGeometryDescriptor = meshletGroup * kMeshletGroupSize + meshLod.m_firstMeshletGeometryDescriptor;
 
 	// Meshlet culling
 	if(svGroupIndex < meshletCount)
@@ -104,11 +103,8 @@ struct MaterialGlobalConstants
 			}
 
 			GpuSceneMeshletInstance instance;
-			instance.m_worldTransformsOffset = renderable.m_worldTransformsOffset;
-			instance.m_constantsOffset = renderable.m_constantsOffset;
-			instance.m_meshletGeometryDescriptorIndex = firstMeshletGeometryDescriptor + svGroupIndex;
-			instance.m_boneTransformsOrParticleEmitterOffset =
-				(renderable.m_particleEmitterOffset) ? renderable.m_particleEmitterOffset : renderable.m_boneTransformsOffset;
+			instance.m_meshLodIndex_21bit_meshletIdx_11bit = (renderable.m_meshLodsIndex + lod) << 11u;
+			instance.m_meshLodIndex_21bit_meshletIdx_11bit |= meshletGroup * kMeshletGroupSize + svGroupIndex;
 
 			g_drawInstances[instanceIdx] = instance;
 		}

+ 2 - 12
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -9,9 +9,9 @@
 
 ANKI_BEGIN_NAMESPACE
 
-inline GpuSceneRenderableVertex unpackGpuSceneRenderableVertex(UVec4 x)
+inline GpuSceneRenderableInstance unpackGpuSceneRenderableVertex(UVec4 x)
 {
-	GpuSceneRenderableVertex o;
+	GpuSceneRenderableInstance o;
 	o.m_worldTransformsOffset = x[0];
 	o.m_constantsOffset = x[1];
 	o.m_meshLodIndex = x[2];
@@ -19,16 +19,6 @@ inline GpuSceneRenderableVertex unpackGpuSceneRenderableVertex(UVec4 x)
 	return o;
 }
 
-inline GpuSceneMeshletInstance unpackGpuSceneMeshletInstance(UVec4 x)
-{
-	GpuSceneMeshletInstance o;
-	o.m_worldTransformsOffset = x[0];
-	o.m_constantsOffset = x[1];
-	o.m_meshletGeometryDescriptorIndex = x[2];
-	o.m_boneTransformsOrParticleEmitterOffset = x[3];
-	return o;
-}
-
 inline GpuSceneRenderableBoundingVolume initGpuSceneRenderableBoundingVolume(Vec3 aabbMin, Vec3 aabbMax, U32 renderableIndex, U32 renderStateBucket)
 {
 	GpuSceneRenderableBoundingVolume gpuVolume;

+ 6 - 9
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -29,16 +29,16 @@ struct GpuSceneRenderable
 };
 
 /// Almost similar to GpuSceneRenderable but with only what the material shaders need. Needs to fit in a UVec4 vertex attribute.
-struct GpuSceneRenderableVertex
+struct GpuSceneRenderableInstance
 {
 	U32 m_worldTransformsOffset;
 	U32 m_constantsOffset;
 	U32 m_meshLodIndex; ///< Points to a single GpuSceneMeshLod in the mesh lods.
 	U32 m_boneTransformsOrParticleEmitterOffset;
 };
-static_assert(sizeof(GpuSceneRenderableVertex) == sizeof(UVec4));
+static_assert(sizeof(GpuSceneRenderableInstance) == sizeof(UVec4));
 
-/// Input to a single task shader threadgroup. Something similar to GpuSceneRenderableVertex but for mesh shading.
+/// Input to a single task shader threadgroup. Something similar to GpuSceneRenderableInstance but for mesh shading.
 struct GpuSceneTaskShaderPayload
 {
 	U32 m_lod_2bit_renderableIdx_21bit_meshletGroup_9bit;
@@ -48,10 +48,7 @@ static_assert(kMaxLodCount == 3);
 /// Minimal data passed to the vertex shaders in the case of meshlet rendering.
 struct GpuSceneMeshletInstance
 {
-	U32 m_worldTransformsOffset;
-	U32 m_constantsOffset;
-	U32 m_meshletGeometryDescriptorIndex; ///< Index in the UGB.
-	U32 m_boneTransformsOrParticleEmitterOffset;
+	U32 m_meshLodIndex_21bit_meshletIdx_11bit;
 };
 
 /// Used in visibility testing.
@@ -71,12 +68,12 @@ struct GpuSceneMeshLod
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_indexCount;
 	U32 m_firstIndex; ///< In sizeof(indexType)
-	U32 m_padding1;
+	U32 m_renderableIndex;
 
 	U32 m_firstMeshletBoundingVolume; ///< In sizeof(MeshletBoundingVolume)
 	U32 m_firstMeshletGeometryDescriptor; ///< In sizeof(MeshletGeometryDescriptor)
 	U32 m_meshletCount; ///< Can be zero if the mesh doesn't support mesh shading (or mesh shading is off)
-	U32 m_padding2;
+	U32 m_lod;
 
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;

+ 1 - 1
AnKi/Shaders/RtShadowsHit.ankiprog

@@ -16,7 +16,7 @@
 #pragma anki technique_start ahit RtShadows
 
 #if ALPHA_TEXTURE
-[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableVertex> g_gpuSceneRenderable;
+[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableInstance> g_gpuSceneRenderable;
 #endif
 
 [shader("anyhit")] void main(inout RayPayload payload, in Barycentrics barycentrics)

+ 1 - 1
AnKi/Shaders/RtShadowsSbtBuild.ankiprog

@@ -41,7 +41,7 @@
 		++sbtDwordOffset;
 	}
 
-	// Copy the GpuSceneRenderableVertex
+	// Copy the GpuSceneRenderableInstance
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_worldTransformsOffset;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_constantsOffset;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_meshLodsIndex;