Panagiotis Christopoulos Charitos 2 роки тому
батько
коміт
2eba858d47
2 змінених файлів з 33 додано та 26 видалено
  1. 31 24
      AnKi/Shaders/GBufferGeneric.ankiprog
  2. 2 2
      AnKi/Shaders/Include/Common.h

+ 31 - 24
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -273,49 +273,56 @@ main(in payload MeshShaderPayload payload, out vertices VertOut verts[kMaxVertic
 
 
 	SetMeshOutputCounts(vertCount, primCount);
 	SetMeshOutputCounts(vertCount, primCount);
 
 
+	const U32 loopCount = max(kMaxPrimitivesPerMeshlet, kMaxVerticesPerMeshlet) / ANKI_MESH_SHADER_THREADGROUP_SIZE;
+
 	// Write the verts
 	// Write the verts
-	if(svGroupIndex < vertCount)
+	[unroll] for(U32 l = 0; l < loopCount; ++l)
 	{
 	{
-		VertOut output;
+		const U32 idx = l * ANKI_MESH_SHADER_THREADGROUP_SIZE + svGroupIndex;
+
+		if(idx < vertCount)
+		{
+			VertOut output;
 
 
-		UnpackedMeshVertex vert = loadVertex(meshlet, svGroupIndex, ANKI_BONES, payload.m_positionScale, payload.m_positionTranslation);
+			UnpackedMeshVertex vert = loadVertex(meshlet, idx, ANKI_BONES, payload.m_positionScale, payload.m_positionTranslation);
 
 
-		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset);
-		const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset + sizeof(Mat3x4));
-		ANKI_MAYBE_UNUSED(prevWorldTransform);
+			const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset);
+			const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset + sizeof(Mat3x4));
+			ANKI_MAYBE_UNUSED(prevWorldTransform);
 
 
 #if UVS
 #if UVS
-		output.m_uv = vert.m_uv;
+			output.m_uv = vert.m_uv;
 #endif
 #endif
-		Vec3 prevPos = vert.m_position;
-		ANKI_MAYBE_UNUSED(prevPos);
-		output.m_constantsOffset = payload.m_constantsOffset;
+			Vec3 prevPos = vert.m_position;
+			ANKI_MAYBE_UNUSED(prevPos);
+			output.m_constantsOffset = payload.m_constantsOffset;
 
 
-		// Do stuff
+			// Do stuff
 #if ANKI_BONES
 #if ANKI_BONES
-		skinning(vert, payload.m_boneTransformsOrParticleEmitterOffset, vert.m_position, prevPos, vert.m_normal, vert.m_tangent);
+			skinning(vert, payload.m_boneTransformsOrParticleEmitterOffset, vert.m_position, prevPos, vert.m_normal, vert.m_tangent);
 #endif
 #endif
 
 
-		output.m_position = Vec4(mul(worldTransform, Vec4(vert.m_position, 1.0)), 1.0);
-		output.m_position = mul(g_globalConstants.m_viewProjectionMatrix, output.m_position);
+			output.m_position = Vec4(mul(worldTransform, Vec4(vert.m_position, 1.0)), 1.0);
+			output.m_position = mul(g_globalConstants.m_viewProjectionMatrix, output.m_position);
 
 
 #if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
 #if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-		output.m_normal = mul(worldTransform, Vec4(vert.m_normal, 0.0));
-		output.m_tangent = mul(worldTransform, Vec4(vert.m_tangent.xyz, 0.0));
-		output.m_bitangent = cross(output.m_normal, output.m_tangent) * vert.m_tangent.w;
+			output.m_normal = mul(worldTransform, Vec4(vert.m_normal, 0.0));
+			output.m_tangent = mul(worldTransform, Vec4(vert.m_tangent.xyz, 0.0));
+			output.m_bitangent = cross(output.m_normal, output.m_tangent) * vert.m_tangent.w;
 #endif
 #endif
 
 
 #if REALLY_VELOCITY
 #if REALLY_VELOCITY
-		velocity(worldTransform, prevWorldTransform, prevPos, output);
+			velocity(worldTransform, prevWorldTransform, prevPos, output);
 #endif
 #endif
 
 
-		verts[svGroupIndex] = output;
-	}
+			verts[idx] = output;
+		}
 
 
-	// Write the indices
-	if(svGroupIndex < primCount)
-	{
-		indices[svGroupIndex] = g_unifiedGeom_R8G8B8A8_Uint[meshlet.m_firstPrimitive + svGroupIndex].xyz;
+		// Write the indices
+		if(idx < primCount)
+		{
+			indices[idx] = g_unifiedGeom_R8G8B8A8_Uint[meshlet.m_firstPrimitive + idx].xyz;
+		}
 	}
 	}
 }
 }
 
 

+ 2 - 2
AnKi/Shaders/Include/Common.h

@@ -770,8 +770,8 @@ constexpr U32 kMaxVerticesPerMeshlet = 64;
 #define ANKI_TASK_SHADER_THREADGROUP_SIZE 64u
 #define ANKI_TASK_SHADER_THREADGROUP_SIZE 64u
 constexpr U32 kMaxMeshletsPerTaskShaderPayload = ANKI_TASK_SHADER_THREADGROUP_SIZE;
 constexpr U32 kMaxMeshletsPerTaskShaderPayload = ANKI_TASK_SHADER_THREADGROUP_SIZE;
 
 
-#define ANKI_MESH_SHADER_THREADGROUP_SIZE 64u
-static_assert(ANKI_MESH_SHADER_THREADGROUP_SIZE == max(kMaxPrimitivesPerMeshlet, kMaxVerticesPerMeshlet));
+#define ANKI_MESH_SHADER_THREADGROUP_SIZE 32u
+static_assert(max(kMaxPrimitivesPerMeshlet, kMaxVerticesPerMeshlet) % ANKI_MESH_SHADER_THREADGROUP_SIZE == 0);
 
 
 struct DrawIndirectArgs
 struct DrawIndirectArgs
 {
 {