Browse Source

Sort the render batches when drawing for optimal performance

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
c6434ef07a

+ 17 - 0
AnKi/Gr/Shader.h

@@ -97,9 +97,26 @@ public:
 		return m_shaderType;
 		return m_shaderType;
 	}
 	}
 
 
+	/// Get the size of the shader binary in bytes. Can be an indication of the complexity of the shader.
+	U32 getShaderBinarySize() const
+	{
+		ANKI_ASSERT(m_shaderBinarySize > 0);
+		return m_shaderBinarySize;
+	}
+
+	/// Fragment shader had a discard. Only valid for fragment shaders.
+	Bool hasDiscard() const
+	{
+		ANKI_ASSERT(m_shaderType == ShaderType::kFragment);
+		return m_hasDiscard;
+	}
+
 protected:
 protected:
+	U32 m_shaderBinarySize = 0;
+
 	ShaderType m_shaderType = ShaderType::kCount;
 	ShaderType m_shaderType = ShaderType::kCount;
 
 
+	Bool m_hasDiscard = false;
+
 	/// Construct.
 	/// Construct.
 	Shader(CString name)
 	Shader(CString name)
 		: GrObject(kClassType, name)
 		: GrObject(kClassType, name)

+ 21 - 2
AnKi/Gr/ShaderProgram.h

@@ -78,7 +78,28 @@ public:
 		return m_shaderTypes;
 		return m_shaderTypes;
 	}
 	}
 
 
+	/// Get the size of the shader binary in bytes. Can be an indication of the complexity of the shader.
+	/// The requested stage must be present in the program.
+	U32 getShaderBinarySize(ShaderType type) const
+	{
+		ANKI_ASSERT(!!(ShaderTypeBit(1u << type) & m_shaderTypes));
+		ANKI_ASSERT(m_shaderBinarySizes[type] > 0);
+		return m_shaderBinarySizes[type];
+	}
+
+	/// The fragment shader of the program has a discard. Only valid for programs that contain a fragment stage.
+	Bool hasDiscard() const
+	{
+		ANKI_ASSERT(!!(m_shaderTypes & ShaderTypeBit::kFragment));
+		return m_hasDiscard;
+	}
+
 protected:
 protected:
+	Array<U32, U32(ShaderType::kCount)> m_shaderBinarySizes = {};
+
+	ShaderTypeBit m_shaderTypes = ShaderTypeBit::kNone;
+
+	Bool m_hasDiscard = false;
+
 	/// Construct.
 	/// Construct.
 	ShaderProgram(CString name)
 	ShaderProgram(CString name)
 		: GrObject(kClassType, name)
 		: GrObject(kClassType, name)
@@ -90,8 +111,6 @@ protected:
 	{
 	{
 	}
 	}
 
 
-	ShaderTypeBit m_shaderTypes = ShaderTypeBit::kNone;
-
 private:
 private:
 	/// Allocate and initialize a new instance.
 	/// Allocate and initialize a new instance.
 	[[nodiscard]] static ShaderProgram* newInstance(const ShaderProgramInitInfo& init);
 	[[nodiscard]] static ShaderProgram* newInstance(const ShaderProgramInitInfo& init);

+ 31 - 0
AnKi/Gr/Vulkan/ShaderImpl.cpp

@@ -17,6 +17,25 @@
 
 
 namespace anki {
 namespace anki {
 
 
+/// Walk the instruction stream of a SPIR-V binary and call func(opcode) once per instruction.
+/// Expects a well-formed module: the 5-word header is skipped and each instruction's total word
+/// count is read from the high 16 bits of its first word (per the SPIR-V physical layout).
+template<typename TFunc>
+static void visitSpirv(ConstWeakArray<U32> spv, TFunc func)
+{
+	ANKI_ASSERT(spv.getSize() > 5);
+
+	// Instructions begin right after the 5-word SPIR-V header
+	const U32* it = &spv[5];
+	do
+	{
+		const U32 instructionCount = *it >> 16u;
+		const U32 opcode = *it & 0xFFFFu;
+
+		// A zero word count would make this loop spin forever in release builds; fail loudly instead
+		ANKI_ASSERT(instructionCount > 0);
+
+		func(opcode);
+
+		it += instructionCount;
+	} while(it < spv.getEnd());
+
+	// A valid module's last instruction ends exactly at the last word
+	ANKI_ASSERT(it == spv.getEnd());
+}
+
 class ShaderImpl::SpecConstsVector
 class ShaderImpl::SpecConstsVector
 {
 {
 public:
 public:
@@ -47,6 +66,7 @@ Error ShaderImpl::init(const ShaderInitInfo& inf)
 	ANKI_ASSERT(inf.m_binary.getSize() > 0);
 	ANKI_ASSERT(inf.m_binary.getSize() > 0);
 	ANKI_ASSERT(m_handle == VK_NULL_HANDLE);
 	ANKI_ASSERT(m_handle == VK_NULL_HANDLE);
 	m_shaderType = inf.m_shaderType;
 	m_shaderType = inf.m_shaderType;
+	m_shaderBinarySize = U32(inf.m_binary.getSizeInBytes());
 
 
 #if ANKI_DUMP_SHADERS
 #if ANKI_DUMP_SHADERS
 	{
 	{
@@ -236,6 +256,17 @@ void ShaderImpl::doReflection(ConstWeakArray<U8> spirv, SpecConstsVector& specCo
 		ANKI_ASSERT(blockSize <= getGrManagerImpl().getDeviceCapabilities().m_pushConstantsSize);
 		ANKI_ASSERT(blockSize <= getGrManagerImpl().getDeviceCapabilities().m_pushConstantsSize);
 		m_pushConstantsSize = blockSize;
 		m_pushConstantsSize = blockSize;
 	}
 	}
+
+	// Discards?
+	if(m_shaderType == ShaderType::kFragment)
+	{
+		visitSpirv(ConstWeakArray<U32>(reinterpret_cast<const U32*>(&spirv[0]), spirv.getSize() / sizeof(U32)), [this](U32 cmd) {
+			if(cmd == spv::OpKill)
+			{
+				m_hasDiscard = true;
+			}
+		});
+	}
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 20 - 0
AnKi/Gr/Vulkan/ShaderProgramImpl.cpp

@@ -352,6 +352,26 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 		m_rt.m_allHandlesBuff->unmap();
 		m_rt.m_allHandlesBuff->unmap();
 	}
 	}
 
 
+	// Get shader sizes and a few other things
+	//
+	for(const ShaderPtr& s : m_shaders)
+	{
+		if(!s.isCreated())
+		{
+			continue;
+		}
+
+		const ShaderType type = s->getShaderType();
+		const U32 size = s->getShaderBinarySize();
+
+		m_shaderBinarySizes[type] = size;
+
+		if(type == ShaderType::kFragment)
+		{
+			m_hasDiscard = s->hasDiscard();
+		}
+	}
+
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 

+ 118 - 38
AnKi/Renderer/Utils/Drawer.cpp

@@ -179,62 +179,142 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer.m_buffer, args.m_instanceRateRenderablesBuffer.m_offset,
 	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer.m_buffer, args.m_instanceRateRenderablesBuffer.m_offset,
 						  sizeof(GpuSceneRenderableVertex), VertexStepRate::kInstance);
 						  sizeof(GpuSceneRenderableVertex), VertexStepRate::kInstance);
 
 
+	// Gather the drawcalls first (instead of submitting immediately) so they can be sorted before submission
+	class Command
+	{
+	public:
+		// Args for the legacy multi-draw-indirect (vertex shader) path
+		class LegacyDraw
+		{
+		public:
+			Buffer* m_drawIndirectArgsBuffer;
+			PtrSize m_drawIndirectArgsBufferOffset;
+			Buffer* m_mdiDrawCountsBuffer;
+			PtrSize m_mdiDrawCountsBufferOffset;
+			U32 m_maxDrawCount;
+			PrimitiveTopology m_primitiveTopology;
+		};
+
+		// Args for the task/mesh shader path
+		class ModernDraw
+		{
+		public:
+			U32 m_firstPayload;
+			Buffer* m_taskShaderIndirectArgsBuffer;
+			PtrSize m_taskShaderIndirectArgsBufferOffset;
+		};
+
+		// A command is either a legacy or a modern draw (discriminated by m_drawType)
+		union
+		{
+			LegacyDraw m_legacyDraw;
+			ModernDraw m_modernDraw;
+		};
+
+		ShaderProgram* m_program;
+		// Sort key: fragment shader binary size in the high 32 bits, vertex or mesh shader size in the low 32 bits
+		U64 m_shaderBinarySize;
+		// 0: indexed indirect draw, 1: non-indexed indirect draw, 2: mesh-task indirect draw
+		U8 m_drawType;
+		Bool m_hasDiscard;
+	};
+
+	// NOTE(review): fixed capacity with unchecked commands[commandCount++] below — confirm the bucket
+	// count can never exceed 16, or add an ANKI_ASSERT(commandCount < commands.getSize())
+	Array<Command, 16> commands;
+	U32 commandCount = 0;
+
 	U32 allUserCount = 0;
 	U32 allUserCount = 0;
 	U32 bucketCount = 0;
 	U32 bucketCount = 0;
 	U32 allMeshletGroupCount = 0;
 	U32 allMeshletGroupCount = 0;
 	U32 legacyGeometryFlowUserCount = 0;
 	U32 legacyGeometryFlowUserCount = 0;
-	RenderStateBucketContainer::getSingleton().iterateBuckets(args.m_renderingTechinuqe, [&](const RenderStateInfo& state, U32 userCount,
-																							 U32 meshletGroupCount) {
-		if(userCount == 0)
-		{
-			++bucketCount;
-			return;
-		}
+	RenderStateBucketContainer::getSingleton().iterateBuckets(
+		args.m_renderingTechinuqe, [&](const RenderStateInfo& state, U32 userCount, U32 meshletGroupCount) {
+			if(userCount == 0)
+			{
+				++bucketCount;
+				return;
+			}
 
 
-		ShaderProgramPtr prog = state.m_program;
-		cmdb.bindShaderProgram(prog.get());
+			Command& cmd = commands[commandCount++];
 
 
-		const Bool usesMeshShaders = meshletGroupCount > 0;
+			cmd.m_program = state.m_program.get();
+			cmd.m_shaderBinarySize = U64(state.m_program->getShaderBinarySize(ShaderType::kFragment)) << 32u;
+			cmd.m_hasDiscard = state.m_program->hasDiscard();
 
 
-		if(usesMeshShaders)
-		{
-			const UVec4 firstPayload(allMeshletGroupCount);
-			cmdb.setPushConstants(&firstPayload, sizeof(firstPayload));
+			const Bool usesMeshShaders = meshletGroupCount > 0;
 
 
-			cmdb.drawMeshTasksIndirect(args.m_taskShaderIndirectArgsBuffer.m_buffer,
-									   args.m_taskShaderIndirectArgsBuffer.m_offset + sizeof(DispatchIndirectArgs) * bucketCount);
+			if(usesMeshShaders)
+			{
+				cmd.m_drawType = 2;
+				cmd.m_shaderBinarySize |= state.m_program->getShaderBinarySize(ShaderType::kMesh);
 
 
-			allMeshletGroupCount += min(meshletGroupCount, kMaxMeshletGroupCountPerRenderStateBucket);
-		}
-		else
-		{
-			const U32 maxDrawCount = userCount;
+				cmd.m_modernDraw.m_firstPayload = allMeshletGroupCount;
+				cmd.m_modernDraw.m_taskShaderIndirectArgsBuffer = args.m_taskShaderIndirectArgsBuffer.m_buffer;
+				cmd.m_modernDraw.m_taskShaderIndirectArgsBufferOffset =
+					args.m_taskShaderIndirectArgsBuffer.m_offset + sizeof(DispatchIndirectArgs) * bucketCount;
 
 
-			if(state.m_indexedDrawcall)
-			{
-				cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
-											  args.m_drawIndexedIndirectArgsBuffer.m_offset
-												  + sizeof(DrawIndexedIndirectArgs) * legacyGeometryFlowUserCount,
-											  sizeof(DrawIndexedIndirectArgs), args.m_mdiDrawCountsBuffer.m_buffer,
-											  args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount, maxDrawCount);
+				allMeshletGroupCount += min(meshletGroupCount, kMaxMeshletGroupCountPerRenderStateBucket);
 			}
 			}
 			else
 			else
 			{
 			{
-				// Yes, the DrawIndexedIndirectArgs is intentional
-				cmdb.drawIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
-									   args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * legacyGeometryFlowUserCount,
-									   sizeof(DrawIndexedIndirectArgs), args.m_mdiDrawCountsBuffer.m_buffer,
-									   args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount, maxDrawCount);
+				const U32 maxDrawCount = userCount;
+
+				cmd.m_drawType = (state.m_indexedDrawcall) ? 0 : 1;
+				cmd.m_shaderBinarySize |= state.m_program->getShaderBinarySize(ShaderType::kVertex);
+
+				cmd.m_legacyDraw.m_primitiveTopology = state.m_primitiveTopology;
+				cmd.m_legacyDraw.m_drawIndirectArgsBuffer = args.m_drawIndexedIndirectArgsBuffer.m_buffer;
+				cmd.m_legacyDraw.m_drawIndirectArgsBufferOffset =
+					args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * legacyGeometryFlowUserCount;
+				cmd.m_legacyDraw.m_maxDrawCount = maxDrawCount;
+				cmd.m_legacyDraw.m_mdiDrawCountsBuffer = args.m_mdiDrawCountsBuffer.m_buffer;
+				cmd.m_legacyDraw.m_mdiDrawCountsBufferOffset = args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount;
+
+				legacyGeometryFlowUserCount += userCount;
 			}
 			}
 
 
-			legacyGeometryFlowUserCount += userCount;
-		}
+			++bucketCount;
+			allUserCount += userCount;
+		});
+
+	ANKI_ASSERT(bucketCount == RenderStateBucketContainer::getSingleton().getBucketCount(args.m_renderingTechinuqe));
 
 
-		++bucketCount;
-		allUserCount += userCount;
+	// Sort the drawcalls from the least expensive to the most expensive, leave alpha tested at the end
+	std::sort(&commands[0], &commands[0] + commandCount, [](const Command& a, const Command& b) {
+		if(a.m_hasDiscard != b.m_hasDiscard)
+		{
+			return !a.m_hasDiscard;
+		}
+		else
+		{
+			return a.m_shaderBinarySize < b.m_shaderBinarySize;
+		}
 	});
 	});
 
 
-	ANKI_ASSERT(bucketCount == RenderStateBucketContainer::getSingleton().getBucketCount(args.m_renderingTechinuqe));
+	// Now draw, in sorted order (cheapest first, discard-heavy programs last)
+	for(const Command* it = commands.getBegin(); it < commands.getBegin() + commandCount; ++it)
+	{
+		cmdb.bindShaderProgram(it->m_program);
+
+		// Dispatch on m_drawType: 0 indexed indirect, 1 non-indexed indirect, 2 mesh-task indirect
+		if(it->m_drawType == 0)
+		{
+			cmdb.drawIndexedIndirectCount(it->m_legacyDraw.m_primitiveTopology, it->m_legacyDraw.m_drawIndirectArgsBuffer,
+										  it->m_legacyDraw.m_drawIndirectArgsBufferOffset, sizeof(DrawIndexedIndirectArgs),
+										  it->m_legacyDraw.m_mdiDrawCountsBuffer, it->m_legacyDraw.m_mdiDrawCountsBufferOffset,
+										  it->m_legacyDraw.m_maxDrawCount);
+		}
+		else if(it->m_drawType == 1)
+		{
+			// Yes, the DrawIndexedIndirectArgs is intentional
+			cmdb.drawIndirectCount(it->m_legacyDraw.m_primitiveTopology, it->m_legacyDraw.m_drawIndirectArgsBuffer,
+								   it->m_legacyDraw.m_drawIndirectArgsBufferOffset, sizeof(DrawIndexedIndirectArgs),
+								   it->m_legacyDraw.m_mdiDrawCountsBuffer, it->m_legacyDraw.m_mdiDrawCountsBufferOffset,
+								   it->m_legacyDraw.m_maxDrawCount);
+		}
+		else
+		{
+			ANKI_ASSERT(it->m_drawType == 2);
+
+			// Mesh path reads its first task payload index from push constants
+			const UVec4 firstPayload(it->m_modernDraw.m_firstPayload);
+			cmdb.setPushConstants(&firstPayload, sizeof(firstPayload));
+
+			cmdb.drawMeshTasksIndirect(it->m_modernDraw.m_taskShaderIndirectArgsBuffer, it->m_modernDraw.m_taskShaderIndirectArgsBufferOffset);
+		}
+	}
 
 
 	g_maxDrawcallsStatVar.increment(allUserCount);
 	g_maxDrawcallsStatVar.increment(allUserCount);
 }
 }