Browse Source

Vulkan: Optimizing queries by batching commands together

Panagiotis Christopoulos Charitos 9 years ago
parent
commit
dc230cff6a

+ 3 - 0
src/anki/core/Trace.cpp

@@ -81,6 +81,9 @@ Error TraceManager::create(HeapAllocator<U8> alloc, const CString& cacheDir)
 		return ErrorCode::NONE;
 	}
 
+	memset(&m_perFrameCounters[0], 0, sizeof(m_perFrameCounters));
+	memset(&m_perRunCounters[0], 0, sizeof(m_perRunCounters));
+
 	// Create trace file
 	StringAuto fname(alloc);
 	fname.sprintf("%s/trace.json", &cacheDir[0]);

+ 116 - 0
src/anki/gr/vulkan/CommandBufferImpl.cpp

@@ -50,6 +50,8 @@ CommandBufferImpl::~CommandBufferImpl()
 
 	m_imgBarriers.destroy(m_alloc);
 	m_buffBarriers.destroy(m_alloc);
+	m_queryResetAtoms.destroy(m_alloc);
+	m_writeQueryAtoms.destroy(m_alloc);
 }
 
 Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
@@ -699,4 +701,118 @@ void CommandBufferImpl::flushBarriers()
 	m_dstStageMask = 0;
 }
 
+/// Record every pending query reset into the command buffer and empty the pending list.
+///
+/// The pending atoms are ordered by pool and then by query index. Each run of atoms that share a pool and have
+/// consecutive query indices is recorded with a single vkCmdResetQueryPool call.
+void CommandBufferImpl::flushQueryResets()
+{
+	const U atomCount = m_queryResetAtomCount;
+	if(atomCount == 0)
+	{
+		// Nothing was queued since the last flush
+		return;
+	}
+
+	// Sorting puts the queries of the same pool next to each other in ascending index order
+	QueryResetAtom* const atoms = &m_queryResetAtoms[0];
+	std::sort(atoms, atoms + atomCount, [](const QueryResetAtom& a, const QueryResetAtom& b) -> Bool {
+		if(a.m_pool == b.m_pool)
+		{
+			ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to reset the same query more than once");
+			return a.m_queryIdx < b.m_queryIdx;
+		}
+
+		return a.m_pool < b.m_pool;
+	});
+
+	// State of the run that is currently being accumulated
+	VkQueryPool runPool = atoms[0].m_pool;
+	U runFirstQuery = atoms[0].m_queryIdx;
+	U runLength = 1;
+
+	for(U i = 1; i < atomCount; ++i)
+	{
+		const QueryResetAtom& prev = atoms[i - 1];
+		const QueryResetAtom& crnt = atoms[i];
+
+		const Bool extendsRun = crnt.m_pool == prev.m_pool && crnt.m_queryIdx == prev.m_queryIdx + 1;
+		if(extendsRun)
+		{
+			++runLength;
+			continue;
+		}
+
+		// The run is broken: record it and open a new one at the current atom
+		vkCmdResetQueryPool(m_handle, runPool, runFirstQuery, runLength);
+
+		runPool = crnt.m_pool;
+		runFirstQuery = crnt.m_queryIdx;
+		runLength = 1;
+	}
+
+	// The last run is still open when the loop ends
+	vkCmdResetQueryPool(m_handle, runPool, runFirstQuery, runLength);
+
+	m_queryResetAtomCount = 0;
+}
+
+/// Record every pending query result write into the command buffer and empty the pending list.
+///
+/// The pending atoms are ordered by pool, then destination buffer, then destination offset and finally query index.
+/// Neighbouring atoms that share pool and buffer, have consecutive query indices and destination offsets exactly
+/// sizeof(U32) apart are recorded with a single vkCmdCopyQueryPoolResults call that uses a stride of sizeof(U32).
+void CommandBufferImpl::flushWriteQueryResults()
+{
+	if(m_writeQueryAtomCount == 0)
+	{
+		return;
+	}
+
+	std::sort(&m_writeQueryAtoms[0],
+		&m_writeQueryAtoms[0] + m_writeQueryAtomCount,
+		[](const WriteQueryAtom& a, const WriteQueryAtom& b) -> Bool {
+			if(a.m_pool != b.m_pool)
+			{
+				return a.m_pool < b.m_pool;
+			}
+
+			// Both sides of every test have to be a against b. Comparing a field with itself is always false and
+			// would drop the buffer and the offset from the ordering
+			if(a.m_buffer != b.m_buffer)
+			{
+				return a.m_buffer < b.m_buffer;
+			}
+
+			if(a.m_offset != b.m_offset)
+			{
+				return a.m_offset < b.m_offset;
+			}
+
+			ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to write the same query more than once");
+			return a.m_queryIdx < b.m_queryIdx;
+		});
+
+	// State of the batch that is currently being accumulated
+	U firstQuery = m_writeQueryAtoms[0].m_queryIdx;
+	U queryCount = 1;
+	VkQueryPool pool = m_writeQueryAtoms[0].m_pool;
+	PtrSize offset = m_writeQueryAtoms[0].m_offset;
+	VkBuffer buff = m_writeQueryAtoms[0].m_buffer;
+	for(U i = 1; i < m_writeQueryAtomCount; ++i)
+	{
+		const WriteQueryAtom& crnt = m_writeQueryAtoms[i];
+		const WriteQueryAtom& prev = m_writeQueryAtoms[i - 1];
+
+		if(crnt.m_pool == prev.m_pool && crnt.m_buffer == prev.m_buffer && prev.m_queryIdx + 1 == crnt.m_queryIdx
+			&& prev.m_offset + sizeof(U32) == crnt.m_offset)
+		{
+			// Can batch
+			++queryCount;
+		}
+		else
+		{
+			// Flush batch
+			vkCmdCopyQueryPoolResults(
+				m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32), VK_QUERY_RESULT_PARTIAL_BIT);
+
+			// New batch. The destination offset has to restart as well, otherwise the next copy would land on
+			// the offset of the very first batch
+			firstQuery = crnt.m_queryIdx;
+			queryCount = 1;
+			pool = crnt.m_pool;
+			buff = crnt.m_buffer;
+			offset = crnt.m_offset;
+		}
+	}
+
+	// The last batch is still open when the loop ends
+	vkCmdCopyQueryPoolResults(
+		m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32), VK_QUERY_RESULT_PARTIAL_BIT);
+
+	m_writeQueryAtomCount = 0;
+}
+
 } // end namespace anki

+ 34 - 0
src/anki/gr/vulkan/CommandBufferImpl.h

@@ -24,6 +24,8 @@ class CommandBufferInitInfo;
 enum class CommandBufferCommandType : U8 // Command categories that flushBatches() switches on.
 {
 	SET_BARRIER, ///< flushBatches() answers this value with flushBarriers().
+	RESET_OCCLUSION_QUERY, ///< flushBatches() answers this value with flushQueryResets().
+	WRITE_QUERY_RESULT, ///< flushBatches() answers this value with flushWriteQueryResults().
 	ANY_OTHER_COMMAND ///< flushBatches() has nothing to flush for this value.
 };
 
@@ -185,6 +187,34 @@ private:
 	VkPipelineStageFlags m_dstStageMask = 0;
 	/// @}
 
+	/// @name reset_query_batch
+	/// @{
+	class QueryResetAtom // One query reset that was queued instead of being recorded right away.
+	{
+	public:
+		VkQueryPool m_pool; ///< Query pool handed to vkCmdResetQueryPool.
+		U32 m_queryIdx; ///< Index of the query to reset inside m_pool.
+	};

+	DynamicArray<QueryResetAtom> m_queryResetAtoms; ///< Queued resets. Only the first m_queryResetAtomCount are live.
+	U16 m_queryResetAtomCount = 0; ///< Number of queued resets. flushQueryResets() sets it back to zero.
+	/// @}
+
+	/// @name write_query_result_batch
+	/// @{
+	class WriteQueryAtom // One query result copy that was queued instead of being recorded right away.
+	{
+	public:
+		VkQueryPool m_pool; ///< Query pool handed to vkCmdCopyQueryPoolResults.
+		U32 m_queryIdx; ///< Index of the query to read inside m_pool.
+		VkBuffer m_buffer; ///< Destination buffer of the copy.
+		PtrSize m_offset; ///< Destination offset inside m_buffer.
+	};

+	DynamicArray<WriteQueryAtom> m_writeQueryAtoms; ///< Queued copies. Only the first m_writeQueryAtomCount are live.
+	U16 m_writeQueryAtomCount = 0; ///< Number of queued copies. flushWriteQueryResults() sets it back to zero.
+	/// @}
+
 	/// Some common operations per command.
 	void commandCommon();
 
@@ -209,6 +239,10 @@ private:
 	/// Flush batched image and buffer barriers.
 	void flushBarriers();
 
+	void flushQueryResets(); ///< Record the queued query resets with vkCmdResetQueryPool and clear the queue.
+
+	void flushWriteQueryResults(); ///< Record the queued result copies with vkCmdCopyQueryPoolResults and clear the queue.
+
 	void clearTextureInternal(TexturePtr tex, const ClearValue& clearValue, const VkImageSubresourceRange& range);
 
 	void setTextureBarrierInternal(

+ 44 - 1
src/anki/gr/vulkan/CommandBufferImpl.inl.h

@@ -258,7 +258,22 @@ inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
 	ANKI_ASSERT(handle);
 
-	ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 0), ANY_OTHER_COMMAND);
+#if ANKI_BATCH_COMMANDS
+	flushBatches(CommandBufferCommandType::RESET_OCCLUSION_QUERY);
+
+	if(m_queryResetAtoms.getSize() <= m_queryResetAtomCount)
+	{
+		m_queryResetAtoms.resize(m_alloc, max<U>(2, m_queryResetAtomCount * 2));
+	}
+
+	QueryResetAtom atom;
+	atom.m_pool = handle;
+	atom.m_queryIdx = idx;
+
+	m_queryResetAtoms[m_queryResetAtomCount++] = atom;
+#else
+	ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 1), ANY_OTHER_COMMAND);
+#endif
 
 	m_queryList.pushBack(m_alloc, query);
 }
@@ -428,6 +443,12 @@ inline void CommandBufferImpl::flushBatches(CommandBufferCommandType type)
 		case CommandBufferCommandType::SET_BARRIER:
 			flushBarriers();
 			break;
+		case CommandBufferCommandType::RESET_OCCLUSION_QUERY:
+			flushQueryResets();
+			break;
+		case CommandBufferCommandType::WRITE_QUERY_RESULT:
+			flushWriteQueryResults();
+			break;
 		case CommandBufferCommandType::ANY_OTHER_COMMAND:
 			break;
 		default:
@@ -453,6 +474,8 @@ inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSiz
 	ANKI_ASSERT((size % 4) == 0 && "Should be multiple of 4");
 
 	ANKI_CMD(vkCmdFillBuffer(m_handle, impl.getHandle(), offset, size, value), ANY_OTHER_COMMAND);
+
+	m_bufferList.pushBack(m_alloc, buff);
 }
 
 inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(
@@ -468,6 +491,22 @@ inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(
 
 	const OcclusionQueryImpl& q = query->getImplementation();
 
+#if ANKI_BATCH_COMMANDS
+	flushBatches(CommandBufferCommandType::WRITE_QUERY_RESULT);
+
+	if(m_writeQueryAtoms.getSize() <= m_writeQueryAtomCount)
+	{
+		m_writeQueryAtoms.resize(m_alloc, max<U>(2, m_writeQueryAtomCount * 2));
+	}
+
+	WriteQueryAtom atom;
+	atom.m_pool = q.m_handle.m_pool;
+	atom.m_queryIdx = q.m_handle.m_queryIndex;
+	atom.m_buffer = impl.getHandle();
+	atom.m_offset = offset;
+
+	m_writeQueryAtoms[m_writeQueryAtomCount++] = atom;
+#else
 	ANKI_CMD(vkCmdCopyQueryPoolResults(m_handle,
 				 q.m_handle.m_pool,
 				 q.m_handle.m_queryIndex,
@@ -477,6 +516,10 @@ inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(
 				 sizeof(U32),
 				 VK_QUERY_RESULT_PARTIAL_BIT),
 		ANY_OTHER_COMMAND);
+#endif
+
+	m_queryList.pushBack(m_alloc, query);
+	m_bufferList.pushBack(m_alloc, buff);
 }
 
 } // end namespace anki