瀏覽代碼

Vulkan: Batch barriers

Panagiotis Christopoulos Charitos 9 年之前
父節點
當前提交
d535677eb4

+ 1 - 1
README.md

@@ -3,7 +3,7 @@
 [![Build Status](https://travis-ci.org/godlikepanos/anki-3d-engine.svg?branch=master)](https://travis-ci.org/godlikepanos/anki-3d-engine)
 [![Build Status](https://travis-ci.org/godlikepanos/anki-3d-engine.svg?branch=master)](https://travis-ci.org/godlikepanos/anki-3d-engine)
 
 
 AnKi 3D engine is a Linux and Windows opensource game engine that runs on
 AnKi 3D engine is a Linux and Windows opensource game engine that runs on
-OpenGL 4.4.
+OpenGL 4.5 and Vulkan 1.0 (Beta).
 
 
 [![Video](http://img.youtube.com/vi/va7nZ2EFR4c/0.jpg)](http://www.youtube.com/watch?v=va7nZ2EFR4c)
 [![Video](http://img.youtube.com/vi/va7nZ2EFR4c/0.jpg)](http://www.youtube.com/watch?v=va7nZ2EFR4c)
 
 

+ 1 - 0
src/anki/core/Trace.cpp

@@ -45,6 +45,7 @@ static Array<const char*, U(TraceCounterType::COUNT)> counterNames = {{"GR_DRAWC
 	"GR_PIPELINES_CREATED",
 	"GR_PIPELINES_CREATED",
 	"GR_PIPELINE_BINDS_SKIPPED",
 	"GR_PIPELINE_BINDS_SKIPPED",
 	"GR_PIPELINE_BINDS_HAPPENED",
 	"GR_PIPELINE_BINDS_HAPPENED",
+	"GR_PIPELINE_BARRIERS",
 	"RENDERER_LIGHTS",
 	"RENDERER_LIGHTS",
 	"RENDERER_SHADOW_PASSES",
 	"RENDERER_SHADOW_PASSES",
 	"RENDERER_MERGED_DRAWCALLS",
 	"RENDERER_MERGED_DRAWCALLS",

+ 1 - 0
src/anki/core/Trace.h

@@ -63,6 +63,7 @@ enum class TraceCounterType
 	GR_PIPELINES_CREATED,
 	GR_PIPELINES_CREATED,
 	GR_PIPELINE_BINDS_SKIPPED,
 	GR_PIPELINE_BINDS_SKIPPED,
 	GR_PIPELINE_BINDS_HAPPENED,
 	GR_PIPELINE_BINDS_HAPPENED,
+	GR_PIPELINE_BARRIERS,
 	RENDERER_LIGHTS,
 	RENDERER_LIGHTS,
 	RENDERER_SHADOW_PASSES,
 	RENDERER_SHADOW_PASSES,
 	RENDERER_MERGED_DRAWCALLS,
 	RENDERER_MERGED_DRAWCALLS,

+ 18 - 9
src/anki/gr/vulkan/CommandBufferExtra.cpp

@@ -10,14 +10,19 @@ namespace anki
 
 
 CommandBufferFactory::~CommandBufferFactory()
 CommandBufferFactory::~CommandBufferFactory()
 {
 {
-	for(CmdbType& type : m_types)
+	for(U i = 0; i < 2; ++i)
 	{
 	{
-		if(type.m_count)
+		for(U j = 0; j < 2; ++j)
 		{
 		{
-			vkFreeCommandBuffers(m_dev, m_pool, type.m_count, &type.m_cmdbs[0]);
-		}
+			CmdbType& type = m_types[i][j];
+
+			if(type.m_count)
+			{
+				vkFreeCommandBuffers(m_dev, m_pool, type.m_count, &type.m_cmdbs[0]);
+			}
 
 
-		type.m_cmdbs.destroy(m_alloc);
+			type.m_cmdbs.destroy(m_alloc);
+		}
 	}
 	}
 
 
 	if(m_pool)
 	if(m_pool)
@@ -41,11 +46,13 @@ Error CommandBufferFactory::init(GenericMemoryPoolAllocator<U8> alloc, VkDevice
 	return ErrorCode::NONE;
 	return ErrorCode::NONE;
 }
 }
 
 
-VkCommandBuffer CommandBufferFactory::newCommandBuffer(Bool secondLevel)
+VkCommandBuffer CommandBufferFactory::newCommandBuffer(CommandBufferFlag cmdbFlags)
 {
 {
 	ANKI_ASSERT(isCreated());
 	ANKI_ASSERT(isCreated());
 
 
-	CmdbType& type = m_types[secondLevel];
+	Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::SECOND_LEVEL);
+	Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::SMALL_BATCH);
+	CmdbType& type = m_types[secondLevel][smallBatch];
 
 
 	LockGuard<Mutex> lock(type.m_mtx);
 	LockGuard<Mutex> lock(type.m_mtx);
 
 
@@ -74,12 +81,14 @@ VkCommandBuffer CommandBufferFactory::newCommandBuffer(Bool secondLevel)
 	return out;
 	return out;
 }
 }
 
 
-void CommandBufferFactory::deleteCommandBuffer(VkCommandBuffer cmdb, Bool secondLevel)
+void CommandBufferFactory::deleteCommandBuffer(VkCommandBuffer cmdb, CommandBufferFlag cmdbFlags)
 {
 {
 	ANKI_ASSERT(isCreated());
 	ANKI_ASSERT(isCreated());
 	ANKI_ASSERT(cmdb);
 	ANKI_ASSERT(cmdb);
 
 
-	CmdbType& type = m_types[secondLevel];
+	Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::SECOND_LEVEL);
+	Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::SMALL_BATCH);
+	CmdbType& type = m_types[secondLevel][smallBatch];
 
 
 	LockGuard<Mutex> lock(type.m_mtx);
 	LockGuard<Mutex> lock(type.m_mtx);
 
 

+ 4 - 3
src/anki/gr/vulkan/CommandBufferExtra.h

@@ -6,6 +6,7 @@
 #pragma once
 #pragma once
 
 
 #include <anki/gr/vulkan/Common.h>
 #include <anki/gr/vulkan/Common.h>
+#include <anki/gr/CommandBuffer.h>
 
 
 namespace anki
 namespace anki
 {
 {
@@ -26,10 +27,10 @@ public:
 	ANKI_USE_RESULT Error init(GenericMemoryPoolAllocator<U8> alloc, VkDevice dev, uint32_t queueFamily);
 	ANKI_USE_RESULT Error init(GenericMemoryPoolAllocator<U8> alloc, VkDevice dev, uint32_t queueFamily);
 
 
 	/// Request a new command buffer.
 	/// Request a new command buffer.
-	VkCommandBuffer newCommandBuffer(Bool secondLevel);
+	VkCommandBuffer newCommandBuffer(CommandBufferFlag cmdbFlags);
 
 
 	/// Free a command buffer.
 	/// Free a command buffer.
-	void deleteCommandBuffer(VkCommandBuffer cmdb, Bool secondLevel);
+	void deleteCommandBuffer(VkCommandBuffer cmdb, CommandBufferFlag cmdbFlags);
 
 
 	void collect();
 	void collect();
 
 
@@ -51,7 +52,7 @@ private:
 		Mutex m_mtx; ///< Lock because the allocations may happen anywhere.
 		Mutex m_mtx; ///< Lock because the allocations may happen anywhere.
 	};
 	};
 
 
-	Array<CmdbType, 2> m_types;
+	Array2d<CmdbType, 2, 2> m_types;
 };
 };
 /// @}
 /// @}
 
 

+ 157 - 18
src/anki/gr/vulkan/CommandBufferImpl.cpp

@@ -13,6 +13,8 @@
 #include <anki/gr/ResourceGroup.h>
 #include <anki/gr/ResourceGroup.h>
 #include <anki/gr/vulkan/ResourceGroupImpl.h>
 #include <anki/gr/vulkan/ResourceGroupImpl.h>
 
 
+#include <algorithm>
+
 namespace anki
 namespace anki
 {
 {
 
 
@@ -35,8 +37,7 @@ CommandBufferImpl::~CommandBufferImpl()
 
 
 	if(m_handle)
 	if(m_handle)
 	{
 	{
-		Bool secondLevel = (m_flags & CommandBufferFlag::SECOND_LEVEL) == CommandBufferFlag::SECOND_LEVEL;
-		getGrManagerImpl().deleteCommandBuffer(m_handle, secondLevel, m_tid);
+		getGrManagerImpl().deleteCommandBuffer(m_handle, m_flags, m_tid);
 	}
 	}
 
 
 	m_pplineList.destroy(m_alloc);
 	m_pplineList.destroy(m_alloc);
@@ -46,6 +47,9 @@ CommandBufferImpl::~CommandBufferImpl()
 	m_queryList.destroy(m_alloc);
 	m_queryList.destroy(m_alloc);
 	m_bufferList.destroy(m_alloc);
 	m_bufferList.destroy(m_alloc);
 	m_cmdbList.destroy(m_alloc);
 	m_cmdbList.destroy(m_alloc);
+
+	m_imgBarriers.destroy(m_alloc);
+	m_buffBarriers.destroy(m_alloc);
 }
 }
 
 
 Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
@@ -57,8 +61,7 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 	m_flags = init.m_flags;
 	m_flags = init.m_flags;
 	m_tid = Thread::getCurrentThreadId();
 	m_tid = Thread::getCurrentThreadId();
 
 
-	Bool secondLevel = (m_flags & CommandBufferFlag::SECOND_LEVEL) == CommandBufferFlag::SECOND_LEVEL;
-	m_handle = getGrManagerImpl().newCommandBuffer(m_tid, secondLevel);
+	m_handle = getGrManagerImpl().newCommandBuffer(m_tid, m_flags);
 	ANKI_ASSERT(m_handle);
 	ANKI_ASSERT(m_handle);
 
 
 	// Begin recording
 	// Begin recording
@@ -70,7 +73,7 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 	begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
 	begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
 	begin.pInheritanceInfo = &inheritance;
 	begin.pInheritanceInfo = &inheritance;
 
 
-	if(secondLevel)
+	if(!!(m_flags & CommandBufferFlag::SECOND_LEVEL))
 	{
 	{
 		const FramebufferImpl& impl = init.m_framebuffer->getImplementation();
 		const FramebufferImpl& impl = init.m_framebuffer->getImplementation();
 
 
@@ -96,7 +99,7 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 
 
 void CommandBufferImpl::bindPipeline(PipelinePtr ppline)
 void CommandBufferImpl::bindPipeline(PipelinePtr ppline)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	vkCmdBindPipeline(m_handle, ppline->getImplementation().getBindPoint(), ppline->getImplementation().getHandle());
 	vkCmdBindPipeline(m_handle, ppline->getImplementation().getBindPoint(), ppline->getImplementation().getHandle());
 
 
 	m_pplineList.pushBack(m_alloc, ppline);
 	m_pplineList.pushBack(m_alloc, ppline);
@@ -104,7 +107,7 @@ void CommandBufferImpl::bindPipeline(PipelinePtr ppline)
 
 
 void CommandBufferImpl::beginRenderPass(FramebufferPtr fb)
 void CommandBufferImpl::beginRenderPass(FramebufferPtr fb)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(!insideRenderPass());
 	ANKI_ASSERT(!insideRenderPass());
 
 
 	m_rpCommandCount = 0;
 	m_rpCommandCount = 0;
@@ -119,8 +122,6 @@ void CommandBufferImpl::beginRenderPass(FramebufferPtr fb)
 
 
 void CommandBufferImpl::beginRenderPassInternal()
 void CommandBufferImpl::beginRenderPassInternal()
 {
 {
-	flushBarriers();
-
 	VkRenderPassBeginInfo bi = {};
 	VkRenderPassBeginInfo bi = {};
 	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
 	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
 	FramebufferImpl& impl = m_activeFb->getImplementation();
 	FramebufferImpl& impl = m_activeFb->getImplementation();
@@ -162,7 +163,7 @@ void CommandBufferImpl::beginRenderPassInternal()
 
 
 void CommandBufferImpl::endRenderPass()
 void CommandBufferImpl::endRenderPass()
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(insideRenderPass());
 	ANKI_ASSERT(insideRenderPass());
 	ANKI_ASSERT(m_rpCommandCount > 0);
 	ANKI_ASSERT(m_rpCommandCount > 0);
 
 
@@ -197,13 +198,13 @@ void CommandBufferImpl::endRecordingInternal()
 
 
 void CommandBufferImpl::endRecording()
 void CommandBufferImpl::endRecording()
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	endRecordingInternal();
 	endRecordingInternal();
 }
 }
 
 
 void CommandBufferImpl::bindResourceGroup(ResourceGroupPtr rc, U slot, const TransientMemoryInfo* dynInfo)
 void CommandBufferImpl::bindResourceGroup(ResourceGroupPtr rc, U slot, const TransientMemoryInfo* dynInfo)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	const ResourceGroupImpl& impl = rc->getImplementation();
 	const ResourceGroupImpl& impl = rc->getImplementation();
 
 
 	if(impl.hasDescriptorSet())
 	if(impl.hasDescriptorSet())
@@ -250,8 +251,7 @@ void CommandBufferImpl::bindResourceGroup(ResourceGroupPtr rc, U slot, const Tra
 
 
 void CommandBufferImpl::generateMipmaps2d(TexturePtr tex, U face, U layer)
 void CommandBufferImpl::generateMipmaps2d(TexturePtr tex, U face, U layer)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	const TextureImpl& impl = tex->getImplementation();
 	const TextureImpl& impl = tex->getImplementation();
 	ANKI_ASSERT(impl.m_type != TextureType::_3D && "Not for 3D");
 	ANKI_ASSERT(impl.m_type != TextureType::_3D && "Not for 3D");
@@ -349,8 +349,7 @@ void CommandBufferImpl::generateMipmaps2d(TexturePtr tex, U face, U layer)
 void CommandBufferImpl::uploadTextureSurface(
 void CommandBufferImpl::uploadTextureSurface(
 	TexturePtr tex, const TextureSurfaceInfo& surf, const TransientMemoryToken& token)
 	TexturePtr tex, const TextureSurfaceInfo& surf, const TransientMemoryToken& token)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	TextureImpl& impl = tex->getImplementation();
 	TextureImpl& impl = tex->getImplementation();
 	impl.checkSurface(surf);
 	impl.checkSurface(surf);
@@ -453,8 +452,7 @@ void CommandBufferImpl::uploadTextureSurface(
 void CommandBufferImpl::uploadTextureVolume(
 void CommandBufferImpl::uploadTextureVolume(
 	TexturePtr tex, const TextureVolumeInfo& vol, const TransientMemoryToken& token)
 	TexturePtr tex, const TextureVolumeInfo& vol, const TransientMemoryToken& token)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	TextureImpl& impl = tex->getImplementation();
 	TextureImpl& impl = tex->getImplementation();
 	impl.checkVolume(vol);
 	impl.checkVolume(vol);
@@ -560,4 +558,145 @@ void CommandBufferImpl::uploadTextureVolume(
 	m_texList.pushBack(m_alloc, tex);
 	m_texList.pushBack(m_alloc, tex);
 }
 }
 
 
+/// Record every image and buffer barrier that was batched since the previous flush with a single
+/// vkCmdPipelineBarrier call and reset the batch.
+///
+/// Image barriers that differ only in the mip levels or array layers they cover, and whose ranges are
+/// adjacent, are merged into a single barrier before submission. Buffer barriers are passed through as they are.
+/// It is a no-op when nothing is batched.
+void CommandBufferImpl::flushBarriers()
+{
+	if(m_imgBarrierCount == 0 && m_buffBarrierCount == 0)
+	{
+		return;
+	}
+
+	// Sort the image barriers so that candidates for merging end up next to each other. The key order is image,
+	// aspect, old layout, new layout, base layer and finally base mip, which means that the mips of one layer are
+	// consecutive. The access masks are not part of the key; they are only checked when merging.
+	if(m_imgBarrierCount > 0)
+	{
+		std::sort(&m_imgBarriers[0],
+			&m_imgBarriers[0] + m_imgBarrierCount,
+			[](const VkImageMemoryBarrier& a, const VkImageMemoryBarrier& b) -> Bool {
+				if(a.image != b.image)
+				{
+					return a.image < b.image;
+				}
+
+				if(a.subresourceRange.aspectMask != b.subresourceRange.aspectMask)
+				{
+					return a.subresourceRange.aspectMask < b.subresourceRange.aspectMask;
+				}
+
+				if(a.oldLayout != b.oldLayout)
+				{
+					return a.oldLayout < b.oldLayout;
+				}
+
+				if(a.newLayout != b.newLayout)
+				{
+					return a.newLayout < b.newLayout;
+				}
+
+				if(a.subresourceRange.baseArrayLayer != b.subresourceRange.baseArrayLayer)
+				{
+					return a.subresourceRange.baseArrayLayer < b.subresourceRange.baseArrayLayer;
+				}
+
+				if(a.subresourceRange.baseMipLevel != b.subresourceRange.baseMipLevel)
+				{
+					return a.subresourceRange.baseMipLevel < b.subresourceRange.baseMipLevel;
+				}
+
+				return false;
+			});
+	}
+
+	// Merge the image barriers in two passes: first the adjacent mips, then the adjacent layers
+	DynamicArrayAuto<VkImageMemoryBarrier> finalImgBarriers(m_alloc);
+	U finalImgBarrierCount = 0;
+	if(m_imgBarrierCount > 0)
+	{
+		DynamicArrayAuto<VkImageMemoryBarrier> squashedBarriers(m_alloc);
+		U squashedBarrierCount = 0;
+
+		// Worst case nothing can be merged
+		squashedBarriers.create(m_imgBarrierCount);
+
+		// Pass 1: Squash the mips. Because the input is sorted, the last squashed barrier is always the one that
+		// contains the previous input barrier
+		for(U i = 0; i < m_imgBarrierCount; ++i)
+		{
+			const VkImageMemoryBarrier* prev = (i > 0) ? &m_imgBarriers[i - 1] : nullptr;
+			const VkImageMemoryBarrier& crnt = m_imgBarriers[i];
+
+			if(prev && prev->image == crnt.image
+				&& prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
+				&& prev->oldLayout == crnt.oldLayout
+				&& prev->newLayout == crnt.newLayout
+				&& prev->srcAccessMask == crnt.srcAccessMask
+				&& prev->dstAccessMask == crnt.dstAccessMask
+				&& prev->subresourceRange.baseMipLevel + prev->subresourceRange.levelCount
+					== crnt.subresourceRange.baseMipLevel
+				&& prev->subresourceRange.baseArrayLayer == crnt.subresourceRange.baseArrayLayer
+				&& prev->subresourceRange.layerCount == crnt.subresourceRange.layerCount)
+			{
+				// Can batch. Extend the last stored barrier (index count - 1), not the next free slot
+				ANKI_ASSERT(squashedBarrierCount > 0);
+				VkImageMemoryBarrier& last = squashedBarriers[squashedBarrierCount - 1];
+				last.subresourceRange.levelCount += crnt.subresourceRange.levelCount;
+			}
+			else
+			{
+				// Can't batch, create new barrier
+				squashedBarriers[squashedBarrierCount++] = crnt;
+			}
+		}
+
+		ANKI_ASSERT(squashedBarrierCount);
+
+		// Pass 2: Squash the layers of the barriers that cover the same mip range
+		finalImgBarriers.create(squashedBarrierCount);
+
+		for(U i = 0; i < squashedBarrierCount; ++i)
+		{
+			const VkImageMemoryBarrier* prev = (i > 0) ? &squashedBarriers[i - 1] : nullptr;
+			const VkImageMemoryBarrier& crnt = squashedBarriers[i];
+
+			if(prev && prev->image == crnt.image
+				&& prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
+				&& prev->oldLayout == crnt.oldLayout
+				&& prev->newLayout == crnt.newLayout
+				&& prev->srcAccessMask == crnt.srcAccessMask
+				&& prev->dstAccessMask == crnt.dstAccessMask
+				&& prev->subresourceRange.baseMipLevel == crnt.subresourceRange.baseMipLevel
+				&& prev->subresourceRange.levelCount == crnt.subresourceRange.levelCount
+				&& prev->subresourceRange.baseArrayLayer + prev->subresourceRange.layerCount
+					== crnt.subresourceRange.baseArrayLayer)
+			{
+				// Can batch. Extend the last stored barrier (index count - 1), not the next free slot
+				ANKI_ASSERT(finalImgBarrierCount > 0);
+				VkImageMemoryBarrier& last = finalImgBarriers[finalImgBarrierCount - 1];
+				last.subresourceRange.layerCount += crnt.subresourceRange.layerCount;
+			}
+			else
+			{
+				// Can't batch, create new barrier
+				finalImgBarriers[finalImgBarrierCount++] = crnt;
+			}
+		}
+
+		ANKI_ASSERT(finalImgBarrierCount);
+	}
+
+	// Finish the job. The stage masks are the union of the stages of every barrier in the batch
+	vkCmdPipelineBarrier(m_handle,
+		m_srcStageMask,
+		m_dstStageMask,
+		0,
+		0,
+		nullptr,
+		m_buffBarrierCount,
+		(m_buffBarrierCount) ? &m_buffBarriers[0] : nullptr,
+		finalImgBarrierCount,
+		(finalImgBarrierCount) ? &finalImgBarriers[0] : nullptr);
+
+	ANKI_TRACE_INC_COUNTER(GR_PIPELINE_BARRIERS, 1);
+
+	// Reset the batch. The stage masks have to be cleared as well, otherwise the next batch would inherit the
+	// stages of this one and synchronize more than what was requested
+	m_imgBarrierCount = 0;
+	m_buffBarrierCount = 0;
+	m_srcStageMask = 0;
+	m_dstStageMask = 0;
+}
+
 } // end namespace anki
 } // end namespace anki

+ 26 - 11
src/anki/gr/vulkan/CommandBufferImpl.h

@@ -12,12 +12,21 @@
 namespace anki
 namespace anki
 {
 {
 
 
+#define ANKI_BATCH_COMMANDS 1
+
 // Forward
 // Forward
 class CommandBufferInitInfo;
 class CommandBufferInitInfo;
 
 
 /// @addtogroup vulkan
 /// @addtogroup vulkan
 /// @{
 /// @{
 
 
+/// List the commands that can be batched. commandCommon() compares the type of each command with the previous one.
+enum class CommandBufferCommandType : U8
+{
+	SET_BARRIER, ///< Passed by setImageBarrier and setBufferBarrier. Barriers are flushed when the type changes.
+	ANY_OTHER_COMMAND ///< Passed by every other command. Nothing is flushed when leaving this type.
+};
+
 /// Command buffer implementation.
 /// Command buffer implementation.
 class CommandBufferImpl : public VulkanObject
 class CommandBufferImpl : public VulkanObject
 {
 {
@@ -65,12 +74,7 @@ public:
 
 
 	void drawElementsIndirect(U32 drawCount, PtrSize offset, BufferPtr buff);
 	void drawElementsIndirect(U32 drawCount, PtrSize offset, BufferPtr buff);
 
 
-	void dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
-	{
-		commandCommon();
-		flushBarriers();
-		vkCmdDispatch(m_handle, groupCountX, groupCountY, groupCountZ);
-	}
+	void dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ);
 
 
 	void resetOcclusionQuery(OcclusionQueryPtr query);
 	void resetOcclusionQuery(OcclusionQueryPtr query);
 
 
@@ -164,8 +168,20 @@ private:
 #endif
 #endif
 	VkSubpassContents m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;
 	VkSubpassContents m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;
 
 
+	CommandBufferCommandType m_lastCmdType = CommandBufferCommandType::ANY_OTHER_COMMAND;
+
+	/// @name barrier_batch
+	/// @{
+	DynamicArray<VkImageMemoryBarrier> m_imgBarriers;
+	DynamicArray<VkBufferMemoryBarrier> m_buffBarriers;
+	U16 m_imgBarrierCount = 0;
+	U16 m_buffBarrierCount = 0;
+	VkPipelineStageFlags m_srcStageMask = 0;
+	VkPipelineStageFlags m_dstStageMask = 0;
+	/// @}
+
 	/// Some common operations per command.
 	/// Some common operations per command.
-	void commandCommon();
+	void commandCommon(CommandBufferCommandType type);
 
 
 	void drawcallCommon();
 	void drawcallCommon();
 
 
@@ -180,12 +196,11 @@ private:
 
 
 	Bool secondLevel() const
 	Bool secondLevel() const
 	{
 	{
-		return (m_flags & CommandBufferFlag::SECOND_LEVEL) != CommandBufferFlag::NONE;
+		return !!(m_flags & CommandBufferFlag::SECOND_LEVEL);
 	}
 	}
 
 
-	void flushBarriers()
-	{
-	}
+	/// Flush batched image and buffer barriers.
+	void flushBarriers();
 
 
 	void clearTextureInternal(TexturePtr tex, const ClearValue& clearValue, const VkImageSubresourceRange& range);
 	void clearTextureInternal(TexturePtr tex, const ClearValue& clearValue, const VkImageSubresourceRange& range);
 
 

+ 65 - 19
src/anki/gr/vulkan/CommandBufferImpl.inl.h

@@ -10,13 +10,14 @@
 #include <anki/gr/vulkan/BufferImpl.h>
 #include <anki/gr/vulkan/BufferImpl.h>
 #include <anki/gr/OcclusionQuery.h>
 #include <anki/gr/OcclusionQuery.h>
 #include <anki/gr/vulkan/OcclusionQueryImpl.h>
 #include <anki/gr/vulkan/OcclusionQueryImpl.h>
+#include <anki/core/Trace.h>
 
 
 namespace anki
 namespace anki
 {
 {
 
 
 inline void CommandBufferImpl::setViewport(U16 minx, U16 miny, U16 maxx, U16 maxy)
 inline void CommandBufferImpl::setViewport(U16 minx, U16 miny, U16 maxx, U16 maxy)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(minx < maxx && miny < maxy);
 	ANKI_ASSERT(minx < maxx && miny < maxy);
 	VkViewport s;
 	VkViewport s;
 	s.x = minx;
 	s.x = minx;
@@ -37,7 +38,7 @@ inline void CommandBufferImpl::setViewport(U16 minx, U16 miny, U16 maxx, U16 max
 
 
 inline void CommandBufferImpl::setPolygonOffset(F32 factor, F32 units)
 inline void CommandBufferImpl::setPolygonOffset(F32 factor, F32 units)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	vkCmdSetDepthBias(m_handle, units, 0.0, factor);
 	vkCmdSetDepthBias(m_handle, units, 0.0, factor);
 }
 }
 
 
@@ -51,7 +52,7 @@ inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage,
 	const VkImageSubresourceRange& range)
 	const VkImageSubresourceRange& range)
 {
 {
 	ANKI_ASSERT(img);
 	ANKI_ASSERT(img);
-	commandCommon();
+	commandCommon(CommandBufferCommandType::SET_BARRIER);
 
 
 	VkImageMemoryBarrier inf = {};
 	VkImageMemoryBarrier inf = {};
 	inf.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 	inf.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@@ -64,7 +65,20 @@ inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage,
 	inf.image = img;
 	inf.image = img;
 	inf.subresourceRange = range;
 	inf.subresourceRange = range;
 
 
+#if ANKI_BATCH_COMMANDS
+	if(m_imgBarriers.getSize() <= m_imgBarrierCount)
+	{
+		m_imgBarriers.resize(m_alloc, max<U>(2, m_imgBarrierCount * 2));
+	}
+
+	m_imgBarriers[m_imgBarrierCount++] = inf;
+
+	m_srcStageMask |= srcStage;
+	m_dstStageMask |= dstStage;
+#else
 	vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &inf);
 	vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &inf);
+	ANKI_TRACE_INC_COUNTER(GR_PIPELINE_BARRIERS, 1);
+#endif
 }
 }
 
 
 inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage,
 inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage,
@@ -145,7 +159,7 @@ inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage,
 	VkBuffer buff)
 	VkBuffer buff)
 {
 {
 	ANKI_ASSERT(buff);
 	ANKI_ASSERT(buff);
-	commandCommon();
+	commandCommon(CommandBufferCommandType::SET_BARRIER);
 
 
 	VkBufferMemoryBarrier b = {};
 	VkBufferMemoryBarrier b = {};
 	b.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
 	b.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -157,7 +171,20 @@ inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage,
 	b.offset = offset;
 	b.offset = offset;
 	b.size = size;
 	b.size = size;
 
 
+#if ANKI_BATCH_COMMANDS
+	if(m_buffBarriers.getSize() <= m_buffBarrierCount)
+	{
+		m_buffBarriers.resize(m_alloc, max<U>(2, m_buffBarrierCount * 2));
+	}
+
+	m_buffBarriers[m_buffBarrierCount++] = b;
+
+	m_srcStageMask |= srcStage;
+	m_dstStageMask |= dstStage;
+#else
 	vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 1, &b, 0, nullptr);
 	vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 1, &b, 0, nullptr);
+	ANKI_TRACE_INC_COUNTER(GR_PIPELINE_BARRIERS, 1);
+#endif
 }
 }
 
 
 inline void CommandBufferImpl::setBufferBarrier(
 inline void CommandBufferImpl::setBufferBarrier(
@@ -211,10 +238,15 @@ inline void CommandBufferImpl::drawElementsIndirect(U32 drawCount, PtrSize offse
 	vkCmdDrawIndexedIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawElementsIndirectInfo));
 	vkCmdDrawIndexedIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawElementsIndirectInfo));
 }
 }
 
 
+inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
+{
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND); // Flushes batched barriers if the last command was one
+	vkCmdDispatch(m_handle, groupCountX, groupCountY, groupCountZ); // Record the dispatch with the given group counts
+}
+
 inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
 inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
@@ -227,8 +259,7 @@ inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
 
 
 inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
 inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
@@ -241,7 +272,7 @@ inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
 
 
 inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
 inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	VkQueryPool handle = query->getImplementation().m_handle.m_pool;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
 	U32 idx = query->getImplementation().m_handle.m_queryIndex;
@@ -255,8 +286,7 @@ inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
 inline void CommandBufferImpl::clearTextureInternal(
 inline void CommandBufferImpl::clearTextureInternal(
 	TexturePtr tex, const ClearValue& clearValue, const VkImageSubresourceRange& range)
 	TexturePtr tex, const ClearValue& clearValue, const VkImageSubresourceRange& range)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 
 
 	VkClearColorValue vclear;
 	VkClearColorValue vclear;
 	static_assert(sizeof(vclear) == sizeof(clearValue), "See file");
 	static_assert(sizeof(vclear) == sizeof(clearValue), "See file");
@@ -311,8 +341,7 @@ inline void CommandBufferImpl::clearTextureVolume(
 
 
 inline void CommandBufferImpl::uploadBuffer(BufferPtr buff, PtrSize offset, const TransientMemoryToken& token)
 inline void CommandBufferImpl::uploadBuffer(BufferPtr buff, PtrSize offset, const TransientMemoryToken& token)
 {
 {
-	commandCommon();
-	flushBarriers();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	BufferImpl& impl = buff->getImplementation();
 	BufferImpl& impl = buff->getImplementation();
 
 
 	VkBufferCopy region;
 	VkBufferCopy region;
@@ -333,7 +362,7 @@ inline void CommandBufferImpl::uploadBuffer(BufferPtr buff, PtrSize offset, cons
 
 
 inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
 inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(insideRenderPass());
 	ANKI_ASSERT(insideRenderPass());
 	ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM
 	ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM
 		|| m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
 		|| m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
@@ -346,7 +375,7 @@ inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmd
 		beginRenderPassInternal();
 		beginRenderPassInternal();
 	}
 	}
 
 
-	cmdb->getImplementation().endRecordingInternal();
+	cmdb->getImplementation().endRecordingInternal(); // XXX That is wrong in MT
 
 
 	vkCmdExecuteCommands(m_handle, 1, &cmdb->getImplementation().m_handle);
 	vkCmdExecuteCommands(m_handle, 1, &cmdb->getImplementation().m_handle);
 
 
@@ -357,7 +386,7 @@ inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmd
 inline void CommandBufferImpl::drawcallCommon()
 inline void CommandBufferImpl::drawcallCommon()
 {
 {
 	// Preconditions
 	// Preconditions
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(insideRenderPass() || secondLevel());
 	ANKI_ASSERT(insideRenderPass() || secondLevel());
 	ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || m_subpassContents == VK_SUBPASS_CONTENTS_INLINE);
 	ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || m_subpassContents == VK_SUBPASS_CONTENTS_INLINE);
 #if ANKI_ASSERTIONS
 #if ANKI_ASSERTIONS
@@ -372,7 +401,7 @@ inline void CommandBufferImpl::drawcallCommon()
 	++m_rpCommandCount;
 	++m_rpCommandCount;
 }
 }
 
 
-inline void CommandBufferImpl::commandCommon()
+inline void CommandBufferImpl::commandCommon(CommandBufferCommandType type)
 {
 {
 	ANKI_ASSERT(Thread::getCurrentThreadId() == m_tid
 	ANKI_ASSERT(Thread::getCurrentThreadId() == m_tid
 		&& "Commands must be recorder and flushed by the thread this command buffer was created");
 		&& "Commands must be recorder and flushed by the thread this command buffer was created");
@@ -380,11 +409,28 @@ inline void CommandBufferImpl::commandCommon()
 	ANKI_ASSERT(!m_finalized);
 	ANKI_ASSERT(!m_finalized);
 	ANKI_ASSERT(m_handle);
 	ANKI_ASSERT(m_handle);
 	m_empty = false;
 	m_empty = false;
+
+	// Flush batched commands
+	if(type != m_lastCmdType)
+	{
+		switch(m_lastCmdType)
+		{
+		case CommandBufferCommandType::SET_BARRIER:
+			flushBarriers();
+			break;
+		case CommandBufferCommandType::ANY_OTHER_COMMAND:
+			break;
+		default:
+			ANKI_ASSERT(0);
+		}
+
+		m_lastCmdType = type;
+	}
 }
 }
 
 
 inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
 inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(!insideRenderPass());
 	ANKI_ASSERT(!insideRenderPass());
 	const BufferImpl& impl = buff->getImplementation();
 	const BufferImpl& impl = buff->getImplementation();
 	ANKI_ASSERT(impl.usageValid(BufferUsageBit::FILL));
 	ANKI_ASSERT(impl.usageValid(BufferUsageBit::FILL));
@@ -402,7 +448,7 @@ inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSiz
 inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(
 inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(
 	OcclusionQueryPtr query, PtrSize offset, BufferPtr buff)
 	OcclusionQueryPtr query, PtrSize offset, BufferPtr buff)
 {
 {
-	commandCommon();
+	commandCommon(CommandBufferCommandType::ANY_OTHER_COMMAND);
 	ANKI_ASSERT(!insideRenderPass());
 	ANKI_ASSERT(!insideRenderPass());
 
 
 	const BufferImpl& impl = buff->getImplementation();
 	const BufferImpl& impl = buff->getImplementation();

+ 2 - 2
src/anki/gr/vulkan/GpuMemoryAllocator.cpp

@@ -38,8 +38,8 @@ public:
 	/// It points to a CPU address if mapped.
 	/// It points to a CPU address if mapped.
 	U8* m_mappedAddress = nullptr;
 	U8* m_mappedAddress = nullptr;
 
 
-	/// Protect the m_mappedAddress. It's a SpinLock because we don't want a
-	/// whole mutex for every GpuMemoryAllocatorChunk.
+	/// Protect the m_mappedAddress. It's a SpinLock because we don't want a whole mutex for every
+	/// GpuMemoryAllocatorChunk.
 	SpinLock m_mtx;
 	SpinLock m_mtx;
 
 
 	/// If true it contains linear resources.
 	/// If true it contains linear resources.

+ 4 - 4
src/anki/gr/vulkan/GrManagerImpl.cpp

@@ -555,7 +555,7 @@ GrManagerImpl::PerThread& GrManagerImpl::getPerThreadCache(ThreadId tid)
 	return *thread;
 	return *thread;
 }
 }
 
 
-VkCommandBuffer GrManagerImpl::newCommandBuffer(ThreadId tid, Bool secondLevel)
+VkCommandBuffer GrManagerImpl::newCommandBuffer(ThreadId tid, CommandBufferFlag cmdbFlags)
 {
 {
 	// Get the per thread cache
 	// Get the per thread cache
 	PerThread& thread = getPerThreadCache(tid);
 	PerThread& thread = getPerThreadCache(tid);
@@ -570,15 +570,15 @@ VkCommandBuffer GrManagerImpl::newCommandBuffer(ThreadId tid, Bool secondLevel)
 		}
 		}
 	}
 	}
 
 
-	return thread.m_cmdbs.newCommandBuffer(secondLevel);
+	return thread.m_cmdbs.newCommandBuffer(cmdbFlags);
 }
 }
 
 
-void GrManagerImpl::deleteCommandBuffer(VkCommandBuffer cmdb, Bool secondLevel, ThreadId tid)
+void GrManagerImpl::deleteCommandBuffer(VkCommandBuffer cmdb, CommandBufferFlag cmdbFlags, ThreadId tid)
 {
 {
 	// Get the per thread cache
 	// Get the per thread cache
 	PerThread& thread = getPerThreadCache(tid);
 	PerThread& thread = getPerThreadCache(tid);
 
 
-	thread.m_cmdbs.deleteCommandBuffer(cmdb, secondLevel);
+	thread.m_cmdbs.deleteCommandBuffer(cmdb, cmdbFlags);
 }
 }
 
 
 void GrManagerImpl::flushCommandBuffer(CommandBufferPtr cmdb,
 void GrManagerImpl::flushCommandBuffer(CommandBufferPtr cmdb,

+ 2 - 2
src/anki/gr/vulkan/GrManagerImpl.h

@@ -78,9 +78,9 @@ public:
 	/// @name object_creation
 	/// @name object_creation
 	/// @{
 	/// @{
 
 
-	VkCommandBuffer newCommandBuffer(ThreadId tid, Bool secondLevel);
+	VkCommandBuffer newCommandBuffer(ThreadId tid, CommandBufferFlag cmdbFlags);
 
 
-	void deleteCommandBuffer(VkCommandBuffer cmdb, Bool secondLevel, ThreadId tid);
+	void deleteCommandBuffer(VkCommandBuffer cmdb, CommandBufferFlag cmdbFlags, ThreadId tid);
 
 
 	SemaphorePtr newSemaphore()
 	SemaphorePtr newSemaphore()
 	{
 	{

+ 1 - 0
src/anki/gr/vulkan/TextureImpl.h

@@ -160,6 +160,7 @@ inline void TextureImpl::computeSubResourceRange(const TextureSurfaceInfo& surf,
 		break;
 		break;
 	default:
 	default:
 		ANKI_ASSERT(0);
 		ANKI_ASSERT(0);
+		range.baseArrayLayer = 0;
 	}
 	}
 	range.layerCount = 1;
 	range.layerCount = 1;
 }
 }