
Nuke secondary command buffers

Panagiotis Christopoulos Charitos, 1 year ago
commit 64160ca105

+ 3 - 12
AnKi/Gr/CommandBuffer.h

@@ -56,16 +56,14 @@ enum class CommandBufferFlag : U8
 {
 	kNone = 0,
 
-	kSecondLevel = 1 << 0,
-
 	/// It will contain a handful of commands.
-	kSmallBatch = 1 << 3,
+	kSmallBatch = 1 << 0,
 
 	/// Will contain graphics, compute and transfer work.
-	kGeneralWork = 1 << 4,
+	kGeneralWork = 1 << 1,
 
 	/// Will contain only compute work. It binds to async compute queues.
-	kComputeWork = 1 << 5,
+	kComputeWork = 1 << 2,
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(CommandBufferFlag)
 
@@ -73,10 +71,6 @@ ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(CommandBufferFlag)
 class CommandBufferInitInfo : public GrBaseInitInfo
 {
 public:
-	Framebuffer* m_framebuffer = nullptr; ///< For second level command buffers.
-	Array<TextureUsageBit, kMaxColorRenderTargets> m_colorAttachmentUsages = {};
-	TextureUsageBit m_depthStencilAttachmentUsage = TextureUsageBit::kNone;
-
 	CommandBufferFlag m_flags = CommandBufferFlag::kGeneralWork;
 
 	CommandBufferInitInfo(CString name = {})
@@ -459,9 +453,6 @@ public:
 	/// Write a timestamp.
 	void writeTimestamp(TimestampQuery* query);
 
-	/// Append second level command buffers.
-	void pushSecondLevelCommandBuffers(ConstWeakArray<CommandBuffer*> cmdbs);
-
 	Bool isEmpty() const;
 
 	void pushDebugMarker(CString name, Vec3 color);
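Editor's note: with kSecondLevel gone, CommandBufferInitInfo no longer carries a framebuffer or attachment usages; a command buffer is described only by its flags and an optional debug name. A minimal sketch of the new construction path, using only calls that appear elsewhere in this diff:

	CommandBufferInitInfo cmdbInit("RenderGraph cmdb");
	cmdbInit.m_flags = CommandBufferFlag::kGeneralWork; // or kComputeWork, optionally ORed with kSmallBatch
	CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
	// ... record commands ...
	cmdb->endRecording();
	GrManager::getSingleton().submit(cmdb.get());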

+ 144 - 209
AnKi/Gr/RenderGraph.cpp

@@ -121,8 +121,6 @@ public:
 
 	Function<void(RenderPassWorkContext&), MemoryPoolPtrWrapper<StackMemoryPool>> m_callback;
 
-	DynamicArray<CommandBufferPtr, MemoryPoolPtrWrapper<StackMemoryPool>> m_secondLevelCmdbs;
-	CommandBufferInitInfo m_secondLevelCmdbInitInfo;
 	Array<U32, 4> m_fbRenderArea;
 	Array<TextureUsageBit, kMaxColorRenderTargets> m_colorUsages = {}; ///< For beginRender pass
 	TextureUsageBit m_dsUsage = TextureUsageBit::kNone; ///< For beginRender pass
@@ -137,7 +135,6 @@ public:
 	Pass(StackMemoryPool* pool)
 		: m_dependsOn(pool)
 		, m_consumedTextures(pool)
-		, m_secondLevelCmdbs(pool)
 		, m_name(pool)
 	{
 	}
@@ -152,7 +149,7 @@ public:
 	DynamicArray<TextureBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> m_textureBarriersBefore;
 	DynamicArray<BufferBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> m_bufferBarriersBefore;
 	DynamicArray<ASBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> m_asBarriersBefore;
-	CommandBuffer* m_cmdb; ///< Someone else holds the ref already so have a ptr here.
+	Bool m_drawsToPresentImage = false;
 
 	Batch(StackMemoryPool* pool)
 		: m_passIndices(pool)
@@ -173,8 +170,7 @@ public:
 		m_textureBarriersBefore = std::move(b.m_textureBarriersBefore);
 		m_bufferBarriersBefore = std::move(b.m_bufferBarriersBefore);
 		m_asBarriersBefore = std::move(b.m_asBarriersBefore);
-		m_cmdb = b.m_cmdb;
-		b.m_cmdb = nullptr;
+		m_drawsToPresentImage = b.m_drawsToPresentImage;
 
 		return *this;
 	}
@@ -191,8 +187,6 @@ public:
 	DynamicArray<BufferRange, MemoryPoolPtrWrapper<StackMemoryPool>> m_buffers;
 	DynamicArray<AS, MemoryPoolPtrWrapper<StackMemoryPool>> m_as;
 
-	DynamicArray<CommandBufferPtr, MemoryPoolPtrWrapper<StackMemoryPool>> m_graphicsCmdbs;
-
 	Bool m_gatherStatistics = false;
 
 	BakeContext(StackMemoryPool* pool)
@@ -201,7 +195,6 @@ public:
 		, m_rts(pool)
 		, m_buffers(pool)
 		, m_as(pool)
-		, m_graphicsCmdbs(pool)
 	{
 	}
 };
@@ -378,13 +371,10 @@ void RenderGraph::reset()
 	for(Pass& p : m_ctx->m_passes)
 	{
 		p.m_framebuffer.reset(nullptr);
-		p.m_secondLevelCmdbs.destroy();
 		p.m_callback.destroy();
 		p.m_name.destroy();
 	}
 
-	m_ctx->m_graphicsCmdbs.destroy();
-
 	m_ctx = nullptr;
 	++m_version;
 }
@@ -830,14 +820,6 @@ void RenderGraph::initRenderPassesAndSetDeps(const RenderGraphDescription& descr
 				outPass.m_fbRenderArea = graphicsPass.m_fbRenderArea;
 				outPass.m_drawsToPresentable = drawsToPresentable;
 			}
-			else
-			{
-				ANKI_ASSERT(inPass.m_secondLevelCmdbsCount == 0 && "Can't have second level cmdbs");
-			}
-		}
-		else
-		{
-			ANKI_ASSERT(inPass.m_secondLevelCmdbsCount == 0 && "Can't have second level cmdbs");
 		}
 
 		// Set dependencies by checking all previous subpasses.
@@ -861,11 +843,10 @@ void RenderGraph::initBatches()
 	U passesAssignedToBatchCount = 0;
 	const U passCount = m_ctx->m_passes.getSize();
 	ANKI_ASSERT(passCount > 0);
-	Bool setTimestamp = m_ctx->m_gatherStatistics;
 	while(passesAssignedToBatchCount < passCount)
 	{
 		Batch batch(m_ctx->m_as.getMemoryPool().m_pool);
-		Bool drawsToPresentable = false;
+		batch.m_drawsToPresentImage = false;
 
 		for(U32 i = 0; i < passCount; ++i)
 		{
@@ -876,40 +857,9 @@ void RenderGraph::initBatches()
 				batch.m_passIndices.emplaceBack(i);
 
 				// Will batch draw to the swapchain?
-				drawsToPresentable = drawsToPresentable || m_ctx->m_passes[i].m_drawsToPresentable;
-			}
-		}
-
-		// Get or create cmdb for the batch.
-		// Create a new cmdb if the batch is writing to swapchain. This will help Vulkan to have a dependency of the swap chain image acquire to the
-		// 2nd command buffer instead of adding it to a single big cmdb.
-		if(m_ctx->m_graphicsCmdbs.isEmpty() || drawsToPresentable)
-		{
-			CommandBufferInitInfo cmdbInit;
-			cmdbInit.m_flags = CommandBufferFlag::kGeneralWork;
-			CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
-
-			m_ctx->m_graphicsCmdbs.emplaceBack(cmdb);
-
-			batch.m_cmdb = cmdb.get();
-
-			// Maybe write a timestamp
-			if(setTimestamp) [[unlikely]]
-			{
-				setTimestamp = false;
-				TimestampQueryPtr query = GrManager::getSingleton().newTimestampQuery();
-				TimestampQuery* pQuery = query.get();
-				cmdb->resetTimestampQueries({&pQuery, 1});
-				cmdb->writeTimestamp(query.get());
-
-				m_statistics.m_nextTimestamp = (m_statistics.m_nextTimestamp + 1) % kMaxBufferedTimestamps;
-				m_statistics.m_timestamps[m_statistics.m_nextTimestamp * 2] = query;
+				batch.m_drawsToPresentImage = batch.m_drawsToPresentImage || m_ctx->m_passes[i].m_drawsToPresentable;
 			}
 		}
-		else
-		{
-			batch.m_cmdb = m_ctx->m_graphicsCmdbs.getBack().get();
-		}
 
 		// Mark batch's passes done
 		for(U32 passIdx : batch.m_passIndices)
@@ -959,27 +909,8 @@ void RenderGraph::initGraphicsPasses(const RenderGraphDescription& descr)
 
 					outPass.m_dsUsage = usage;
 				}
-
-				// Do some pre-work for the second level command buffers
-				if(inPass.m_secondLevelCmdbsCount)
-				{
-					outPass.m_secondLevelCmdbs.resize(inPass.m_secondLevelCmdbsCount);
-					CommandBufferInitInfo& cmdbInit = outPass.m_secondLevelCmdbInitInfo;
-					cmdbInit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSecondLevel;
-					cmdbInit.m_framebuffer = outPass.m_framebuffer.get();
-					cmdbInit.m_colorAttachmentUsages = outPass.m_colorUsages;
-					cmdbInit.m_depthStencilAttachmentUsage = outPass.m_dsUsage;
-				}
-			}
-			else
-			{
-				ANKI_ASSERT(inPass.m_secondLevelCmdbsCount == 0 && "Can't have second level cmdbs");
 			}
 		}
-		else
-		{
-			ANKI_ASSERT(inPass.m_secondLevelCmdbsCount == 0 && "Can't have second level cmdbs");
-		}
 	}
 }
 
@@ -1332,177 +1263,181 @@ AccelerationStructure* RenderGraph::getAs(AccelerationStructureHandle handle) co
 	return m_ctx->m_as[handle.m_idx].m_as.get();
 }
 
-void RenderGraph::runSecondLevel()
+void RenderGraph::recordAndSubmitCommandBuffers(FencePtr* optionalFence)
 {
-	ANKI_TRACE_SCOPED_EVENT(GrRenderGraph2ndLevel);
+	ANKI_TRACE_SCOPED_EVENT(GrRenderGraphRecordAndSubmit);
 	ANKI_ASSERT(m_ctx);
 
-	StackMemoryPool& pool = *m_ctx->m_rts.getMemoryPool().m_pool;
+	const U32 batchGroupCount = min(CoreThreadJobManager::getSingleton().getThreadCount(), m_ctx->m_batches.getSize());
+	StackMemoryPool* pool = m_ctx->m_rts.getMemoryPool().m_pool;
+
+	DynamicArray<CommandBufferPtr, MemoryPoolPtrWrapper<StackMemoryPool>> cmdbs(pool);
+	cmdbs.resize(batchGroupCount);
+	SpinLock cmdbsMtx;
 
-	// Gather the tasks
-	for(Pass& pass : m_ctx->m_passes)
+	for(U32 group = 0; group < batchGroupCount; ++group)
 	{
-		for(U32 cmdIdx = 0; cmdIdx < pass.m_secondLevelCmdbs.getSize(); ++cmdIdx)
+		U32 start, end;
+		splitThreadedProblem(group, batchGroupCount, m_ctx->m_batches.getSize(), start, end);
+
+		if(start == end)
 		{
-			RenderPassWorkContext* ctx = anki::newInstance<RenderPassWorkContext>(pool);
-			ctx->m_rgraph = this;
-			ctx->m_currentSecondLevelCommandBufferIndex = cmdIdx;
-			ctx->m_secondLevelCommandBufferCount = pass.m_secondLevelCmdbs.getSize();
-			ctx->m_passIdx = U32(&pass - &m_ctx->m_passes[0]);
-			ctx->m_batchIdx = pass.m_batchIdx;
-
-			CoreThreadJobManager::getSingleton().dispatchTask([ctx]([[maybe_unused]] U32 tid) {
-				ANKI_TRACE_SCOPED_EVENT(GrExecuteSecondaryCmdb);
-
-				// Create the command buffer in the thread
-				Pass& pass = ctx->m_rgraph->m_ctx->m_passes[ctx->m_passIdx];
-				ANKI_ASSERT(!pass.m_secondLevelCmdbs[ctx->m_currentSecondLevelCommandBufferIndex].isCreated());
-				pass.m_secondLevelCmdbs[ctx->m_currentSecondLevelCommandBufferIndex] =
-					GrManager::getSingleton().newCommandBuffer(pass.m_secondLevelCmdbInitInfo);
-				ctx->m_commandBuffer = pass.m_secondLevelCmdbs[ctx->m_currentSecondLevelCommandBufferIndex].get();
+			continue;
+		}
+
+		CoreThreadJobManager::getSingleton().dispatchTask(
+			[this, start, end, pool, &cmdbs, &cmdbsMtx, group, batchGroupCount]([[maybe_unused]] U32 tid) {
+				ANKI_TRACE_SCOPED_EVENT(GrRenderGraphTask);
 
+				CommandBufferInitInfo cmdbInit("RenderGraph cmdb");
+				cmdbInit.m_flags = CommandBufferFlag::kGeneralWork;
+				CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
+
+				// Write timestamp
+				const Bool setPreQuery = m_ctx->m_gatherStatistics && group == 0;
+				const Bool setPostQuery = m_ctx->m_gatherStatistics && group == batchGroupCount - 1;
+				TimestampQueryPtr preQuery, postQuery;
+				if(setPreQuery)
 				{
-					ANKI_TRACE_SCOPED_EVENT(GrRenderGraphCallback);
-					pass.m_callback(*ctx);
+					preQuery = GrManager::getSingleton().newTimestampQuery();
+					cmdb->writeTimestamp(preQuery.get());
 				}
 
-				ctx->m_commandBuffer->endRecording();
-				if(!(ctx->m_commandBuffer->getFlags() & CommandBufferFlag::kSecondLevel))
+				if(setPostQuery)
 				{
-					GrManager::getSingleton().submit(ctx->m_commandBuffer);
+					postQuery = GrManager::getSingleton().newTimestampQuery();
 				}
-			});
-		}
-	}
 
-	CoreThreadJobManager::getSingleton().waitForAllTasksToFinish();
-}
+				// Bookkeeping
+				{
+					LockGuard lock(cmdbsMtx);
+					cmdbs[group] = cmdb;
 
-void RenderGraph::run() const
-{
-	ANKI_TRACE_SCOPED_EVENT(GrRenderGraphRun);
-	ANKI_ASSERT(m_ctx);
+					if(preQuery.isCreated())
+					{
+						m_statistics.m_nextTimestamp = (m_statistics.m_nextTimestamp + 1) % kMaxBufferedTimestamps;
+						m_statistics.m_timestamps[m_statistics.m_nextTimestamp * 2] = preQuery;
+					}
 
-	StackMemoryPool* pool = m_ctx->m_rts.getMemoryPool().m_pool;
+					if(postQuery.isCreated())
+					{
+						m_statistics.m_timestamps[m_statistics.m_nextTimestamp * 2 + 1] = postQuery;
+						m_statistics.m_cpuStartTimes[m_statistics.m_nextTimestamp] = HighRezTimer::getCurrentTime();
+					}
+				}
 
-	RenderPassWorkContext ctx;
-	ctx.m_rgraph = this;
-	ctx.m_currentSecondLevelCommandBufferIndex = 0;
-	ctx.m_secondLevelCommandBufferCount = 0;
+				RenderPassWorkContext ctx;
+				ctx.m_rgraph = this;
 
-	for(const Batch& batch : m_ctx->m_batches)
-	{
-		ctx.m_commandBuffer = batch.m_cmdb;
-		CommandBuffer& cmdb = *ctx.m_commandBuffer;
+				for(U32 i = start; i < end; ++i)
+				{
+					const Batch& batch = m_ctx->m_batches[i];
 
-		// Set the barriers
-		DynamicArray<TextureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(pool);
-		texBarriers.resizeStorage(batch.m_textureBarriersBefore.getSize());
-		for(const TextureBarrier& barrier : batch.m_textureBarriersBefore)
-		{
-			TextureBarrierInfo& inf = *texBarriers.emplaceBack();
-			inf.m_previousUsage = barrier.m_usageBefore;
-			inf.m_nextUsage = barrier.m_usageAfter;
-			inf.m_subresource = barrier.m_surface;
-			inf.m_subresource.m_depthStencilAspect = barrier.m_dsAspect;
-			inf.m_texture = m_ctx->m_rts[barrier.m_idx].m_texture.get();
-		}
-		DynamicArray<BufferBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> buffBarriers(pool);
-		buffBarriers.resizeStorage(batch.m_bufferBarriersBefore.getSize());
-		for(const BufferBarrier& barrier : batch.m_bufferBarriersBefore)
-		{
-			BufferBarrierInfo& inf = *buffBarriers.emplaceBack();
-			inf.m_previousUsage = barrier.m_usageBefore;
-			inf.m_nextUsage = barrier.m_usageAfter;
-			inf.m_offset = m_ctx->m_buffers[barrier.m_idx].m_offset;
-			inf.m_range = m_ctx->m_buffers[barrier.m_idx].m_range;
-			inf.m_buffer = m_ctx->m_buffers[barrier.m_idx].m_buffer.get();
-		}
-		DynamicArray<AccelerationStructureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> asBarriers(pool);
-		for(const ASBarrier& barrier : batch.m_asBarriersBefore)
-		{
-			AccelerationStructureBarrierInfo& inf = *asBarriers.emplaceBack();
-			inf.m_previousUsage = barrier.m_usageBefore;
-			inf.m_nextUsage = barrier.m_usageAfter;
-			inf.m_as = m_ctx->m_as[barrier.m_idx].m_as.get();
-		}
+					// Set the barriers
+					DynamicArray<TextureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(pool);
+					texBarriers.resizeStorage(batch.m_textureBarriersBefore.getSize());
+					for(const TextureBarrier& barrier : batch.m_textureBarriersBefore)
+					{
+						TextureBarrierInfo& inf = *texBarriers.emplaceBack();
+						inf.m_previousUsage = barrier.m_usageBefore;
+						inf.m_nextUsage = barrier.m_usageAfter;
+						inf.m_subresource = barrier.m_surface;
+						inf.m_subresource.m_depthStencilAspect = barrier.m_dsAspect;
+						inf.m_texture = m_ctx->m_rts[barrier.m_idx].m_texture.get();
+					}
+					DynamicArray<BufferBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> buffBarriers(pool);
+					buffBarriers.resizeStorage(batch.m_bufferBarriersBefore.getSize());
+					for(const BufferBarrier& barrier : batch.m_bufferBarriersBefore)
+					{
+						BufferBarrierInfo& inf = *buffBarriers.emplaceBack();
+						inf.m_previousUsage = barrier.m_usageBefore;
+						inf.m_nextUsage = barrier.m_usageAfter;
+						inf.m_offset = m_ctx->m_buffers[barrier.m_idx].m_offset;
+						inf.m_range = m_ctx->m_buffers[barrier.m_idx].m_range;
+						inf.m_buffer = m_ctx->m_buffers[barrier.m_idx].m_buffer.get();
+					}
+					DynamicArray<AccelerationStructureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> asBarriers(pool);
+					for(const ASBarrier& barrier : batch.m_asBarriersBefore)
+					{
+						AccelerationStructureBarrierInfo& inf = *asBarriers.emplaceBack();
+						inf.m_previousUsage = barrier.m_usageBefore;
+						inf.m_nextUsage = barrier.m_usageAfter;
+						inf.m_as = m_ctx->m_as[barrier.m_idx].m_as.get();
+					}
 
-		cmdb.pushDebugMarker("Barrier", Vec3(1.0f, 0.0f, 0.0f));
-		cmdb.setPipelineBarrier(texBarriers, buffBarriers, asBarriers);
-		cmdb.popDebugMarker();
+					cmdb->pushDebugMarker("Barrier", Vec3(1.0f, 0.0f, 0.0f));
+					cmdb->setPipelineBarrier(texBarriers, buffBarriers, asBarriers);
+					cmdb->popDebugMarker();
 
-		// Call the passes
-		for(U32 passIdx : batch.m_passIndices)
-		{
-			const Pass& pass = m_ctx->m_passes[passIdx];
+					ctx.m_commandBuffer = cmdb.get();
+					ctx.m_batchIdx = i;
 
-			Vec3 passColor;
-			if(pass.m_framebuffer)
-			{
-				cmdb.beginRenderPass(pass.m_framebuffer.get(), pass.m_colorUsages, pass.m_dsUsage, pass.m_fbRenderArea[0], pass.m_fbRenderArea[1],
-									 pass.m_fbRenderArea[2], pass.m_fbRenderArea[3]);
+					// Call the passes
+					for(U32 passIdx : batch.m_passIndices)
+					{
+						const Pass& pass = m_ctx->m_passes[passIdx];
 
-				passColor = Vec3(0.0f, 1.0f, 0.0f);
-			}
-			else
-			{
-				passColor = Vec3(1.0f, 1.0f, 0.0f);
-			}
+						Vec3 passColor;
+						if(pass.m_framebuffer)
+						{
+							cmdb->beginRenderPass(pass.m_framebuffer.get(), pass.m_colorUsages, pass.m_dsUsage, pass.m_fbRenderArea[0],
+												  pass.m_fbRenderArea[1], pass.m_fbRenderArea[2], pass.m_fbRenderArea[3]);
 
-			cmdb.pushDebugMarker(pass.m_name, passColor);
+							passColor = Vec3(0.0f, 1.0f, 0.0f);
+						}
+						else
+						{
+							passColor = Vec3(1.0f, 1.0f, 0.0f);
+						}
 
-			const U32 size = pass.m_secondLevelCmdbs.getSize();
-			if(size == 0)
-			{
-				ctx.m_passIdx = passIdx;
-				ctx.m_batchIdx = pass.m_batchIdx;
+						cmdb->pushDebugMarker(pass.m_name, passColor);
 
-				ANKI_TRACE_SCOPED_EVENT(GrRenderGraphCallback);
-				pass.m_callback(ctx);
-			}
-			else
-			{
-				DynamicArray<CommandBuffer*, MemoryPoolPtrWrapper<StackMemoryPool>> cmdbs(pool);
-				cmdbs.resizeStorage(size);
-				for(const CommandBufferPtr& cmdb2nd : pass.m_secondLevelCmdbs)
+						{
+							ANKI_TRACE_SCOPED_EVENT(GrRenderGraphCallback);
+							ctx.m_passIdx = passIdx;
+							pass.m_callback(ctx);
+						}
+
+						if(pass.m_framebuffer)
+						{
+							cmdb->endRenderPass();
+						}
+
+						cmdb->popDebugMarker();
+					}
+				} // end for batches
+
+				if(setPostQuery)
 				{
-					cmdbs.emplaceBack(cmdb2nd.get());
+					// Write a timestamp before the last flush
+					cmdb->writeTimestamp(postQuery.get());
 				}
-				cmdb.pushSecondLevelCommandBuffers(cmdbs);
-			}
 
-			if(pass.m_framebuffer)
-			{
-				cmdb.endRenderPass();
-			}
-
-			cmdb.popDebugMarker();
-		}
+				cmdb->endRecording();
+			});
 	}
-}
 
-void RenderGraph::flush(FencePtr* optionalFence)
-{
-	ANKI_TRACE_SCOPED_EVENT(GrRenderGraphFlush);
+	CoreThreadJobManager::getSingleton().waitForAllTasksToFinish();
 
-	for(U32 i = 0; i < m_ctx->m_graphicsCmdbs.getSize(); ++i)
+	// Submit
+	if(cmdbs.getSize() == 1) [[unlikely]]
 	{
-		if(m_ctx->m_gatherStatistics && i == m_ctx->m_graphicsCmdbs.getSize() - 1) [[unlikely]]
-		{
-			// Write a timestamp before the last flush
-
-			TimestampQueryPtr query = GrManager::getSingleton().newTimestampQuery();
-			TimestampQuery* pQuery = query.get();
-			m_ctx->m_graphicsCmdbs[i]->resetTimestampQueries({&pQuery, 1});
-			m_ctx->m_graphicsCmdbs[i]->writeTimestamp(pQuery);
+		GrManager::getSingleton().submit(cmdbs[0].get(), {}, optionalFence);
+	}
+	else
+	{
+		// 2 submits: the 1st contains all the batches minus the last; the last batch goes alone, given that it most likely writes to the
+		// swapchain.
 
-			m_statistics.m_timestamps[m_statistics.m_nextTimestamp * 2 + 1] = query;
-			m_statistics.m_cpuStartTimes[m_statistics.m_nextTimestamp] = HighRezTimer::getCurrentTime();
+		DynamicArray<CommandBuffer*, MemoryPoolPtrWrapper<StackMemoryPool>> pCmdbs(pool);
+		pCmdbs.resize(cmdbs.getSize() - 1);
+		for(U32 i = 0; i < cmdbs.getSize() - 1; ++i)
+		{
+			pCmdbs[i] = cmdbs[i].get();
 		}
 
-		// Flush
-		m_ctx->m_graphicsCmdbs[i]->endRecording();
-		GrManager::getSingleton().submit(m_ctx->m_graphicsCmdbs[i].get(), {}, (i == m_ctx->m_graphicsCmdbs.getSize() - 1) ? optionalFence : nullptr);
+		GrManager::getSingleton().submit(WeakArray(pCmdbs), {}, nullptr);
+		GrManager::getSingleton().submit(cmdbs.getBack().get(), {}, optionalFence);
 	}
 }
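Editor's note: the parallel recording above relies on splitThreadedProblem to hand each job a contiguous, non-overlapping range of batches. That helper is not part of this diff; assuming the usual even-split semantics, the partitioning would look roughly like this:

	// Hypothetical sketch of the assumed splitThreadedProblem behavior (not taken from this commit).
	void splitThreadedProblem(U32 taskId, U32 taskCount, U32 problemSize, U32& start, U32& end)
	{
		start = problemSize * taskId / taskCount;
		end = problemSize * (taskId + 1) / taskCount;
	}

With 10 batches and 4 groups, for example, group 0 records batches [0, 2) and group 3 records [7, 10); the early continue in the loop guards against the degenerate start == end case.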
 

+ 7 - 44
AnKi/Gr/RenderGraph.h

@@ -122,8 +122,6 @@ class RenderPassWorkContext
 
 public:
 	CommandBuffer* m_commandBuffer = nullptr;
-	U32 m_currentSecondLevelCommandBufferIndex = 0;
-	U32 m_secondLevelCommandBufferCount = 0;
 
 	void getBufferState(BufferHandle handle, Buffer*& buff, PtrSize& offset, PtrSize& range) const;
 
@@ -300,18 +298,10 @@ class RenderPassDescriptionBase
 	friend class RenderGraphDescription;
 
 public:
-	template<typename TFunc>
-	void setWork(U32 secondLeveCmdbCount, TFunc func)
-	{
-		ANKI_ASSERT(m_type == Type::kGraphics || secondLeveCmdbCount == 0);
-		m_callback = {func, m_rtDeps.getMemoryPool().m_pool};
-		m_secondLevelCmdbsCount = secondLeveCmdbCount;
-	}
-
 	template<typename TFunc>
 	void setWork(TFunc func)
 	{
-		setWork(0, func);
+		m_callback = {func, m_rtDeps.getMemoryPool().m_pool};
 	}
 
 	void newTextureDependency(RenderTargetHandle handle, TextureUsageBit usage, const TextureSubresourceInfo& subresource)
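Editor's note: call sites of setWork simply drop the count argument, as the renderer diffs further down show, e.g.:

	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
		run(rgraphCtx, ctx);
	});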
@@ -346,7 +336,6 @@ protected:
 	RenderGraphDescription* m_descr;
 
 	Function<void(RenderPassWorkContext&), MemoryPoolPtrWrapper<StackMemoryPool>> m_callback;
-	U32 m_secondLevelCmdbsCount = 0;
 
 	DynamicArray<RenderPassDependency, MemoryPoolPtrWrapper<StackMemoryPool>> m_rtDeps;
 	DynamicArray<RenderPassDependency, MemoryPoolPtrWrapper<StackMemoryPool>> m_buffDeps;
@@ -603,7 +592,7 @@ public:
 ///
 /// The idea for the RenderGraph is to automate:
 /// - Synchronization (barriers, events etc) between passes.
-/// - Command buffer creation for primary and secondary command buffers.
+/// - Command buffer creation.
 /// - Framebuffer creation.
 /// - Render target creation (optional since textures can be imported as well).
 ///
@@ -618,43 +607,17 @@ class RenderGraph final : public GrObject
 public:
 	static constexpr GrObjectType kClassType = GrObjectType::kRenderGraph;
 
-	/// @name 1st step methods
-	/// @{
+	/// 1st step.
 	void compileNewGraph(const RenderGraphDescription& descr, StackMemoryPool& pool);
-	/// @}
-
-	/// @name 2nd step methods
-	/// @{
-
-	/// Will call a number of RenderPassWorkCallback that populate 2nd level command buffers.
-	void runSecondLevel();
-	/// @}
 
-	/// @name 3rd step methods
-	/// @{
-
-	/// Will call a number of RenderPassWorkCallback that populate 1st level command buffers.
-	void run() const;
-	/// @}
-
-	/// @name 3rd step methods
-	/// @{
-	void flush(FencePtr* optionalFence = nullptr);
-	/// @}
+	/// 2nd step. Will call a number of RenderPassWorkCallback that populate command buffers and submit work.
+	void recordAndSubmitCommandBuffers(FencePtr* optionalFence = nullptr);
 
-	/// @name 4th step methods
-	/// @{
-
-	/// Reset the graph for a new frame. All previously created RenderGraphHandle are invalid after that call.
+	/// 3rd step. Reset the graph for a new frame. All previously created RenderGraphHandle are invalid after that call.
 	void reset();
-	/// @}
 
-	/// @name 5th step methods [OPTIONAL]
-	/// @{
-
-	/// Get some statistics.
+	/// [OPTIONAL] 4th step. Get some statistics.
 	void getStatistics(RenderGraphStatistics& statistics) const;
-	/// @}
 
 private:
 	static constexpr U kPeriodicCleanupEvery = 60; ///< How many frames between cleanups.
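Editor's note: put together, the per-frame usage collapses from five calls to three (plus the optional statistics query). A minimal sketch of the new flow, mirroring the MainRenderer.cpp change below:

	m_rgraph->compileNewGraph(ctx.m_renderGraphDescr, m_framePool); // 1st step
	FencePtr fence;
	m_rgraph->recordAndSubmitCommandBuffers(&fence);                // 2nd step: record in parallel and submit
	m_rgraph->reset();                                              // 3rd step: previously created handles become invalid
	// Optional 4th step:
	// RenderGraphStatistics stats;
	// m_rgraph->getStatistics(stats);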

+ 1 - 1
AnKi/Gr/Vulkan/VkBuffer.cpp

@@ -21,7 +21,7 @@ Buffer* Buffer::newInstance(const BufferInitInfo& init)
 	return impl;
 }
 
-void* Buffer::map(PtrSize offset, PtrSize range, BufferMapAccessBit access)
+void* Buffer::map(PtrSize offset, PtrSize range, [[maybe_unused]] BufferMapAccessBit access)
 {
 	ANKI_VK_SELF(BufferImpl);
 

+ 52 - 224
AnKi/Gr/Vulkan/VkCommandBuffer.cpp

@@ -390,15 +390,44 @@ void CommandBuffer::beginRenderPass(Framebuffer* fb, const Array<TextureUsageBit
 	self.commandCommon();
 	ANKI_ASSERT(!self.insideRenderPass());
 
-	self.m_rpCommandCount = 0;
+	FramebufferImpl& impl = static_cast<FramebufferImpl&>(*fb);
 	self.m_activeFb = fb;
 
-	FramebufferImpl& fbimpl = static_cast<FramebufferImpl&>(*fb);
+	self.m_state.beginRenderPass(&impl);
 
+	VkRenderPassBeginInfo bi = {};
+	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+	bi.clearValueCount = impl.getAttachmentCount();
+	bi.pClearValues = impl.getClearValues();
+	bi.framebuffer = impl.getFramebufferHandle();
+
+	// Calc the layouts
+	Array<VkImageLayout, kMaxColorRenderTargets> colAttLayouts;
+	for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
+	{
+		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
+		colAttLayouts[i] = view.getTextureImpl().computeLayout(colorAttachmentUsages[i], 0);
+	}
+
+	VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
+	if(impl.hasDepthStencil())
+	{
+		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
+		dsAttLayout = view.getTextureImpl().computeLayout(depthStencilAttachmentUsage, 0);
+	}
+
+	VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
+	if(impl.hasSri())
+	{
+		// Technically it's possible for the SRI to be in another layout. Don't bother though
+		sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
+	}
+
+	bi.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
+
+	// Set the render area
 	U32 fbWidth, fbHeight;
-	fbimpl.getAttachmentsSize(fbWidth, fbHeight);
-	self.m_fbSize[0] = fbWidth;
-	self.m_fbSize[1] = fbHeight;
+	impl.getAttachmentsSize(fbWidth, fbHeight);
 
 	ANKI_ASSERT(minx < fbWidth && miny < fbHeight);
 
@@ -408,21 +437,28 @@ void CommandBuffer::beginRenderPass(Framebuffer* fb, const Array<TextureUsageBit
 	height = maxy - miny;
 	ANKI_ASSERT(minx + width <= fbWidth && miny + height <= fbHeight);
 
-	self.m_renderArea[0] = minx;
-	self.m_renderArea[1] = miny;
-	self.m_renderArea[2] = width;
-	self.m_renderArea[3] = height;
-
-	self.m_colorAttachmentUsages = colorAttachmentUsages;
-	self.m_depthStencilAttachmentUsage = depthStencilAttachmentUsage;
+	const Bool flipvp = self.flipViewport();
+	bi.renderArea.offset.x = minx;
+	if(flipvp)
+	{
+		ANKI_ASSERT(height <= fbHeight);
+	}
+	bi.renderArea.offset.y = (flipvp) ? fbHeight - (miny + height) : miny;
+	bi.renderArea.extent.width = width;
+	bi.renderArea.extent.height = height;
 
-	self.m_microCmdb->pushObjectRef(fb);
+	VkSubpassBeginInfo subpassBeginInfo = {};
+	subpassBeginInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO;
+	subpassBeginInfo.contents = VK_SUBPASS_CONTENTS_INLINE;
 
-	self.m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;
+	vkCmdBeginRenderPass2KHR(self.m_handle, &bi, &subpassBeginInfo);
 
 	// Re-set the viewport and scissor because sometimes they are set clamped
 	self.m_viewportDirty = true;
 	self.m_scissorDirty = true;
+
+	self.m_renderedToDefaultFb = self.m_renderedToDefaultFb || impl.hasPresentableTexture();
+	self.m_microCmdb->pushObjectRef(fb);
 }
 
 void CommandBuffer::endRenderPass()
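Editor's note on the relocated code above: when flipViewport() is true, the render area's y offset is mirrored as offset.y = fbHeight - (miny + height). For instance, with fbHeight = 1080, miny = 100 and height = 200, the recorded offset.y becomes 1080 - (100 + 200) = 780, i.e. the same rectangle expressed in the flipped coordinate system.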
@@ -430,12 +466,6 @@ void CommandBuffer::endRenderPass()
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	ANKI_ASSERT(self.insideRenderPass());
-	if(self.m_rpCommandCount == 0)
-	{
-		// Empty pass
-		self.m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
-		self.beginRenderPassInternal();
-	}
 
 	VkSubpassEndInfo subpassEndInfo = {};
 	subpassEndInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO;
@@ -444,13 +474,6 @@ void CommandBuffer::endRenderPass()
 
 	self.m_activeFb = nullptr;
 	self.m_state.endRenderPass();
-
-	// After pushing second level command buffers the state is undefined. Reset the tracker and rebind the dynamic state
-	if(self.m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS)
-	{
-		self.m_state.reset();
-		self.rebindDynamicState();
-	}
 }
 
 void CommandBuffer::setVrsRate(VrsRate rate)
@@ -1186,35 +1209,6 @@ void CommandBuffer::endPipelineQuery(PipelineQuery* query)
 	self.m_microCmdb->pushObjectRef(query);
 }
 
-void CommandBuffer::pushSecondLevelCommandBuffers(ConstWeakArray<CommandBuffer*> cmdbs)
-{
-	ANKI_VK_SELF(CommandBufferImpl);
-	ANKI_ASSERT(cmdbs.getSize() > 0);
-	self.commandCommon();
-	ANKI_ASSERT(self.insideRenderPass());
-	ANKI_ASSERT(self.m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || self.m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
-
-	self.m_subpassContents = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
-
-	if(self.m_rpCommandCount == 0) [[unlikely]]
-	{
-		self.beginRenderPassInternal();
-	}
-
-	DynamicArray<VkCommandBuffer, MemoryPoolPtrWrapper<StackMemoryPool>> handles(self.m_pool);
-	handles.resize(cmdbs.getSize());
-	for(U32 i = 0; i < cmdbs.getSize(); ++i)
-	{
-		ANKI_ASSERT(static_cast<const CommandBufferImpl&>(*cmdbs[i]).m_finalized);
-		handles[i] = static_cast<const CommandBufferImpl&>(*cmdbs[i]).m_handle;
-		self.m_microCmdb->pushObjectRef(cmdbs[i]);
-	}
-
-	vkCmdExecuteCommands(self.m_handle, handles.getSize(), handles.getBegin());
-
-	++self.m_rpCommandCount;
-}
-
 void CommandBuffer::resetTimestampQueries(ConstWeakArray<TimestampQuery*> queries)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -1344,7 +1338,7 @@ CommandBufferImpl::~CommandBufferImpl()
 #if ANKI_EXTRA_CHECKS
 	ANKI_ASSERT(m_debugMarkersPushed == 0);
 
-	if(!m_submitted && !(m_flags & CommandBufferFlag::kSecondLevel))
+	if(!m_submitted)
 	{
 		ANKI_VK_LOGW("Command buffer not submitted");
 	}
@@ -1361,16 +1355,6 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 
 	m_pool = &m_microCmdb->getFastMemoryPool();
 
-	// Store some of the init info for later
-	if(!!(m_flags & CommandBufferFlag::kSecondLevel))
-	{
-		m_activeFb = init.m_framebuffer;
-		m_colorAttachmentUsages = init.m_colorAttachmentUsages;
-		m_depthStencilAttachmentUsage = init.m_depthStencilAttachmentUsage;
-		m_state.beginRenderPass(static_cast<FramebufferImpl*>(m_activeFb));
-		m_microCmdb->pushObjectRef(m_activeFb);
-	}
-
 	for(DSStateTracker& state : m_dsetState)
 	{
 		state.init(m_pool);
@@ -1416,39 +1400,6 @@ void CommandBufferImpl::beginRecording()
 	begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
 	begin.pInheritanceInfo = &inheritance;
 
-	if(!!(m_flags & CommandBufferFlag::kSecondLevel))
-	{
-		FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);
-
-		// Calc the layouts
-		Array<VkImageLayout, kMaxColorRenderTargets> colAttLayouts;
-		for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
-		{
-			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
-			colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
-		}
-
-		VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
-		if(impl.hasDepthStencil())
-		{
-			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
-			dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
-		}
-
-		VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
-		if(impl.hasSri())
-		{
-			// Technically it's possible for SRI to be in other layout. Don't bother though
-			sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
-		}
-
-		inheritance.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
-		inheritance.subpass = 0;
-		inheritance.framebuffer = impl.getFramebufferHandle();
-
-		begin.flags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
-	}
-
 	vkBeginCommandBuffer(m_handle, &begin);
 
 	// Stats
@@ -1458,76 +1409,6 @@ void CommandBufferImpl::beginRecording()
 	}
 }
 
-void CommandBufferImpl::beginRenderPassInternal()
-{
-	FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);
-
-	m_state.beginRenderPass(&impl);
-
-	VkRenderPassBeginInfo bi = {};
-	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
-	bi.clearValueCount = impl.getAttachmentCount();
-	bi.pClearValues = impl.getClearValues();
-	bi.framebuffer = impl.getFramebufferHandle();
-
-	// Calc the layouts
-	Array<VkImageLayout, kMaxColorRenderTargets> colAttLayouts;
-	for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
-	{
-		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
-		colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
-	}
-
-	VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
-	if(impl.hasDepthStencil())
-	{
-		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
-		dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
-	}
-
-	VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
-	if(impl.hasSri())
-	{
-		// Technically it's possible for SRI to be in other layout. Don't bother though
-		sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
-	}
-
-	bi.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
-
-	const Bool flipvp = flipViewport();
-	bi.renderArea.offset.x = m_renderArea[0];
-	if(flipvp)
-	{
-		ANKI_ASSERT(m_renderArea[3] <= m_fbSize[1]);
-	}
-	bi.renderArea.offset.y = (flipvp) ? m_fbSize[1] - (m_renderArea[1] + m_renderArea[3]) : m_renderArea[1];
-	bi.renderArea.extent.width = m_renderArea[2];
-	bi.renderArea.extent.height = m_renderArea[3];
-
-#if !ANKI_PLATFORM_MOBILE
-	// nVidia SRI cache workaround
-	if(impl.hasSri())
-	{
-		VkMemoryBarrier memBarrier = {};
-		memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
-		memBarrier.dstAccessMask = VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;
-
-		const VkPipelineStageFlags srcStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
-		const VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
-
-		vkCmdPipelineBarrier(m_handle, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr);
-	}
-#endif
-
-	VkSubpassBeginInfo subpassBeginInfo = {};
-	subpassBeginInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO;
-	subpassBeginInfo.contents = m_subpassContents;
-
-	vkCmdBeginRenderPass2KHR(m_handle, &bi, &subpassBeginInfo);
-
-	m_renderedToDefaultFb = m_renderedToDefaultFb || impl.hasPresentableTexture();
-}
-
 void CommandBufferImpl::endRecording()
 {
 	commandCommon();
@@ -1573,67 +1454,14 @@ void CommandBufferImpl::endRecording()
 #endif
 }
 
-void CommandBufferImpl::rebindDynamicState()
-{
-	m_viewportDirty = true;
-	m_lastViewport = {};
-	m_scissorDirty = true;
-	m_lastScissor = {};
-	m_vrsRateDirty = true;
-	m_vrsRate = VrsRate::k1x1;
-
-	// Rebind the stencil compare mask
-	if(m_stencilCompareMasks[0] == m_stencilCompareMasks[1])
-	{
-		vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[0]);
-	}
-	else
-	{
-		vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilCompareMasks[0]);
-		vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[1]);
-	}
-
-	// Rebind the stencil write mask
-	if(m_stencilWriteMasks[0] == m_stencilWriteMasks[1])
-	{
-		vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[0]);
-	}
-	else
-	{
-		vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilWriteMasks[0]);
-		vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[1]);
-	}
-
-	// Rebind the stencil reference
-	if(m_stencilReferenceMasks[0] == m_stencilReferenceMasks[1])
-	{
-		vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[0]);
-	}
-	else
-	{
-		vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilReferenceMasks[0]);
-		vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[1]);
-	}
-}
-
 void CommandBufferImpl::drawcallCommon()
 {
 	// Preconditions
 	commandCommon();
 	ANKI_ASSERT(m_graphicsProg);
-	ANKI_ASSERT(insideRenderPass() || secondLevel());
-	ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || m_subpassContents == VK_SUBPASS_CONTENTS_INLINE);
+	ANKI_ASSERT(insideRenderPass());
 	ANKI_ASSERT(m_graphicsProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize && "Forgot to set pushConstants");
 
-	m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
-
-	if(m_rpCommandCount == 0 && !secondLevel())
-	{
-		beginRenderPassInternal();
-	}
-
-	++m_rpCommandCount;
-
 	// Get or create ppline
 	Pipeline ppline;
 	Bool stateDirty;

+ 2 - 24
AnKi/Gr/Vulkan/VkCommandBuffer.h

@@ -70,11 +70,6 @@ public:
 		return m_empty;
 	}
 
-	Bool isSecondLevel() const
-	{
-		return !!(m_flags & CommandBufferFlag::kSecondLevel);
-	}
-
 	void writeTimestampInternal(TimestampQuery* query);
 
 	// To enable using Anki's commandbuffers for external workloads
@@ -90,7 +85,7 @@ public:
 		return m_finalized;
 	}
 
-#if ANKI_EXTRA_CHECKS
+#if ANKI_ASSERTIONS_ENABLED
 	void setSubmitted()
 	{
 		ANKI_ASSERT(!m_submitted);
@@ -109,7 +104,7 @@ private:
 	Bool m_empty : 1 = true;
 	Bool m_beganRecording : 1 = false;
 	Bool m_debugMarkers : 1 = false;
-#if ANKI_EXTRA_CHECKS
+#if ANKI_ASSERTIONS_ENABLED
 	U32 m_commandCount = 0;
 	U32 m_setPushConstantsSize = 0;
 	U32 m_debugMarkersPushed = 0;
@@ -117,11 +112,6 @@ private:
 #endif
 
 	Framebuffer* m_activeFb = nullptr;
-	Array<U32, 4> m_renderArea = {0, 0, kMaxU32, kMaxU32};
-	Array<U32, 2> m_fbSize = {0, 0};
-	U32 m_rpCommandCount = 0; ///< Number of drawcalls or pushed cmdbs in rp.
-	Array<TextureUsageBit, kMaxColorRenderTargets> m_colorAttachmentUsages = {};
-	TextureUsageBit m_depthStencilAttachmentUsage = TextureUsageBit::kNone;
 
 	PipelineStateTracker m_state;
 
@@ -131,8 +121,6 @@ private:
 	ShaderProgramImpl* m_computeProg ANKI_DEBUG_CODE(= nullptr);
 	ShaderProgramImpl* m_rtProg ANKI_DEBUG_CODE(= nullptr);
 
-	VkSubpassContents m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;
-
 	/// @name state_opts
 	/// @{
 	Array<U32, 4> m_viewport = {0, 0, 0, 0};
@@ -149,9 +137,6 @@ private:
 #endif
 	Bool m_vrsRateDirty = true;
 	VrsRate m_vrsRate = VrsRate::k1x1;
-
-	/// Rebind the above dynamic state. Needed after pushing secondary command buffers (they dirty the state).
-	void rebindDynamicState();
 	/// @}
 
 	/// Some common operations per command.
@@ -182,13 +167,6 @@ private:
 		return m_activeFb != nullptr;
 	}
 
-	void beginRenderPassInternal();
-
-	Bool secondLevel() const
-	{
-		return !!(m_flags & CommandBufferFlag::kSecondLevel);
-	}
-
 	void setImageBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess, VkImageLayout prevLayout, VkPipelineStageFlags dstStage,
 						 VkAccessFlags dstAccess, VkImageLayout newLayout, VkImage img, const VkImageSubresourceRange& range);
 

+ 10 - 19
AnKi/Gr/Vulkan/VkCommandBufferFactory.cpp

@@ -85,14 +85,11 @@ Error CommandBufferThreadAllocator::init()
 
 void CommandBufferThreadAllocator::destroy()
 {
-	for(U32 secondLevel = 0; secondLevel < 2; ++secondLevel)
+	for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
 	{
-		for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
+		for(VulkanQueueType queue : EnumIterable<VulkanQueueType>())
 		{
-			for(VulkanQueueType queue : EnumIterable<VulkanQueueType>())
-			{
-				m_recyclers[secondLevel][smallBatch][queue].destroy();
-			}
+			m_recyclers[smallBatch][queue].destroy();
 		}
 	}
 
@@ -110,11 +107,10 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 {
 	ANKI_ASSERT(!!(cmdbFlags & CommandBufferFlag::kComputeWork) ^ !!(cmdbFlags & CommandBufferFlag::kGeneralWork));
 
-	const Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::kSecondLevel);
 	const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::kSmallBatch);
 	const VulkanQueueType queue = getQueueTypeFromCommandBufferFlags(cmdbFlags, m_factory->m_queueFamilies);
 
-	MicroObjectRecycler<MicroCommandBuffer>& recycler = m_recyclers[secondLevel][smallBatch][queue];
+	MicroObjectRecycler<MicroCommandBuffer>& recycler = m_recyclers[smallBatch][queue];
 
 	MicroCommandBuffer* out = recycler.findToReuse();
 
@@ -125,7 +121,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 		VkCommandBufferAllocateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
 		ci.commandPool = m_pools[queue];
-		ci.level = (secondLevel) ? VK_COMMAND_BUFFER_LEVEL_SECONDARY : VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+		ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 		ci.commandBufferCount = 1;
 
 		ANKI_TRACE_INC_COUNTER(VkCommandBufferCreate, 1);
@@ -167,26 +163,21 @@ void CommandBufferThreadAllocator::deleteCommandBuffer(MicroCommandBuffer* ptr)
 {
 	ANKI_ASSERT(ptr);
 
-	const Bool secondLevel = !!(ptr->m_flags & CommandBufferFlag::kSecondLevel);
 	const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::kSmallBatch);
 
-	m_recyclers[secondLevel][smallBatch][ptr->m_queue].recycle(ptr);
+	m_recyclers[smallBatch][ptr->m_queue].recycle(ptr);
 }
 
 void CommandBufferFactory::destroy()
 {
-	// First trim the caches for all recyclers. This will release the primaries and populate the recyclers of
-	// secondaries
+	// First trim the caches for all recyclers.
 	for(CommandBufferThreadAllocator* talloc : m_threadAllocs)
 	{
-		for(U32 secondLevel = 0; secondLevel < 2; ++secondLevel)
+		for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
 		{
-			for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
+			for(VulkanQueueType queue : EnumIterable<VulkanQueueType>())
 			{
-				for(VulkanQueueType queue : EnumIterable<VulkanQueueType>())
-				{
-					talloc->m_recyclers[secondLevel][smallBatch][queue].trimCache();
-				}
+				talloc->m_recyclers[smallBatch][queue].trimCache();
 			}
 		}
 	}

+ 1 - 2
AnKi/Gr/Vulkan/VkCommandBufferFactory.h

@@ -55,7 +55,6 @@ public:
 
 	void setFence(MicroFence* fence)
 	{
-		ANKI_ASSERT(!(m_flags & CommandBufferFlag::kSecondLevel));
 		m_fence.reset(fence);
 	}
 
@@ -191,7 +190,7 @@ private:
 	Atomic<U32> m_createdCmdbs = {0};
 #endif
 
-	Array3d<MicroObjectRecycler<MicroCommandBuffer>, 2, 2, U(VulkanQueueType::kCount)> m_recyclers;
+	Array2d<MicroObjectRecycler<MicroCommandBuffer>, 2, U(VulkanQueueType::kCount)> m_recyclers;
 };
 
 /// Command buffer object recycler.

+ 4 - 2
AnKi/Gr/Vulkan/VkGrManager.cpp

@@ -158,6 +158,8 @@ ANKI_NEW_GR_OBJECT(GrUpscaler)
 
 void GrManager::submit(WeakArray<CommandBuffer*> cmdbs, WeakArray<Fence*> waitFences, FencePtr* signalFence)
 {
+	ANKI_VK_SELF(GrManagerImpl);
+
 	Bool renderedToDefaultFb = false;
 	Array<MicroCommandBuffer*, 16> mcmdbs;
 	for(U32 i = 0; i < cmdbs.getSize(); ++i)
@@ -180,8 +182,8 @@ void GrManager::submit(WeakArray<CommandBuffer*> cmdbs, WeakArray<Fence*> waitFe
 	}
 
 	MicroSemaphorePtr signalSemaphore;
-	getGrManagerImpl().flushCommandBuffers({mcmdbs.getBegin(), cmdbs.getSize()}, renderedToDefaultFb,
-										   {waitSemaphores.getBegin(), waitFences.getSize()}, (signalFence) ? &signalSemaphore : nullptr, false);
+	self.flushCommandBuffers({mcmdbs.getBegin(), cmdbs.getSize()}, renderedToDefaultFb, {waitSemaphores.getBegin(), waitFences.getSize()},
+							 (signalFence) ? &signalSemaphore : nullptr, false);
 
 	if(signalFence)
 	{

+ 1 - 1
AnKi/Renderer/Dbg.cpp

@@ -252,7 +252,7 @@ void Dbg::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("Debug");
 
-	pass.setWork(1, [this, &ctx](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
 		run(rgraphCtx, ctx);
 	});
 

+ 1 - 1
AnKi/Renderer/FinalComposite.cpp

@@ -76,7 +76,7 @@ void FinalComposite::populateRenderGraph(RenderingContext& ctx)
 	// Create the pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("Final Composite");
 
-	pass.setWork(1, [this](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
 		run(rgraphCtx);
 	});
 	pass.setFramebufferInfo(m_fbDescr, {ctx.m_outRenderTarget});

+ 1 - 1
AnKi/Renderer/GBuffer.cpp

@@ -207,7 +207,7 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 	pass.setFramebufferInfo(m_fbDescr, ConstWeakArray<RenderTargetHandle>(&rts[0], kGBufferColorRenderTargetCount), m_runCtx.m_crntFrameDepthRt,
 							sriRt);
-	pass.setWork(1, [this, &ctx, visOut, meshletVisOut](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork([this, &ctx, visOut, meshletVisOut](RenderPassWorkContext& rgraphCtx) {
 		ANKI_TRACE_SCOPED_EVENT(GBuffer);
 
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;

+ 6 - 6
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -267,8 +267,8 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				pass.newBufferDependency((meshletVisOut.isFilled()) ? meshletVisOut.m_dependency : visOut.m_dependency,
 										 BufferUsageBit::kIndirectDraw);
 
-				pass.setWork(1, [this, visOut, meshletVisOut, viewProjMat = frustum.getViewProjectionMatrix(),
-								 viewMat = frustum.getViewMatrix()](RenderPassWorkContext& rgraphCtx) {
+				pass.setWork([this, visOut, meshletVisOut, viewProjMat = frustum.getViewProjectionMatrix(),
+							  viewMat = frustum.getViewMatrix()](RenderPassWorkContext& rgraphCtx) {
 					ANKI_TRACE_SCOPED_EVENT(RIndirectDiffuse);
 					CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
@@ -348,7 +348,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				pass.newBufferDependency((shadowMeshletVisOut.isFilled()) ? shadowMeshletVisOut.m_dependency : shadowVisOut.m_dependency,
 										 BufferUsageBit::kIndirectDraw);
 
-				pass.setWork(1, [this, shadowVisOut, shadowMeshletVisOut, cascadeViewProjMat, cascadeViewMat](RenderPassWorkContext& rgraphCtx) {
+				pass.setWork([this, shadowVisOut, shadowMeshletVisOut, cascadeViewProjMat, cascadeViewMat](RenderPassWorkContext& rgraphCtx) {
 					ANKI_TRACE_SCOPED_EVENT(RIndirectDiffuse);
 					CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
@@ -422,9 +422,9 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 					pass.newTextureDependency(getRenderer().getSky().getSkyLutRt(), TextureUsageBit::kSampledFragment);
 				}
 
-				pass.setWork(1, [this, visibleLightsBuffer = lightVis.m_visiblesBuffer, viewProjMat = frustum.getViewProjectionMatrix(), cellCenter,
-								 gbufferColorRts, gbufferDepthRt, probeToRefresh, cascadeViewProjMat, shadowsRt, faceIdx = f,
-								 &rctx](RenderPassWorkContext& rgraphCtx) {
+				pass.setWork([this, visibleLightsBuffer = lightVis.m_visiblesBuffer, viewProjMat = frustum.getViewProjectionMatrix(), cellCenter,
+							  gbufferColorRts, gbufferDepthRt, probeToRefresh, cascadeViewProjMat, shadowsRt, faceIdx = f,
+							  &rctx](RenderPassWorkContext& rgraphCtx) {
 					ANKI_TRACE_SCOPED_EVENT(RIndirectDiffuse);
 
 					const LightComponent* dirLightc = SceneGraph::getSingleton().getDirectionalLight();

+ 1 - 1
AnKi/Renderer/LightShading.cpp

@@ -297,7 +297,7 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("Light&FW Shad");
 
-	pass.setWork(1, [this, &ctx](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
 		run(ctx, rgraphCtx);
 	});
 	pass.setFramebufferInfo(m_lightShading.m_fbDescr, {m_runCtx.m_rt}, getRenderer().getGBuffer().getDepthRt(), sriRt);

+ 1 - 7
AnKi/Renderer/MainRenderer.cpp

@@ -145,15 +145,9 @@ Error MainRenderer::render(Texture* presentTex)
 	// Bake the render graph
 	m_rgraph->compileNewGraph(ctx.m_renderGraphDescr, m_framePool);
 
-	// Populate the 2nd level command buffers
-	m_rgraph->runSecondLevel();
-
-	// Populate 1st level command buffers
-	m_rgraph->run();
-
 	// Flush
 	FencePtr fence;
-	m_rgraph->flush(&fence);
+	m_rgraph->recordAndSubmitCommandBuffers(&fence);
 
 	// Reset for the next frame
 	m_rgraph->reset();

+ 3 - 3
AnKi/Renderer/ProbeReflections.cpp

@@ -265,8 +265,8 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			pass.newTextureDependency(gbufferDepthRt, TextureUsageBit::kAllFramebuffer, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 			pass.newBufferDependency((meshletVisOut.isFilled()) ? meshletVisOut.m_dependency : visOut.m_dependency, BufferUsageBit::kIndirectDraw);
 
-			pass.setWork(1, [this, visOut, meshletVisOut, viewProjMat = frustum.getViewProjectionMatrix(),
-							 viewMat = frustum.getViewMatrix()](RenderPassWorkContext& rgraphCtx) {
+			pass.setWork([this, visOut, meshletVisOut, viewProjMat = frustum.getViewProjectionMatrix(),
+						  viewMat = frustum.getViewMatrix()](RenderPassWorkContext& rgraphCtx) {
 				ANKI_TRACE_SCOPED_EVENT(ProbeReflections);
 				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
@@ -344,7 +344,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			pass.newBufferDependency((shadowMeshletVisOut.isFilled()) ? shadowMeshletVisOut.m_dependency : shadowVisOut.m_dependency,
 									 BufferUsageBit::kIndirectDraw);
 
-			pass.setWork(1, [this, shadowVisOut, shadowMeshletVisOut, cascadeViewProjMat, cascadeViewMat](RenderPassWorkContext& rgraphCtx) {
+			pass.setWork([this, shadowVisOut, shadowMeshletVisOut, cascadeViewProjMat, cascadeViewMat](RenderPassWorkContext& rgraphCtx) {
 				ANKI_TRACE_SCOPED_EVENT(ProbeReflections);
 
 				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;

+ 1 - 1
AnKi/Renderer/ShadowMapping.cpp

@@ -668,7 +668,7 @@ void ShadowMapping::createDrawShadowsPass(ConstWeakArray<ShadowSubpassInfo> subp
 	pass.newBufferDependency((meshletVisOut.isFilled()) ? meshletVisOut.m_dependency : visOut.m_dependency, BufferUsageBit::kIndirectDraw);
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 
-	pass.setWork(1 /*TODO*/, [this, visOut, meshletVisOut, subpasses, loadFb](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork([this, visOut, meshletVisOut, subpasses, loadFb](RenderPassWorkContext& rgraphCtx) {
 		ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
 
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;

+ 3 - 30
Tests/Gr/Gr.cpp

@@ -1254,7 +1254,7 @@ static void drawOffscreenDrawcalls([[maybe_unused]] GrManager& gr, ShaderProgram
 	cmdb->drawIndexed(PrimitiveTopology::kTriangles, 6 * 2 * 3);
 }
 
-static void drawOffscreen(GrManager& gr, Bool useSecondLevel)
+static void drawOffscreen(GrManager& gr)
 {
 	//
 	// Create textures
@@ -1332,25 +1332,7 @@ static void drawOffscreen(GrManager& gr, Bool useSecondLevel)
 		setTextureSurfaceBarrier(cmdb, dp, TextureUsageBit::kNone, TextureUsageBit::kAllFramebuffer, TextureSurfaceInfo(0, 0, 0, 0));
 		cmdb->beginRenderPass(fb.get(), {{TextureUsageBit::kFramebufferWrite, TextureUsageBit::kFramebufferWrite}}, TextureUsageBit::kAllFramebuffer);
 
-		if(!useSecondLevel)
-		{
-			drawOffscreenDrawcalls(gr, prog, cmdb, TEX_SIZE, indices, verts);
-		}
-		else
-		{
-			CommandBufferInitInfo cinit;
-			cinit.m_flags = CommandBufferFlag::kSecondLevel | CommandBufferFlag::kGeneralWork;
-			cinit.m_framebuffer = fb.get();
-			CommandBufferPtr cmdb2 = gr.newCommandBuffer(cinit);
-
-			drawOffscreenDrawcalls(gr, prog, cmdb2, TEX_SIZE, indices, verts);
-
-			cmdb->endRecording();
-			GrManager::getSingleton().submit(cmdb.get());
-
-			CommandBuffer* pCmdb = cmdb2.get();
-			cmdb->pushSecondLevelCommandBuffers({&pCmdb, 1});
-		}
+		drawOffscreenDrawcalls(gr, prog, cmdb, TEX_SIZE, indices, verts);
 
 		cmdb->endRenderPass();
 
@@ -1391,16 +1373,7 @@ ANKI_TEST(Gr, DrawOffscreen)
 {
 	COMMON_BEGIN()
 
-	drawOffscreen(*g_gr, false);
-
-	COMMON_END()
-}
-
-ANKI_TEST(Gr, DrawWithSecondLevel)
-{
-	COMMON_BEGIN()
-
-	drawOffscreen(*g_gr, true);
+	drawOffscreen(*g_gr);
 
 	COMMON_END()
 }