浏览代码

[BUGFIX] Bugs in barriers, render graph and in BitSet

Panagiotis Christopoulos Charitos 8 年之前
父节点
当前提交
9766fdf05e

+ 81 - 18
src/anki/gr/RenderGraph.cpp

@@ -124,7 +124,7 @@ class RenderGraph::BakeContext
 public:
 public:
 	StackAllocator<U8> m_alloc;
 	StackAllocator<U8> m_alloc;
 	DynamicArray<Pass> m_passes;
 	DynamicArray<Pass> m_passes;
-	BitSet<MAX_RENDER_GRAPH_PASSES> m_passIsInBatch = {false};
+	BitSet<MAX_RENDER_GRAPH_PASSES, U64> m_passIsInBatch = {false};
 	DynamicArray<Batch> m_batches;
 	DynamicArray<Batch> m_batches;
 	DynamicArray<RT> m_rts;
 	DynamicArray<RT> m_rts;
 	DynamicArray<Buffer> m_buffers;
 	DynamicArray<Buffer> m_buffers;
@@ -422,11 +422,11 @@ Bool RenderGraph::passADependsOnB(const RenderPassDescriptionBase& a, const Rend
 	// Render targets
 	// Render targets
 	{
 	{
 		// Compute the 3 types of dependencies
 		// Compute the 3 types of dependencies
-		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> aReadBWrite = a.m_consumerRtMask & b.m_producerRtMask;
-		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> aWriteBRead = a.m_producerRtMask & b.m_consumerRtMask;
-		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> aWriteBWrite = a.m_producerRtMask & b.m_producerRtMask;
+		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> aReadBWrite = a.m_consumerRtMask & b.m_producerRtMask;
+		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> aWriteBRead = a.m_producerRtMask & b.m_consumerRtMask;
+		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> aWriteBWrite = a.m_producerRtMask & b.m_producerRtMask;
 
 
-		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> fullDep = aReadBWrite | aWriteBRead | aWriteBWrite;
+		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> fullDep = aReadBWrite | aWriteBRead | aWriteBWrite;
 
 
 		if(fullDep.getAny())
 		if(fullDep.getAny())
 		{
 		{
@@ -450,11 +450,11 @@ Bool RenderGraph::passADependsOnB(const RenderPassDescriptionBase& a, const Rend
 
 
 	// Buffers
 	// Buffers
 	{
 	{
-		BitSet<MAX_RENDER_GRAPH_BUFFERS> aReadBWrite = a.m_consumerBufferMask & b.m_producerBufferMask;
-		BitSet<MAX_RENDER_GRAPH_BUFFERS> aWriteBRead = a.m_producerBufferMask & b.m_consumerBufferMask;
-		BitSet<MAX_RENDER_GRAPH_BUFFERS> aWriteBWrite = a.m_producerBufferMask & b.m_producerBufferMask;
+		BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> aReadBWrite = a.m_consumerBufferMask & b.m_producerBufferMask;
+		BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> aWriteBRead = a.m_producerBufferMask & b.m_consumerBufferMask;
+		BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> aWriteBWrite = a.m_producerBufferMask & b.m_producerBufferMask;
 
 
-		BitSet<MAX_RENDER_GRAPH_BUFFERS> fullDep = aReadBWrite | aWriteBRead | aWriteBWrite;
+		BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> fullDep = aReadBWrite | aWriteBRead | aWriteBWrite;
 
 
 		if(fullDep.getAny())
 		if(fullDep.getAny())
 		{
 		{
@@ -690,6 +690,9 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr, BakeCont
 	// For all batches
 	// For all batches
 	for(Batch& batch : ctx.m_batches)
 	for(Batch& batch : ctx.m_batches)
 	{
 	{
+		BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> rtHasBarrierMask = {false};
+		BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> buffHasBarrierMask = {false};
+
 		// For all passes of that batch
 		// For all passes of that batch
 		for(U passIdx : batch.m_passIndices)
 		for(U passIdx : batch.m_passIndices)
 		{
 		{
@@ -704,18 +707,51 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr, BakeCont
 					const TextureUsageBit consumerUsage = consumer.m_texture.m_usage;
 					const TextureUsageBit consumerUsage = consumer.m_texture.m_usage;
 
 
 					Bool anySurfaceFound = false;
 					Bool anySurfaceFound = false;
-					const Bool wholeTex = consumer.m_texture.m_wholeTex;
+					const Bool consumerWholeTex = consumer.m_texture.m_wholeTex;
 					for(RT::Usage& u : ctx.m_rts[rtIdx].m_surfUsages)
 					for(RT::Usage& u : ctx.m_rts[rtIdx].m_surfUsages)
 					{
 					{
-						if(wholeTex || u.m_surface == consumer.m_texture.m_surface)
+						if(consumerWholeTex || u.m_surface == consumer.m_texture.m_surface)
 						{
 						{
 							anySurfaceFound = true;
 							anySurfaceFound = true;
+
+							// Check if we might need a new barrier
 							if(u.m_usage != consumerUsage)
 							if(u.m_usage != consumerUsage)
 							{
 							{
-								batch.m_barriersBefore.emplaceBack(
-									alloc, consumer.m_texture.m_handle.m_idx, u.m_usage, consumerUsage, u.m_surface);
-
-								u.m_usage = consumer.m_texture.m_usage;
+								const Bool rtHasBarrier = rtHasBarrierMask.get(rtIdx);
+
+								if(!rtHasBarrier)
+								{
+									// RT hasn't had a barrier in this batch, add a new barrier
+
+									batch.m_barriersBefore.emplaceBack(alloc,
+										consumer.m_texture.m_handle.m_idx,
+										u.m_usage,
+										consumerUsage,
+										u.m_surface);
+
+									u.m_usage = consumer.m_texture.m_usage;
+									rtHasBarrierMask.set(rtIdx);
+								}
+								else
+								{
+									// RT already in a barrier, merge the 2 barriers
+
+									Barrier* barrierToMergeTo = nullptr;
+									for(Barrier& b : batch.m_barriersBefore)
+									{
+										if(b.m_isTexture && b.m_texture.m_idx == rtIdx)
+										{
+											barrierToMergeTo = &b;
+											break;
+										}
+									}
+
+									ANKI_ASSERT(barrierToMergeTo);
+									ANKI_ASSERT(!!barrierToMergeTo->m_texture.m_usageAfter);
+									ANKI_ASSERT(!!u.m_usage);
+									barrierToMergeTo->m_texture.m_usageAfter |= consumerUsage;
+									u.m_usage |= consumerUsage;
+								}
 							}
 							}
 						}
 						}
 					}
 					}
@@ -740,10 +776,37 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr, BakeCont
 
 
 					if(consumerUsage != ctx.m_buffers[buffIdx].m_usage)
 					if(consumerUsage != ctx.m_buffers[buffIdx].m_usage)
 					{
 					{
-						batch.m_barriersBefore.emplaceBack(
-							alloc, buffIdx, ctx.m_buffers[buffIdx].m_usage, consumerUsage);
+						const Bool buffHasBarrier = buffHasBarrierMask.get(buffIdx);
 
 
-						ctx.m_buffers[buffIdx].m_usage = consumerUsage;
+						if(!buffHasBarrier)
+						{
+							// Buff hasn't had a barrier in this batch, add a new barrier
+
+							batch.m_barriersBefore.emplaceBack(
+								alloc, buffIdx, ctx.m_buffers[buffIdx].m_usage, consumerUsage);
+
+							ctx.m_buffers[buffIdx].m_usage = consumerUsage;
+							buffHasBarrierMask.set(buffIdx);
+						}
+						else
+						{
+							// Buff already in a barrier, merge the 2 barriers
+
+							Barrier* barrierToMergeTo = nullptr;
+							for(Barrier& b : batch.m_barriersBefore)
+							{
+								if(!b.m_isTexture && b.m_buffer.m_idx == buffIdx)
+								{
+									barrierToMergeTo = &b;
+									break;
+								}
+							}
+
+							ANKI_ASSERT(barrierToMergeTo);
+							ANKI_ASSERT(!!barrierToMergeTo->m_buffer.m_usageAfter);
+							barrierToMergeTo->m_buffer.m_usageAfter |= consumerUsage;
+							ctx.m_buffers[buffIdx].m_usage = barrierToMergeTo->m_buffer.m_usageAfter;
+						}
 					}
 					}
 				}
 				}
 			} // For all consumers
 			} // For all consumers

+ 4 - 4
src/anki/gr/RenderGraph.h

@@ -312,10 +312,10 @@ protected:
 	DynamicArray<RenderPassDependency> m_consumers;
 	DynamicArray<RenderPassDependency> m_consumers;
 	DynamicArray<RenderPassDependency> m_producers;
 	DynamicArray<RenderPassDependency> m_producers;
 
 
-	BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> m_consumerRtMask = {false};
-	BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS> m_producerRtMask = {false};
-	BitSet<MAX_RENDER_GRAPH_BUFFERS> m_consumerBufferMask = {false};
-	BitSet<MAX_RENDER_GRAPH_BUFFERS> m_producerBufferMask = {false};
+	BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> m_consumerRtMask = {false};
+	BitSet<MAX_RENDER_GRAPH_RENDER_TARGETS, U64> m_producerRtMask = {false};
+	BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> m_consumerBufferMask = {false};
+	BitSet<MAX_RENDER_GRAPH_BUFFERS, U64> m_producerBufferMask = {false};
 
 
 	String m_name;
 	String m_name;
 
 

+ 3 - 2
src/anki/gr/vulkan/BufferImpl.cpp

@@ -195,8 +195,9 @@ VkPipelineStageFlags BufferImpl::computePplineStage(BufferUsageBit usage)
 
 
 	if(!!(usage & (BufferUsageBit::INDEX | BufferUsageBit::VERTEX)))
 	if(!!(usage & (BufferUsageBit::INDEX | BufferUsageBit::VERTEX)))
 	{
 	{
-		stageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
-			| VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
+		stageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
+			| VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
+			| VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
 	}
 	}
 
 
 	if(!!(usage & BufferUsageBit::INDIRECT))
 	if(!!(usage & BufferUsageBit::INDIRECT))

+ 11 - 8
src/anki/gr/vulkan/CommandBufferImpl.inl.h

@@ -517,21 +517,24 @@ inline void CommandBufferImpl::drawcallCommon()
 	{
 	{
 		const Bool flipvp = flipViewport();
 		const Bool flipvp = flipViewport();
 
 
-		const I minx = m_viewport[0];
-		const I miny = m_viewport[1];
-		const I width = m_viewport[2];
-		const I height = m_viewport[3];
-
 		U32 fbWidth, fbHeight;
 		U32 fbWidth, fbHeight;
 		m_activeFb->m_impl->getAttachmentsSize(fbWidth, fbHeight);
 		m_activeFb->m_impl->getAttachmentsSize(fbWidth, fbHeight);
 
 
+		const U32 minx = m_viewport[0];
+		const U32 miny = m_viewport[1];
+		const U32 width = min<U32>(fbWidth, m_viewport[2]);
+		const U32 height = min<U32>(fbHeight, m_viewport[3]);
+		ANKI_ASSERT(width > 0 && height > 0);
+		ANKI_ASSERT(minx + width <= fbWidth);
+		ANKI_ASSERT(miny + height <= fbHeight);
+
 		VkViewport s;
 		VkViewport s;
 		s.x = minx;
 		s.x = minx;
 		s.y = (flipvp) ? (fbHeight - miny) : miny; // Move to the bottom;
 		s.y = (flipvp) ? (fbHeight - miny) : miny; // Move to the bottom;
 		s.width = width;
 		s.width = width;
-		s.height = (flipvp) ? -height : height;
-		s.minDepth = 0.0;
-		s.maxDepth = 1.0;
+		s.height = (flipvp) ? -F32(height) : height;
+		s.minDepth = 0.0f;
+		s.maxDepth = 1.0f;
 		ANKI_CMD(vkCmdSetViewport(m_handle, 0, 1, &s), ANY_OTHER_COMMAND);
 		ANKI_CMD(vkCmdSetViewport(m_handle, 0, 1, &s), ANY_OTHER_COMMAND);
 
 
 		m_viewportDirty = false;
 		m_viewportDirty = false;

+ 1 - 0
src/anki/renderer/ForwardShading.cpp

@@ -102,6 +102,7 @@ void ForwardShading::drawVolumetric(RenderingContext& ctx, RenderPassWorkContext
 {
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
+	cmdb->setViewport(0, 0, m_width, m_height);
 	cmdb->bindShaderProgram(m_vol.m_grProg);
 	cmdb->bindShaderProgram(m_vol.m_grProg);
 	cmdb->setBlendFactors(0, BlendFactor::ONE, BlendFactor::ONE);
 	cmdb->setBlendFactors(0, BlendFactor::ONE, BlendFactor::ONE);
 	cmdb->setDepthWrite(false);
 	cmdb->setDepthWrite(false);

+ 9 - 9
src/anki/util/BitSet.h

@@ -131,7 +131,7 @@ public:
 	{
 	{
 		U high, low;
 		U high, low;
 		position(static_cast<U>(pos), high, low);
 		position(static_cast<U>(pos), high, low);
-		ChunkType mask = MASK >> low;
+		const ChunkType mask = ChunkType(1) << low;
 		m_chunks[high] = (setBits) ? (m_chunks[high] | mask) : (m_chunks[high] & ~mask);
 		m_chunks[high] = (setBits) ? (m_chunks[high] | mask) : (m_chunks[high] & ~mask);
 	}
 	}
 
 
@@ -178,7 +178,7 @@ public:
 	{
 	{
 		U high, low;
 		U high, low;
 		position(static_cast<U>(pos), high, low);
 		position(static_cast<U>(pos), high, low);
-		ChunkType mask = MASK >> low;
+		const ChunkType mask = ChunkType(1) << low;
 		m_chunks[high] ^= mask;
 		m_chunks[high] ^= mask;
 	}
 	}
 
 
@@ -188,7 +188,7 @@ public:
 	{
 	{
 		U high, low;
 		U high, low;
 		position(static_cast<U>(pos), high, low);
 		position(static_cast<U>(pos), high, low);
-		ChunkType mask = MASK >> low;
+		const ChunkType mask = ChunkType(1) << low;
 		return (m_chunks[high] & mask) != 0;
 		return (m_chunks[high] & mask) != 0;
 	}
 	}
 
 
@@ -219,9 +219,6 @@ protected:
 	/// Number of chunks.
 	/// Number of chunks.
 	static const U CHUNK_COUNT = (N + (CHUNK_BIT_COUNT - 1)) / CHUNK_BIT_COUNT;
 	static const U CHUNK_COUNT = (N + (CHUNK_BIT_COUNT - 1)) / CHUNK_BIT_COUNT;
 
 
-	/// A mask for some stuff.
-	static const ChunkType MASK = ChunkType(1) << (CHUNK_BIT_COUNT - 1);
-
 	ChunkType m_chunks[CHUNK_COUNT];
 	ChunkType m_chunks[CHUNK_COUNT];
 
 
 	BitSet()
 	BitSet()
@@ -240,9 +237,12 @@ protected:
 	/// Zero the unused bits.
 	/// Zero the unused bits.
 	void zeroUnusedBits()
 	void zeroUnusedBits()
 	{
 	{
-		const ChunkType REMAINING_BITS = N - (CHUNK_COUNT - 1) * CHUNK_BIT_COUNT;
-		const ChunkType REMAINING_BITMASK = std::numeric_limits<ChunkType>::max() >> REMAINING_BITS;
-		m_chunks[CHUNK_COUNT - 1] ^= REMAINING_BITMASK;
+		const ChunkType UNUSED_BITS = CHUNK_COUNT * CHUNK_BIT_COUNT - N;
+		const ChunkType USED_BITMASK = std::numeric_limits<ChunkType>::max() >> UNUSED_BITS;
+		if(USED_BITMASK > 0)
+		{
+			m_chunks[CHUNK_COUNT - 1] &= USED_BITMASK;
+		}
 	}
 	}
 };
 };
 /// @}
 /// @}