Browse Source

Move forward shading to GPU occlusion

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
8981e9effd

+ 4 - 1
AnKi/Gr/CommandBuffer.h

@@ -304,9 +304,12 @@ public:
 
 	void drawIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, Buffer* indirectBuff);
 
-	void drawIndexedIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, Buffer* countBuffer,
+	void drawIndexedIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, U32 argBufferStride, Buffer* countBuffer,
 								  PtrSize countBufferOffset, U32 maxDrawCount);
 
+	void drawIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, U32 argBufferStride, Buffer* countBuffer,
+						   PtrSize countBufferOffset, U32 maxDrawCount);
+
 	void dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ);
 
 	void dispatchComputeIndirect(Buffer* argBuffer, PtrSize argBufferOffset);

+ 10 - 3
AnKi/Gr/Vulkan/CommandBuffer.cpp

@@ -274,11 +274,18 @@ void CommandBuffer::drawIndirect(PrimitiveTopology topology, U32 drawCount, PtrS
 	self.drawIndirectInternal(topology, drawCount, offset, buff);
 }
 
-void CommandBuffer::drawIndexedIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, Buffer* countBuffer,
-											 PtrSize countBufferOffset, U32 maxDrawCount)
+void CommandBuffer::drawIndexedIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, U32 argBufferStride,
+											 Buffer* countBuffer, PtrSize countBufferOffset, U32 maxDrawCount)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.drawIndexedIndirectCountInternal(topology, argBuffer, argBufferOffset, countBuffer, countBufferOffset, maxDrawCount);
+	self.drawIndexedIndirectCountInternal(topology, argBuffer, argBufferOffset, argBufferStride, countBuffer, countBufferOffset, maxDrawCount);
+}
+
+void CommandBuffer::drawIndirectCount(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, U32 argBufferStride,
+									  Buffer* countBuffer, PtrSize countBufferOffset, U32 maxDrawCount)
+{
+	ANKI_VK_SELF(CommandBufferImpl);
+	self.drawIndirectCountInternal(topology, argBuffer, argBufferOffset, argBufferStride, countBuffer, countBufferOffset, maxDrawCount);
 }
 
 void CommandBuffer::drawIndexedIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, Buffer* buff)

+ 26 - 3
AnKi/Gr/Vulkan/CommandBufferImpl.h

@@ -318,14 +318,15 @@ public:
 	}
 
 	ANKI_FORCE_INLINE void drawIndexedIndirectCountInternal(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset,
-															Buffer* countBuffer, PtrSize countBufferOffset, U32 maxDrawCount)
+															U32 argBufferStride, Buffer* countBuffer, PtrSize countBufferOffset, U32 maxDrawCount)
 	{
 		m_state.setPrimitiveTopology(topology);
 		drawcallCommon();
 		const BufferImpl& argBufferImpl = static_cast<const BufferImpl&>(*argBuffer);
 		ANKI_ASSERT(argBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
 		ANKI_ASSERT((argBufferOffset % 4) == 0);
-		ANKI_ASSERT(argBufferOffset + maxDrawCount * sizeof(DrawIndexedIndirectArgs) <= argBuffer->getSize());
+		ANKI_ASSERT(argBufferStride >= sizeof(DrawIndexedIndirectArgs));
+		ANKI_ASSERT(argBufferOffset + maxDrawCount * argBufferStride <= argBuffer->getSize());
 
 		const BufferImpl& countBufferImpl = static_cast<const BufferImpl&>(*countBuffer);
 		ANKI_ASSERT(countBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
@@ -335,7 +336,29 @@ public:
 		ANKI_ASSERT(maxDrawCount > 0 && maxDrawCount <= getGrManagerImpl().getDeviceCapabilities().m_maxDrawIndirectCount);
 
 		vkCmdDrawIndexedIndirectCountKHR(m_handle, argBufferImpl.getHandle(), argBufferOffset, countBufferImpl.getHandle(), countBufferOffset,
-										 maxDrawCount, sizeof(DrawIndexedIndirectArgs));
+										 maxDrawCount, argBufferStride);
+	}
+
+	ANKI_FORCE_INLINE void drawIndirectCountInternal(PrimitiveTopology topology, Buffer* argBuffer, PtrSize argBufferOffset, U32 argBufferStride,
+													 Buffer* countBuffer, PtrSize countBufferOffset, U32 maxDrawCount)
+	{
+		m_state.setPrimitiveTopology(topology);
+		drawcallCommon();
+		const BufferImpl& argBufferImpl = static_cast<const BufferImpl&>(*argBuffer);
+		ANKI_ASSERT(argBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
+		ANKI_ASSERT((argBufferOffset % 4) == 0);
+		ANKI_ASSERT(argBufferStride >= sizeof(DrawIndirectArgs));
+		ANKI_ASSERT(argBufferOffset + maxDrawCount * argBufferStride <= argBuffer->getSize());
+
+		const BufferImpl& countBufferImpl = static_cast<const BufferImpl&>(*countBuffer);
+		ANKI_ASSERT(countBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
+		ANKI_ASSERT((countBufferOffset % 4) == 0);
+		ANKI_ASSERT(countBufferOffset + maxDrawCount * sizeof(U32) <= countBuffer->getSize());
+
+		ANKI_ASSERT(maxDrawCount > 0 && maxDrawCount <= getGrManagerImpl().getDeviceCapabilities().m_maxDrawIndirectCount);
+
+		vkCmdDrawIndirectCountKHR(m_handle, argBufferImpl.getHandle(), argBufferOffset, countBufferImpl.getHandle(), countBufferOffset, maxDrawCount,
+								  argBufferStride);
 	}
 
 	void dispatchComputeInternal(U32 groupCountX, U32 groupCountY, U32 groupCountZ);

+ 52 - 37
AnKi/Renderer/ForwardShading.cpp

@@ -13,61 +13,74 @@
 #include <AnKi/Renderer/LensFlare.h>
 #include <AnKi/Renderer/ClusterBinning2.h>
 #include <AnKi/Renderer/LensFlare.h>
+#include <AnKi/Renderer/GBuffer.h>
 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Shaders/Include/MaterialTypes.h>
+#include <AnKi/Core/App.h>
 
 namespace anki {
 
+void ForwardShading::populateRenderGraph(RenderingContext& ctx)
+{
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+
+	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
+
+	FrustumGpuVisibilityInput visIn;
+	visIn.m_passesName = "FW shading visibility";
+	visIn.m_technique = RenderingTechnique::kForward;
+	visIn.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjection;
+	visIn.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
+	visIn.m_lodDistances = lodDistances;
+	visIn.m_rgraph = &rgraph;
+	RenderTargetHandle hzb = getRenderer().getGBuffer().getHzbRt();
+	visIn.m_hzbRt = &hzb;
+
+	getRenderer().getGpuVisibility().populateRenderGraph(visIn, m_runCtx.m_visOut);
+}
+
 void ForwardShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 {
 	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-	const U32 threadId = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
-	const U32 threadCount = rgraphCtx.m_secondLevelCommandBufferCount;
-	const U32 problemSize = ctx.m_renderQueue->m_forwardShadingRenderables.getSize();
-	U32 start, end;
-	splitThreadedProblem(threadId, threadCount, problemSize, start, end);
 
-	if(start != end)
-	{
-		cmdb.setDepthWrite(false);
-		cmdb.setBlendFactors(0, BlendFactor::kSrcAlpha, BlendFactor::kOneMinusSrcAlpha);
-
-		const U32 set = U32(MaterialSet::kGlobal);
-		cmdb.bindSampler(set, U32(MaterialBinding::kLinearClampSampler), getRenderer().getSamplers().m_trilinearClamp.get());
-		cmdb.bindSampler(set, U32(MaterialBinding::kShadowSampler), getRenderer().getSamplers().m_trilinearClampShadow.get());
+	// Set state
+	cmdb.setDepthWrite(false);
+	cmdb.setBlendFactors(0, BlendFactor::kSrcAlpha, BlendFactor::kOneMinusSrcAlpha);
 
-		rgraphCtx.bindTexture(set, U32(MaterialBinding::kDepthRt), getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kLightVolume), getRenderer().getVolumetricLightingAccumulation().getRt());
+	// Bind stuff
+	const U32 set = U32(MaterialSet::kGlobal);
+	cmdb.bindSampler(set, U32(MaterialBinding::kLinearClampSampler), getRenderer().getSamplers().m_trilinearClamp.get());
+	cmdb.bindSampler(set, U32(MaterialBinding::kShadowSampler), getRenderer().getSamplers().m_trilinearClampShadow.get());
 
-		cmdb.bindUniformBuffer(set, U32(MaterialBinding::kClusterShadingUniforms), getRenderer().getClusterBinning2().getClusteredShadingUniforms());
+	rgraphCtx.bindTexture(set, U32(MaterialBinding::kDepthRt), getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+	rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kLightVolume), getRenderer().getVolumetricLightingAccumulation().getRt());
 
-		cmdb.bindStorageBuffer(set, U32(MaterialBinding::kClusterShadingLights),
-							   getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
+	cmdb.bindUniformBuffer(set, U32(MaterialBinding::kClusterShadingUniforms), getRenderer().getClusterBinning2().getClusteredShadingUniforms());
 
-		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kClusterShadingLights) + 1, getRenderer().getShadowMapping().getShadowmapRt());
+	cmdb.bindStorageBuffer(set, U32(MaterialBinding::kClusterShadingLights),
+						   getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
 
-		cmdb.bindStorageBuffer(set, U32(MaterialBinding::kClusters), getRenderer().getClusterBinning2().getClustersBuffer());
+	rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kClusterShadingLights) + 1, getRenderer().getShadowMapping().getShadowmapRt());
 
-		RenderableDrawerArguments args;
-		args.m_viewMatrix = ctx.m_matrices.m_view;
-		args.m_cameraTransform = ctx.m_matrices.m_cameraTransform;
-		args.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjectionJitter;
-		args.m_previousViewProjectionMatrix = ctx.m_prevMatrices.m_viewProjectionJitter; // Not sure about that
-		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAnisoResolutionScalingBias.get();
+	cmdb.bindStorageBuffer(set, U32(MaterialBinding::kClusters), getRenderer().getClusterBinning2().getClustersBuffer());
 
-		// Start drawing
-		getRenderer().getSceneDrawer().drawRange(args, ctx.m_renderQueue->m_forwardShadingRenderables.getBegin() + start,
-												 ctx.m_renderQueue->m_forwardShadingRenderables.getBegin() + end, cmdb);
+	// Draw
+	RenderableDrawerArguments args;
+	args.m_viewMatrix = ctx.m_matrices.m_view;
+	args.m_cameraTransform = ctx.m_matrices.m_cameraTransform;
+	args.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjectionJitter;
+	args.m_previousViewProjectionMatrix = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection;
+	args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAnisoResolutionScalingBias.get();
+	args.m_renderingTechinuqe = RenderingTechnique::kForward;
+	args.fillMdi(m_runCtx.m_visOut);
+	getRenderer().getSceneDrawer().drawMdi(args, cmdb);
 
-		// Restore state
-		cmdb.setDepthWrite(true);
-		cmdb.setBlendFactors(0, BlendFactor::kOne, BlendFactor::kZero);
-	}
+	// Restore state
+	cmdb.setDepthWrite(true);
+	cmdb.setBlendFactors(0, BlendFactor::kOne, BlendFactor::kZero);
 
-	if(threadId == threadCount - 1 && ctx.m_renderQueue->m_lensFlares.getSize())
-	{
-		getRenderer().getLensFlare().runDrawFlares(ctx, cmdb);
-	}
+	// Do lens flares
+	getRenderer().getLensFlare().runDrawFlares(ctx, cmdb);
 }
 
 void ForwardShading::setDependencies(const RenderingContext& ctx, GraphicsRenderPassDescription& pass)
@@ -79,6 +92,8 @@ void ForwardShading::setDependencies(const RenderingContext& ctx, GraphicsRender
 	{
 		pass.newBufferDependency(getRenderer().getLensFlare().getIndirectDrawBuffer(), BufferUsageBit::kIndirectDraw);
 	}
+
+	pass.newBufferDependency(m_runCtx.m_visOut.m_mdiDrawCountsHandle, BufferUsageBit::kIndirectDraw);
 }
 
 } // end namespace anki

+ 10 - 0
AnKi/Renderer/ForwardShading.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <AnKi/Renderer/RendererObject.h>
+#include <AnKi/Renderer/Utils/GpuVisibility.h>
 
 namespace anki {
 
@@ -25,9 +26,18 @@ public:
 		return Error::kNone;
 	}
 
+	void populateRenderGraph(RenderingContext& ctx);
+
 	void setDependencies(const RenderingContext& ctx, GraphicsRenderPassDescription& pass);
 
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
+
+private:
+	class
+	{
+	public:
+		GpuVisibilityOutput m_visOut;
+	} m_runCtx;
 };
 /// @}
 

+ 5 - 0
AnKi/Renderer/GBuffer.h

@@ -51,6 +51,11 @@ public:
 		return m_runCtx.m_prevFrameDepthRt;
 	}
 
+	RenderTargetHandle getHzbRt() const
+	{
+		return m_runCtx.m_hzbRt;
+	}
+
 	void getDebugRenderTarget(CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override
 	{

+ 14 - 17
AnKi/Renderer/LightShading.cpp

@@ -152,7 +152,6 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 	}
 
 	// Do light shading first
-	if(rgraphCtx.m_currentSecondLevelCommandBufferIndex == 0)
 	{
 		cmdb.bindShaderProgram(m_lightShading.m_grProg[getRenderer().getRtShadowsEnabled()].get());
 		cmdb.setDepthWrite(false);
@@ -184,7 +183,6 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 	}
 
 	// Apply indirect
-	if(rgraphCtx.m_currentSecondLevelCommandBufferIndex == 0)
 	{
 		cmdb.setDepthWrite(false);
 		cmdb.bindShaderProgram(m_applyIndirect.m_grProg.get());
@@ -214,7 +212,6 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 	}
 
 	// Skybox
-	if(rgraphCtx.m_currentSecondLevelCommandBufferIndex == 0)
 	{
 		cmdb.setDepthCompareOperation(CompareOperation::kEqual);
 
@@ -254,8 +251,7 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		cmdb.setDepthCompareOperation(CompareOperation::kLess);
 	}
 
-	// Do the fog apply
-	if(rgraphCtx.m_currentSecondLevelCommandBufferIndex == rgraphCtx.m_secondLevelCommandBufferCount - 1u)
+	// Apply the fog
 	{
 		cmdb.bindShaderProgram(m_applyFog.m_grProg.get());
 
@@ -288,17 +284,19 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 	}
 
 	// Forward shading last
-	if(enableVrs)
 	{
-		cmdb.setVrsRate(VrsRate::k2x2);
-	}
+		if(enableVrs)
+		{
+			cmdb.setVrsRate(VrsRate::k2x2);
+		}
 
-	getRenderer().getForwardShading().run(ctx, rgraphCtx);
+		getRenderer().getForwardShading().run(ctx, rgraphCtx);
 
-	if(enableVrs)
-	{
-		// Restore
-		cmdb.setVrsRate(VrsRate::k1x1);
+		if(enableVrs)
+		{
+			// Restore
+			cmdb.setVrsRate(VrsRate::k1x1);
+		}
 	}
 }
 
@@ -339,10 +337,9 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("Light&FW Shad");
 
-	pass.setWork(computeNumberOfSecondLevelCommandBuffers(ctx.m_renderQueue->m_forwardShadingRenderables.getSize()),
-				 [this, &ctx](RenderPassWorkContext& rgraphCtx) {
-					 run(ctx, rgraphCtx);
-				 });
+	pass.setWork(1, [this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		run(ctx, rgraphCtx);
+	});
 	pass.setFramebufferInfo(m_lightShading.m_fbDescr, {m_runCtx.m_rt}, getRenderer().getGBuffer().getDepthRt(), sriRt);
 
 	const TextureUsageBit readUsage = TextureUsageBit::kSampledFragment;

+ 1 - 0
AnKi/Renderer/Renderer.cpp

@@ -357,6 +357,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	{
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
 	}
+	m_forwardShading->populateRenderGraph(ctx); // Looks out of place, but this only schedules the forward shading GPU visibility passes; the drawing itself is recorded by LightShading
 	m_gbuffer->populateRenderGraph(ctx);
 	m_shadowMapping->populateRenderGraph(ctx);
 	m_clusterBinning2->populateRenderGraph(ctx);

+ 15 - 6
AnKi/Renderer/Utils/Drawer.cpp

@@ -165,17 +165,26 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 			return;
 		}
 
-		ANKI_ASSERT(state.m_indexedDrawcall && "TODO non-indexed");
-
 		ShaderProgramPtr prog = state.m_program;
 		cmdb.bindShaderProgram(prog.get());
 
 		const U32 maxDrawCount = userCount;
 
-		cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
-									  args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
-									  args.m_mdiDrawCountsBuffer.m_buffer, args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount,
-									  maxDrawCount);
+		if(state.m_indexedDrawcall)
+		{
+			cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
+										  args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
+										  sizeof(DrawIndexedIndirectArgs), args.m_mdiDrawCountsBuffer.m_buffer,
+										  args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount, maxDrawCount);
+		}
+		else
+		{
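+			// Using DrawIndexedIndirectArgs for the offset and stride is intentional: the visibility shader pads the non-indexed args with an extra U32 (DrawIndirectArgsWithPadding in GpuVisibility.ankiprog) and writes them to the same buffer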
+			cmdb.drawIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
+								   args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
+								   sizeof(DrawIndexedIndirectArgs), args.m_mdiDrawCountsBuffer.m_buffer,
+								   args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount, maxDrawCount);
+		}
 
 		++bucketCount;
 		allUserCount += userCount;

+ 8 - 0
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -84,6 +84,9 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	case RenderingTechnique::kDepth:
 		aabbCount = GpuSceneArrays::RenderableAabbDepth::getSingleton().getElementCount();
 		break;
+	case RenderingTechnique::kForward:
+		aabbCount = GpuSceneArrays::RenderableAabbForward::getSingleton().getElementCount();
+		break;
 	default:
 		ANKI_ASSERT(0);
 	}
@@ -178,6 +181,11 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getGpuSceneOffsetOfArrayBase(),
 								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getBufferRange());
 			break;
+		case RenderingTechnique::kForward:
+			cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(),
+								   GpuSceneArrays::RenderableAabbForward::getSingleton().getGpuSceneOffsetOfArrayBase(),
+								   GpuSceneArrays::RenderableAabbForward::getSingleton().getBufferRange());
+			break;
 		default:
 			ANKI_ASSERT(0);
 		}

+ 16 - 0
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -416,6 +416,22 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 		m_gpuSceneRenderable.uploadToGpuScene(renderable);
 	}
 
+	if(!m_resourceUpdated)
+	{
+		// Always upload GpuSceneParticleEmitter
+
+		GpuSceneParticleEmitter particles = {};
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = m_gpuScenePositions.getOffset();
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = m_gpuSceneAlphas.getOffset();
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = m_gpuSceneScales.getOffset();
+		particles.m_aliveParticleCount = m_aliveParticleCount;
+		if(!m_gpuSceneParticleEmitter.isValid())
+		{
+			m_gpuSceneParticleEmitter.allocate();
+		}
+		m_gpuSceneParticleEmitter.uploadToGpuScene(particles);
+	}
+
 	// Upload the GpuSceneRenderableAabb always
 	for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
 	{

+ 49 - 14
AnKi/Shaders/GpuVisibility.ankiprog

@@ -16,6 +16,15 @@
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/CollisionFunctions.hlsl>
 
+struct DrawIndirectArgsWithPadding
+{
+	U32 m_vertexCount;
+	U32 m_instanceCount;
+	U32 m_firstVertex;
+	U32 m_firstInstance;
+	U32 m_padding;
+};
+
 // Buffers that point to the GPU scene
 [[vk::binding(0)]] StructuredBuffer<GpuSceneRenderableAabb> g_aabbs;
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
@@ -24,6 +33,7 @@
 // These 2 have the same size
 [[vk::binding(3)]] RWStructuredBuffer<UVec4> g_instanceRateRenderables;
 [[vk::binding(4)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
+[[vk::binding(4)]] RWStructuredBuffer<DrawIndirectArgsWithPadding> g_drawIndirectArgs;
 
 // Index pointing to the above arrays. One for each render state bucket
 [[vk::binding(5)]] StructuredBuffer<U32> g_drawIndirectArgsOffsets;
@@ -183,20 +193,45 @@
 	const U32 meshLodOffset = renderableIn.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;
 	const GpuSceneMeshLod meshLod = g_gpuScene.Load<GpuSceneMeshLod>(meshLodOffset);
 
-	DrawIndexedIndirectArgs indirect;
-	indirect.m_indexCount = meshLod.m_indexCount;
-	indirect.m_instanceCount = 1;
-	indirect.m_firstIndex = meshLod.m_firstIndex;
-	indirect.m_vertexOffset = 0;
-	indirect.m_firstInstance = indirectIdx;
-	g_drawIndexedIndirectArgs[indirectIdx] = indirect;
-
-	UVec4 instanceVertex;
-	instanceVertex.x = renderableIn.m_worldTransformsOffset;
-	instanceVertex.y = renderableIn.m_uniformsOffset;
-	instanceVertex.z = meshLodOffset;
-	instanceVertex.w = renderableIn.m_boneTransformsOffset;
-	g_instanceRateRenderables[indirectIdx] = instanceVertex;
+	const Bool isParticleEmitter = renderableIn.m_particleEmitterOffset != 0;
+
+	if(!isParticleEmitter)
+	{
+		// Regular renderables are always indexed
+
+		DrawIndexedIndirectArgs indirect;
+		indirect.m_indexCount = meshLod.m_indexCount;
+		indirect.m_instanceCount = 1;
+		indirect.m_firstIndex = meshLod.m_firstIndex;
+		indirect.m_vertexOffset = 0;
+		indirect.m_firstInstance = indirectIdx;
+		g_drawIndexedIndirectArgs[indirectIdx] = indirect;
+
+		UVec4 instanceVertex;
+		instanceVertex.x = renderableIn.m_worldTransformsOffset;
+		instanceVertex.y = renderableIn.m_uniformsOffset;
+		instanceVertex.z = meshLodOffset;
+		instanceVertex.w = renderableIn.m_boneTransformsOffset;
+		g_instanceRateRenderables[indirectIdx] = instanceVertex;
+	}
+	else
+	{
+		const GpuSceneParticleEmitter emitter = g_gpuScene.Load<GpuSceneParticleEmitter>(renderableIn.m_particleEmitterOffset);
+
+		DrawIndirectArgsWithPadding indirect;
+		indirect.m_vertexCount = emitter.m_aliveParticleCount * meshLod.m_indexCount;
+		indirect.m_instanceCount = 1;
+		indirect.m_firstVertex = 0;
+		indirect.m_firstInstance = indirectIdx;
+		g_drawIndirectArgs[indirectIdx] = indirect;
+
+		UVec4 instanceVertex;
+		instanceVertex.x = renderableIn.m_worldTransformsOffset;
+		instanceVertex.y = renderableIn.m_uniformsOffset;
+		instanceVertex.z = meshLodOffset;
+		instanceVertex.w = renderableIn.m_particleEmitterOffset;
+		g_instanceRateRenderables[indirectIdx] = instanceVertex;
+	}
 
 	// Now update the stats
 #if STATS