Browse Source

Add another way of doing GPU visibility: distance-based

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
76aa83b866

+ 4 - 2
AnKi/Renderer/ClusterBinning.cpp

@@ -77,8 +77,10 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 
 
 		cmdb.bindShaderProgram(m_grProg.get());
 		cmdb.bindShaderProgram(m_grProg.get());
 
 
-		bindUniforms(cmdb, 0, 0, m_runCtx.m_clusteredShadingUniformsToken);
-		bindStorage(cmdb, 0, 1, m_runCtx.m_clustersToken);
+		cmdb.bindUniformBuffer(0, 0, &RebarTransientMemoryPool::getSingleton().getBuffer(), m_runCtx.m_clusteredShadingUniformsToken.m_offset,
+							   m_runCtx.m_clusteredShadingUniformsToken.m_range);
+		cmdb.bindStorageBuffer(0, 1, &RebarTransientMemoryPool::getSingleton().getBuffer(), m_runCtx.m_clustersToken.m_offset,
+							   m_runCtx.m_clustersToken.m_range);
 
 
 		for(ClusteredObjectType type : EnumIterable<ClusteredObjectType>())
 		for(ClusteredObjectType type : EnumIterable<ClusteredObjectType>())
 		{
 		{

+ 6 - 2
AnKi/Renderer/ForwardShading.cpp

@@ -40,13 +40,17 @@ void ForwardShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgr
 		rgraphCtx.bindTexture(set, U32(MaterialBinding::kDepthRt), getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
 		rgraphCtx.bindTexture(set, U32(MaterialBinding::kDepthRt), getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
 		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kLightVolume), getRenderer().getVolumetricLightingAccumulation().getRt());
 		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kLightVolume), getRenderer().getVolumetricLightingAccumulation().getRt());
 
 
-		bindUniforms(cmdb, set, U32(MaterialBinding::kClusterShadingUniforms), getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+		cmdb.bindUniformBuffer(set, U32(MaterialBinding::kClusterShadingUniforms), &RebarTransientMemoryPool::getSingleton().getBuffer(),
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, set, U32(MaterialBinding::kClusterShadingLights),
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, set, U32(MaterialBinding::kClusterShadingLights),
 																				 ClusteredObjectType::kPointLight);
 																				 ClusteredObjectType::kPointLight);
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, set, U32(MaterialBinding::kClusterShadingLights) + 1,
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, set, U32(MaterialBinding::kClusterShadingLights) + 1,
 																				 ClusteredObjectType::kSpotLight);
 																				 ClusteredObjectType::kSpotLight);
 		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kClusterShadingLights) + 2, getRenderer().getShadowMapping().getShadowmapRt());
 		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kClusterShadingLights) + 2, getRenderer().getShadowMapping().getShadowmapRt());
-		bindStorage(cmdb, set, U32(MaterialBinding::kClusters), getRenderer().getClusterBinning().getClustersRebarToken());
+		cmdb.bindStorageBuffer(set, U32(MaterialBinding::kClusters), &RebarTransientMemoryPool::getSingleton().getBuffer(),
+							   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+							   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 		RenderableDrawerArguments args;
 		RenderableDrawerArguments args;
 		args.m_viewMatrix = ctx.m_matrices.m_view;
 		args.m_viewMatrix = ctx.m_matrices.m_view;

+ 1 - 1
AnKi/Renderer/GBuffer.cpp

@@ -162,7 +162,7 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	const CommonMatrices& matrices = (getRenderer().getFrameCount() <= 1) ? ctx.m_matrices : ctx.m_prevMatrices;
 	const CommonMatrices& matrices = (getRenderer().getFrameCount() <= 1) ? ctx.m_matrices : ctx.m_prevMatrices;
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 
-	GpuVisibilityInput visIn;
+	FrustumGpuVisibilityInput visIn;
 	visIn.m_passesName = "GBuffer visibility";
 	visIn.m_passesName = "GBuffer visibility";
 	visIn.m_technique = RenderingTechnique::kGBuffer;
 	visIn.m_technique = RenderingTechnique::kGBuffer;
 	visIn.m_viewProjectionMatrix = matrices.m_viewProjection;
 	visIn.m_viewProjectionMatrix = matrices.m_viewProjection;

+ 6 - 2
AnKi/Renderer/GBufferPost.cpp

@@ -90,11 +90,15 @@ void GBufferPost::run(RenderPassWorkContext& rgraphCtx)
 
 
 	cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearRepeat.get());
 	cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearRepeat.get());
 
 
-	bindUniforms(cmdb, 0, 3, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+	cmdb.bindUniformBuffer(0, 3, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 
 
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 4, ClusteredObjectType::kDecal);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 4, ClusteredObjectType::kDecal);
 
 
-	bindStorage(cmdb, 0, 5, getRenderer().getClusterBinning().getClustersRebarToken());
+	cmdb.bindStorageBuffer(0, 5, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 	cmdb.bindAllBindless(1);
 	cmdb.bindAllBindless(1);
 
 

+ 6 - 2
AnKi/Renderer/IndirectDiffuse.cpp

@@ -265,9 +265,13 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			cmdb.bindShaderProgram(m_main.m_grProg.get());
 			cmdb.bindShaderProgram(m_main.m_grProg.get());
 
 
-			bindUniforms(cmdb, 0, 0, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+			cmdb.bindUniformBuffer(0, 0, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+								   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+								   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 			getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kGlobalIlluminationProbe);
 			getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kGlobalIlluminationProbe);
-			bindStorage(cmdb, 0, 2, getRenderer().getClusterBinning().getClustersRebarToken());
+			cmdb.bindStorageBuffer(0, 2, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+								   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+								   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 			cmdb.bindSampler(0, 3, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(0, 3, getRenderer().getSamplers().m_trilinearClamp.get());
 			rgraphCtx.bindColorTexture(0, 4, getRenderer().getGBuffer().getColorRt(2));
 			rgraphCtx.bindColorTexture(0, 4, getRenderer().getGBuffer().getColorRt(2));

+ 2 - 2
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -238,7 +238,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 
 
-			GpuVisibilityInput visIn;
+			FrustumGpuVisibilityInput visIn;
 			visIn.m_passesName = "GI GBuffer visibility";
 			visIn.m_passesName = "GI GBuffer visibility";
 			visIn.m_technique = RenderingTechnique::kGBuffer;
 			visIn.m_technique = RenderingTechnique::kGBuffer;
 			visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
 			visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
@@ -314,7 +314,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 
 				Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 				Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 
 
-				GpuVisibilityInput visIn;
+				FrustumGpuVisibilityInput visIn;
 				visIn.m_passesName = "GI shadows visibility";
 				visIn.m_passesName = "GI shadows visibility";
 				visIn.m_technique = RenderingTechnique::kDepth;
 				visIn.m_technique = RenderingTechnique::kDepth;
 				visIn.m_viewProjectionMatrix = cascadeViewProjMats[i];
 				visIn.m_viewProjectionMatrix = cascadeViewProjMats[i];

+ 7 - 3
AnKi/Renderer/IndirectSpecular.cpp

@@ -174,7 +174,7 @@ void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& r
 	const U32 depthLod = min(g_ssrDepthLodCVar.get(), getRenderer().getDepthDownscale().getMipmapCount() - 1);
 	const U32 depthLod = min(g_ssrDepthLodCVar.get(), getRenderer().getDepthDownscale().getMipmapCount() - 1);
 
 
 	// Bind uniforms
 	// Bind uniforms
-	SsrUniforms* unis = allocateAndBindUniforms<SsrUniforms*>(sizeof(SsrUniforms), cmdb, 0, 0);
+	SsrUniforms* unis = allocateAndBindUniforms<SsrUniforms>(cmdb, 0, 0);
 	unis->m_depthBufferSize = getRenderer().getInternalResolution() >> (depthLod + 1);
 	unis->m_depthBufferSize = getRenderer().getInternalResolution() >> (depthLod + 1);
 	unis->m_framebufferSize = UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()) / 2;
 	unis->m_framebufferSize = UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()) / 2;
 	unis->m_frameCount = getRenderer().getFrameCount() & kMaxU32;
 	unis->m_frameCount = getRenderer().getFrameCount() & kMaxU32;
@@ -207,9 +207,13 @@ void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& r
 	cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_trilinearRepeat.get());
 	cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_trilinearRepeat.get());
 	cmdb.bindTexture(0, 10, &m_noiseImage->getTextureView());
 	cmdb.bindTexture(0, 10, &m_noiseImage->getTextureView());
 
 
-	bindUniforms(cmdb, 0, 11, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+	cmdb.bindUniformBuffer(0, 11, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 12, ClusteredObjectType::kReflectionProbe);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 12, ClusteredObjectType::kReflectionProbe);
-	bindStorage(cmdb, 0, 13, getRenderer().getClusterBinning().getClustersRebarToken());
+	cmdb.bindStorageBuffer(0, 13, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 	cmdb.bindAllBindless(1);
 	cmdb.bindAllBindless(1);
 
 

+ 2 - 2
AnKi/Renderer/LensFlare.cpp

@@ -85,7 +85,7 @@ void LensFlare::updateIndirectInfo(const RenderingContext& ctx, RenderPassWorkCo
 	cmdb.setPushConstants(&ctx.m_matrices.m_viewProjectionJitter, sizeof(ctx.m_matrices.m_viewProjectionJitter));
 	cmdb.setPushConstants(&ctx.m_matrices.m_viewProjectionJitter, sizeof(ctx.m_matrices.m_viewProjectionJitter));
 
 
 	// Write flare info
 	// Write flare info
-	Vec4* flarePositions = allocateAndBindStorage<Vec4*>(count * sizeof(Vec4), cmdb, 0, 0);
+	Vec4* flarePositions = allocateAndBindStorage<Vec4>(cmdb, 0, 0, count);
 	for(U32 i = 0; i < count; ++i)
 	for(U32 i = 0; i < count; ++i)
 	{
 	{
 		*flarePositions = Vec4(ctx.m_renderQueue->m_lensFlares[i].m_worldPosition, 1.0f);
 		*flarePositions = Vec4(ctx.m_renderQueue->m_lensFlares[i].m_worldPosition, 1.0f);
@@ -156,7 +156,7 @@ void LensFlare::runDrawFlares(const RenderingContext& ctx, CommandBuffer& cmdb)
 		U32 spritesCount = max<U32>(1, m_maxSpritesPerFlare);
 		U32 spritesCount = max<U32>(1, m_maxSpritesPerFlare);
 
 
 		// Get uniform memory
 		// Get uniform memory
-		LensFlareSprite* tmpSprites = allocateAndBindStorage<LensFlareSprite*>(spritesCount * sizeof(LensFlareSprite), cmdb, 0, 0);
+		LensFlareSprite* tmpSprites = allocateAndBindStorage<LensFlareSprite>(cmdb, 0, 0, spritesCount);
 		WeakArray<LensFlareSprite> sprites(tmpSprites, spritesCount);
 		WeakArray<LensFlareSprite> sprites(tmpSprites, spritesCount);
 
 
 		// misc
 		// misc

+ 9 - 3
AnKi/Renderer/LightShading.cpp

@@ -159,13 +159,17 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		cmdb.setDepthWrite(false);
 		cmdb.setDepthWrite(false);
 
 
 		// Bind all
 		// Bind all
-		bindUniforms(cmdb, 0, 0, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+		cmdb.bindUniformBuffer(0, 0, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 
 
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kPointLight);
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kPointLight);
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 2, ClusteredObjectType::kSpotLight);
 		getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 2, ClusteredObjectType::kSpotLight);
 		rgraphCtx.bindColorTexture(0, 3, getRenderer().getShadowMapping().getShadowmapRt());
 		rgraphCtx.bindColorTexture(0, 3, getRenderer().getShadowMapping().getShadowmapRt());
 
 
-		bindStorage(cmdb, 0, 4, getRenderer().getClusterBinning().getClustersRebarToken());
+		cmdb.bindStorageBuffer(0, 4, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+							   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+							   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 		cmdb.bindSampler(0, 5, getRenderer().getSamplers().m_nearestNearestClamp.get());
 		cmdb.bindSampler(0, 5, getRenderer().getSamplers().m_nearestNearestClamp.get());
 		cmdb.bindSampler(0, 6, getRenderer().getSamplers().m_trilinearClamp.get());
 		cmdb.bindSampler(0, 6, getRenderer().getSamplers().m_trilinearClamp.get());
@@ -204,7 +208,9 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		rgraphCtx.bindColorTexture(0, 8, getRenderer().getGBuffer().getColorRt(2));
 		rgraphCtx.bindColorTexture(0, 8, getRenderer().getGBuffer().getColorRt(2));
 		cmdb.bindTexture(0, 9, &getRenderer().getProbeReflections().getIntegrationLut());
 		cmdb.bindTexture(0, 9, &getRenderer().getProbeReflections().getIntegrationLut());
 
 
-		bindUniforms(cmdb, 0, 10, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+		cmdb.bindUniformBuffer(0, 10, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+							   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 
 
 		const Vec4 pc(ctx.m_renderQueue->m_cameraNear, ctx.m_renderQueue->m_cameraFar, 0.0f, 0.0f);
 		const Vec4 pc(ctx.m_renderQueue->m_cameraNear, ctx.m_renderQueue->m_cameraFar, 0.0f, 0.0f);
 		cmdb.setPushConstants(&pc, sizeof(pc));
 		cmdb.setPushConstants(&pc, sizeof(pc));

+ 1 - 1
AnKi/Renderer/MainRenderer.cpp

@@ -49,7 +49,7 @@ Error MainRenderer::init(const MainRendererInitInfo& inf)
 	ANKI_R_LOGI("Initializing main renderer. Swapchain resolution %ux%u", m_swapchainResolution.x(), m_swapchainResolution.y());
 	ANKI_R_LOGI("Initializing main renderer. Swapchain resolution %ux%u", m_swapchainResolution.x(), m_swapchainResolution.y());
 
 
 	m_r = newInstance<Renderer>(RendererMemoryPool::getSingleton());
 	m_r = newInstance<Renderer>(RendererMemoryPool::getSingleton());
-	ANKI_CHECK(m_r->init(m_swapchainResolution));
+	ANKI_CHECK(m_r->init(m_swapchainResolution, &m_framePool));
 
 
 	// Init other
 	// Init other
 	if(!m_rDrawToDefaultFb)
 	if(!m_rDrawToDefaultFb)

+ 1 - 1
AnKi/Renderer/MotionVectors.cpp

@@ -139,7 +139,7 @@ void MotionVectors::run(const RenderingContext& ctx, RenderPassWorkContext& rgra
 		Mat4 m_viewProjectionInvMat;
 		Mat4 m_viewProjectionInvMat;
 		Mat4 m_prevViewProjectionInvMat;
 		Mat4 m_prevViewProjectionInvMat;
 	} * pc;
 	} * pc;
-	pc = allocateAndBindUniforms<Uniforms*>(sizeof(*pc), cmdb, 0, 5);
+	pc = allocateAndBindUniforms<Uniforms>(cmdb, 0, 5);
 
 
 	pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
 	pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
 	pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 	pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;

+ 3 - 3
AnKi/Renderer/PackVisibleClusteredObjects.cpp

@@ -63,18 +63,18 @@ void PackVisibleClusteredObjects::dispatchType(WeakArray<TRenderQueueElement> ar
 	}
 	}
 
 
 	RebarAllocation token;
 	RebarAllocation token;
-	U32* indices = allocateStorage<U32*>(array.getSize() * sizeof(U32), token);
+	U32* indices = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(array.getSize(), token);
 
 
 	RebarAllocation extrasToken;
 	RebarAllocation extrasToken;
 	PointLightExtra* plightExtras = nullptr;
 	PointLightExtra* plightExtras = nullptr;
 	SpotLightExtra* slightExtras = nullptr;
 	SpotLightExtra* slightExtras = nullptr;
 	if constexpr(std::is_same_v<TClustererType, PointLight>)
 	if constexpr(std::is_same_v<TClustererType, PointLight>)
 	{
 	{
-		plightExtras = allocateStorage<PointLightExtra*>(array.getSize() * sizeof(PointLightExtra), extrasToken);
+		plightExtras = RebarTransientMemoryPool::getSingleton().allocateFrame<PointLightExtra>(array.getSize(), extrasToken);
 	}
 	}
 	else if constexpr(std::is_same_v<TClustererType, SpotLight>)
 	else if constexpr(std::is_same_v<TClustererType, SpotLight>)
 	{
 	{
-		slightExtras = allocateStorage<SpotLightExtra*>(array.getSize() * sizeof(SpotLightExtra), extrasToken);
+		slightExtras = RebarTransientMemoryPool::getSingleton().allocateFrame<SpotLightExtra>(array.getSize(), extrasToken);
 	}
 	}
 
 
 	// Write ReBAR
 	// Write ReBAR

+ 2 - 2
AnKi/Renderer/ProbeReflections.cpp

@@ -360,7 +360,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 
 		Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 		Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 
 
-		GpuVisibilityInput visIn;
+		FrustumGpuVisibilityInput visIn;
 		visIn.m_passesName = "Cube refl GBuffer visibility";
 		visIn.m_passesName = "Cube refl GBuffer visibility";
 		visIn.m_technique = RenderingTechnique::kGBuffer;
 		visIn.m_technique = RenderingTechnique::kGBuffer;
 		visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
 		visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
@@ -429,7 +429,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 
 
-			GpuVisibilityInput visIn;
+			FrustumGpuVisibilityInput visIn;
 			visIn.m_passesName = "Cube refl shadows visibility";
 			visIn.m_passesName = "Cube refl shadows visibility";
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_viewProjectionMatrix = cascadeViewProjMats[i];
 			visIn.m_viewProjectionMatrix = cascadeViewProjMats[i];

+ 2 - 1
AnKi/Renderer/Renderer.cpp

@@ -105,10 +105,11 @@ Renderer::~Renderer()
 {
 {
 }
 }
 
 
-Error Renderer::init(UVec2 swapchainSize)
+Error Renderer::init(UVec2 swapchainSize, StackMemoryPool* framePool)
 {
 {
 	ANKI_TRACE_SCOPED_EVENT(RInit);
 	ANKI_TRACE_SCOPED_EVENT(RInit);
 
 
+	m_framePool = framePool;
 	const Error err = initInternal(swapchainSize);
 	const Error err = initInternal(swapchainSize);
 	if(err)
 	if(err)
 	{
 	{

+ 9 - 1
AnKi/Renderer/Renderer.h

@@ -75,7 +75,7 @@ public:
 	}
 	}
 
 
 	/// Init the renderer.
 	/// Init the renderer.
-	Error init(UVec2 swapchainSize);
+	Error init(UVec2 swapchainSize, StackMemoryPool* framePool);
 
 
 	/// This function does all the rendering stages and produces a final result.
 	/// This function does all the rendering stages and produces a final result.
 	Error populateRenderGraph(RenderingContext& ctx);
 	Error populateRenderGraph(RenderingContext& ctx);
@@ -188,6 +188,12 @@ public:
 	Bool getCurrentDebugRenderTarget(Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles, ShaderProgramPtr& optionalShaderProgram);
 	Bool getCurrentDebugRenderTarget(Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles, ShaderProgramPtr& optionalShaderProgram);
 	/// @}
 	/// @}
 
 
+	StackMemoryPool& getFrameMemoryPool() const
+	{
+		ANKI_ASSERT(m_framePool);
+		return *m_framePool;
+	}
+
 private:
 private:
 	/// @name Rendering stages
 	/// @name Rendering stages
 	/// @{
 	/// @{
@@ -223,6 +229,8 @@ private:
 
 
 	ShaderProgramResourcePtr m_clearTexComputeProg;
 	ShaderProgramResourcePtr m_clearTexComputeProg;
 
 
+	StackMemoryPool* m_framePool = nullptr;
+
 	class DebugRtInfo
 	class DebugRtInfo
 	{
 	{
 	public:
 	public:

+ 0 - 29
AnKi/Renderer/RendererObject.cpp

@@ -16,35 +16,6 @@ Renderer& RendererObject::getRenderer()
 	return MainRenderer::getSingleton().getOffscreenRenderer();
 	return MainRenderer::getSingleton().getOffscreenRenderer();
 }
 }
 
 
-void* RendererObject::allocateRebarStagingMemory(PtrSize size, RebarAllocation& token) const
-{
-	return RebarTransientMemoryPool::getSingleton().allocateFrame(size, token);
-}
-
-void RendererObject::bindUniforms(CommandBuffer& cmdb, U32 set, U32 binding, const RebarAllocation& token) const
-{
-	if(!token.isUnused())
-	{
-		cmdb.bindUniformBuffer(set, binding, &RebarTransientMemoryPool::getSingleton().getBuffer(), token.m_offset, token.m_range);
-	}
-	else
-	{
-		cmdb.bindUniformBuffer(set, binding, &getRenderer().getDummyBuffer(), 0, getRenderer().getDummyBuffer().getSize());
-	}
-}
-
-void RendererObject::bindStorage(CommandBuffer& cmdb, U32 set, U32 binding, const RebarAllocation& token) const
-{
-	if(!token.isUnused())
-	{
-		cmdb.bindStorageBuffer(set, binding, &RebarTransientMemoryPool::getSingleton().getBuffer(), token.m_offset, token.m_range);
-	}
-	else
-	{
-		cmdb.bindStorageBuffer(set, binding, &getRenderer().getDummyBuffer(), 0, getRenderer().getDummyBuffer().getSize());
-	}
-}
-
 U32 RendererObject::computeNumberOfSecondLevelCommandBuffers(U32 drawcallCount) const
 U32 RendererObject::computeNumberOfSecondLevelCommandBuffers(U32 drawcallCount) const
 {
 {
 	const U32 drawcallsPerThread = drawcallCount / CoreThreadHive::getSingleton().getThreadCount();
 	const U32 drawcallsPerThread = drawcallCount / CoreThreadHive::getSingleton().getThreadCount();

+ 12 - 28
AnKi/Renderer/RendererObject.h

@@ -38,8 +38,6 @@ public:
 protected:
 protected:
 	static ANKI_PURE Renderer& getRenderer();
 	static ANKI_PURE Renderer& getRenderer();
 
 
-	void* allocateRebarStagingMemory(PtrSize size, RebarAllocation& token) const;
-
 	U32 computeNumberOfSecondLevelCommandBuffers(U32 drawcallCount) const;
 	U32 computeNumberOfSecondLevelCommandBuffers(U32 drawcallCount) const;
 
 
 	/// Used in fullscreen quad draws.
 	/// Used in fullscreen quad draws.
@@ -65,37 +63,23 @@ protected:
 		cmdb.dispatchCompute(sizeX, sizeY, sizeZ);
 		cmdb.dispatchCompute(sizeX, sizeY, sizeZ);
 	}
 	}
 
 
-	template<typename TPtr>
-	TPtr allocateUniforms(PtrSize size, RebarAllocation& token) const
-	{
-		return static_cast<TPtr>(allocateRebarStagingMemory(size, token));
-	}
-
-	void bindUniforms(CommandBuffer& cmdb, U32 set, U32 binding, const RebarAllocation& token) const;
-
-	template<typename TPtr>
-	TPtr allocateAndBindUniforms(PtrSize size, CommandBuffer& cmdb, U32 set, U32 binding) const
+	template<typename T>
+	static T* allocateAndBindUniforms(CommandBuffer& cmdb, U32 set, U32 binding)
 	{
 	{
-		RebarAllocation token;
-		TPtr ptr = allocateUniforms<TPtr>(size, token);
-		bindUniforms(cmdb, set, binding, token);
+		RebarAllocation alloc;
+		T* ptr = static_cast<T*>(RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(T), alloc));
+		ANKI_ASSERT(isAligned(alignof(T), ptrToNumber(ptr)));
+		cmdb.bindUniformBuffer(set, binding, &RebarTransientMemoryPool::getSingleton().getBuffer(), alloc.m_offset, alloc.m_range);
 		return ptr;
 		return ptr;
 	}
 	}
 
 
-	template<typename TPtr>
-	TPtr allocateStorage(PtrSize size, RebarAllocation& token) const
-	{
-		return static_cast<TPtr>(allocateRebarStagingMemory(size, token));
-	}
-
-	void bindStorage(CommandBuffer& cmdb, U32 set, U32 binding, const RebarAllocation& token) const;
-
-	template<typename TPtr>
-	TPtr allocateAndBindStorage(PtrSize size, CommandBuffer& cmdb, U32 set, U32 binding) const
+	template<typename T>
+	static T* allocateAndBindStorage(CommandBuffer& cmdb, U32 set, U32 binding, PtrSize count = 1)
 	{
 	{
-		RebarAllocation token;
-		TPtr ptr = allocateStorage<TPtr>(size, token);
-		bindStorage(cmdb, set, binding, token);
+		RebarAllocation alloc;
+		T* ptr = static_cast<T*>(RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(T) * count, alloc));
+		ANKI_ASSERT(isAligned(alignof(T), ptrToNumber(ptr)));
+		cmdb.bindStorageBuffer(set, binding, &RebarTransientMemoryPool::getSingleton().getBuffer(), alloc.m_offset, alloc.m_range);
 		return ptr;
 		return ptr;
 	}
 	}
 
 

+ 8 - 3
AnKi/Renderer/RtShadows.cpp

@@ -453,13 +453,17 @@ void RtShadows::run(RenderPassWorkContext& rgraphCtx)
 
 
 	constexpr U32 kSet = 2;
 	constexpr U32 kSet = 2;
 
 
-	bindUniforms(cmdb, kSet, 0, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+	cmdb.bindUniformBuffer(kSet, 0, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 
 
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, kSet, 1, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, kSet, 1, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, kSet, 2, ClusteredObjectType::kSpotLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, kSet, 2, ClusteredObjectType::kSpotLight);
 	rgraphCtx.bindColorTexture(kSet, 3, getRenderer().getShadowMapping().getShadowmapRt());
 	rgraphCtx.bindColorTexture(kSet, 3, getRenderer().getShadowMapping().getShadowmapRt());
 
 
-	bindStorage(cmdb, kSet, 4, getRenderer().getClusterBinning().getClustersRebarToken());
+	cmdb.bindStorageBuffer(kSet, 4, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 	cmdb.bindSampler(kSet, 5, getRenderer().getSamplers().m_trilinearRepeat.get());
 	cmdb.bindSampler(kSet, 5, getRenderer().getSamplers().m_trilinearRepeat.get());
 
 
@@ -611,7 +615,8 @@ void RtShadows::buildSbt(RenderingContext& ctx)
 
 
 	// Allocate SBT
 	// Allocate SBT
 	RebarAllocation token;
 	RebarAllocation token;
-	U8* sbt = allocateStorage<U8*>(PtrSize(m_sbtRecordSize) * (instanceCount + extraSbtRecords), token);
+	U8* sbt =
+		static_cast<U8*>(RebarTransientMemoryPool::getSingleton().allocateFrame(PtrSize(m_sbtRecordSize) * (instanceCount + extraSbtRecords), token));
 	[[maybe_unused]] const U8* sbtStart = sbt;
 	[[maybe_unused]] const U8* sbtStart = sbt;
 	m_runCtx.m_sbtBuffer.reset(const_cast<Buffer*>(&RebarTransientMemoryPool::getSingleton().getBuffer()));
 	m_runCtx.m_sbtBuffer.reset(const_cast<Buffer*>(&RebarTransientMemoryPool::getSingleton().getBuffer()));
 	m_runCtx.m_sbtOffset = token.m_offset;
 	m_runCtx.m_sbtOffset = token.m_offset;

+ 1 - 1
AnKi/Renderer/ShadowMapping.cpp

@@ -257,7 +257,7 @@ void ShadowMapping::newWorkItem(const UVec4& atlasViewport, const RenderQueue& q
 
 
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 
-	GpuVisibilityInput visIn;
+	FrustumGpuVisibilityInput visIn;
 	visIn.m_passesName = "Shadows visibility";
 	visIn.m_passesName = "Shadows visibility";
 	visIn.m_technique = RenderingTechnique::kDepth;
 	visIn.m_technique = RenderingTechnique::kDepth;
 	visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
 	visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;

+ 6 - 2
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -105,11 +105,15 @@ void ShadowmapsResolve::run(RenderPassWorkContext& rgraphCtx)
 
 
 	cmdb.bindShaderProgram(m_grProg.get());
 	cmdb.bindShaderProgram(m_grProg.get());
 
 
-	bindUniforms(cmdb, 0, 0, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+	cmdb.bindUniformBuffer(0, 0, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 1, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 2, ClusteredObjectType::kSpotLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 2, ClusteredObjectType::kSpotLight);
 	rgraphCtx.bindColorTexture(0, 3, getRenderer().getShadowMapping().getShadowmapRt());
 	rgraphCtx.bindColorTexture(0, 3, getRenderer().getShadowMapping().getShadowmapRt());
-	bindStorage(cmdb, 0, 4, getRenderer().getClusterBinning().getClustersRebarToken());
+	cmdb.bindStorageBuffer(0, 4, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 	cmdb.bindSampler(0, 5, getRenderer().getSamplers().m_trilinearClamp.get());
 	cmdb.bindSampler(0, 5, getRenderer().getSamplers().m_trilinearClamp.get());
 	cmdb.bindSampler(0, 6, getRenderer().getSamplers().m_trilinearClampShadow.get());
 	cmdb.bindSampler(0, 6, getRenderer().getSamplers().m_trilinearClampShadow.get());

+ 94 - 46
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -21,20 +21,17 @@ static StatCounter g_testedObjects(StatCategory::kMisc, "Visbility tested object
 
 
 Error GpuVisibility::init()
 Error GpuVisibility::init()
 {
 {
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog));
-
-	for(U32 i = 0; i < 2; ++i)
+	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
 	{
 	{
-		ShaderProgramResourceVariantInitInfo variantInit(m_prog);
-		variantInit.addMutation("HZB_TEST", i);
-		variantInit.addMutation("STATS", ANKI_STATS_ENABLED);
-
-		const ShaderProgramResourceVariant* variant;
-		m_prog->getOrCreateVariant(variantInit, variant);
-
-		m_grProgs[i].reset(&variant->getProgram());
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
+									 Array<SubMutation, 3>{{{"HZB_TEST", hzb}, {"STATS", ANKI_STATS_ENABLED}, {"DISTANCE_TEST", 0}}}, m_prog,
+									 m_frustumGrProgs[hzb]));
 	}
 	}
 
 
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
+								 Array<SubMutation, 3>{{{"HZB_TEST", 0}, {"STATS", ANKI_STATS_ENABLED}, {"DISTANCE_TEST", 1}}}, m_prog,
+								 m_distGrProg));
+
 #if ANKI_STATS_ENABLED
 #if ANKI_STATS_ENABLED
 	for(GpuReadbackMemoryAllocation& alloc : m_readbackMemory)
 	for(GpuReadbackMemoryAllocation& alloc : m_readbackMemory)
 	{
 	{
@@ -45,8 +42,39 @@ Error GpuVisibility::init()
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
-void GpuVisibility::populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOutput& out)
+void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
 {
 {
+	class DistanceTestData
+	{
+	public:
+		Vec3 m_pointOfTest;
+		F32 m_testRadius;
+	};
+
+	class FrustumTestData
+	{
+	public:
+		RenderTargetHandle m_hzbRt;
+		Mat4 m_viewProjMat;
+	};
+
+	FrustumTestData* frustumTestData = nullptr;
+	DistanceTestData* distTestData = nullptr;
+
+	if(distanceBased)
+	{
+		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
+		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
+		distTestData->m_pointOfTest = din.m_pointOfTest;
+		distTestData->m_testRadius = din.m_testRadius;
+	}
+	else
+	{
+		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
+		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
+		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
+	}
+
 	U32 aabbCount = 0;
 	U32 aabbCount = 0;
 	switch(in.m_technique)
 	switch(in.m_technique)
 	{
 	{
@@ -113,17 +141,14 @@ void GpuVisibility::populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOut
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
 	pass.newBufferDependency(out.m_mdiDrawCountsHandle, BufferUsageBit::kStorageComputeWrite);
 	pass.newBufferDependency(out.m_mdiDrawCountsHandle, BufferUsageBit::kStorageComputeWrite);
 
 
-	if(in.m_hzbRt)
+	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
 	{
 	{
-		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
+		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
+		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
 	}
 	}
 
 
-	const RenderTargetHandle hzbRtCopy =
-		(in.m_hzbRt) ? *in.m_hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
-
-	pass.setWork([this, viewProjectionMat = in.m_viewProjectionMatrix, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
-				  technique = in.m_technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle, instanceRateRenderables, indirectArgs,
-				  aabbCount
+	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
+				  technique = in.m_technique, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle, instanceRateRenderables, indirectArgs, aabbCount
 #if ANKI_STATS_ENABLED
 #if ANKI_STATS_ENABLED
 				  ,
 				  ,
 				  clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset
 				  clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset
@@ -131,36 +156,41 @@ void GpuVisibility::populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOut
 	](RenderPassWorkContext& rpass) {
 	](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 
-		cmdb.bindShaderProgram(m_grProgs[hzbRtCopy.isValid()].get());
+		if(frustumTestData)
+		{
+			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()].get());
+		}
+		else
+		{
+			cmdb.bindShaderProgram(m_distGrProg.get());
+		}
 
 
 		switch(technique)
 		switch(technique)
 		{
 		{
 		case RenderingTechnique::kGBuffer:
 		case RenderingTechnique::kGBuffer:
 			cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(),
 			cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(),
 								   GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getGpuSceneOffsetOfArrayBase(),
 								   GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getGpuSceneOffsetOfArrayBase(),
-								   GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementCount()
-									   * GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementSize());
+								   GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getBufferRange());
 			break;
 			break;
 		case RenderingTechnique::kDepth:
 		case RenderingTechnique::kDepth:
 			cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(),
 			cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(),
 								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getGpuSceneOffsetOfArrayBase(),
 								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getGpuSceneOffsetOfArrayBase(),
-								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getElementCount()
-									   * GpuSceneArrays::RenderableAabbDepth::getSingleton().getElementSize());
+								   GpuSceneArrays::RenderableAabbDepth::getSingleton().getBufferRange());
 			break;
 			break;
 		default:
 		default:
 			ANKI_ASSERT(0);
 			ANKI_ASSERT(0);
 		}
 		}
 
 
-		cmdb.bindStorageBuffer(
-			0, 1, &GpuSceneBuffer::getSingleton().getBuffer(), GpuSceneArrays::Renderable::getSingleton().getGpuSceneOffsetOfArrayBase(),
-			GpuSceneArrays::Renderable::getSingleton().getElementCount() * GpuSceneArrays::Renderable::getSingleton().getElementSize());
+		cmdb.bindStorageBuffer(0, 1, &GpuSceneBuffer::getSingleton().getBuffer(),
+							   GpuSceneArrays::Renderable::getSingleton().getGpuSceneOffsetOfArrayBase(),
+							   GpuSceneArrays::Renderable::getSingleton().getBufferRange());
 
 
 		cmdb.bindStorageBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 		cmdb.bindStorageBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 
 
 		cmdb.bindStorageBuffer(0, 3, instanceRateRenderables.m_buffer, instanceRateRenderables.m_offset, instanceRateRenderables.m_size);
 		cmdb.bindStorageBuffer(0, 3, instanceRateRenderables.m_buffer, instanceRateRenderables.m_offset, instanceRateRenderables.m_size);
 		cmdb.bindStorageBuffer(0, 4, indirectArgs.m_buffer, indirectArgs.m_offset, indirectArgs.m_size);
 		cmdb.bindStorageBuffer(0, 4, indirectArgs.m_buffer, indirectArgs.m_offset, indirectArgs.m_size);
 
 
-		U32* offsets = allocateAndBindStorage<U32*>(sizeof(U32) * RenderStateBucketContainer::getSingleton().getBucketCount(technique), cmdb, 0, 5);
+		U32* offsets = allocateAndBindStorage<U32>(cmdb, 0, 5, RenderStateBucketContainer::getSingleton().getBucketCount(technique));
 		U32 bucketCount = 0;
 		U32 bucketCount = 0;
 		U32 userCount = 0;
 		U32 userCount = 0;
 		RenderStateBucketContainer::getSingleton().iterateBuckets(technique, [&](const RenderStateInfo&, U32 userCount_) {
 		RenderStateBucketContainer::getSingleton().iterateBuckets(technique, [&](const RenderStateInfo&, U32 userCount_) {
@@ -172,28 +202,46 @@ void GpuVisibility::populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOut
 
 
 		rpass.bindStorageBuffer(0, 6, mdiDrawCountsHandle);
 		rpass.bindStorageBuffer(0, 6, mdiDrawCountsHandle);
 
 
-		GpuVisibilityUniforms* unis = allocateAndBindUniforms<GpuVisibilityUniforms*>(sizeof(GpuVisibilityUniforms), cmdb, 0, 7);
-
-		Array<Plane, 6> planes;
-		extractClipPlanes(viewProjectionMat, planes);
-		for(U32 i = 0; i < 6; ++i)
+		if(frustumTestData)
 		{
 		{
-			unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
-		}
+			FrustumGpuVisibilityUniforms* unis = allocateAndBindUniforms<FrustumGpuVisibilityUniforms>(cmdb, 0, 7);
 
 
-		ANKI_ASSERT(kMaxLodCount == 3);
-		unis->m_maxLodDistances[0] = lodDistances[0];
-		unis->m_maxLodDistances[1] = lodDistances[1];
-		unis->m_maxLodDistances[2] = kMaxF32;
-		unis->m_maxLodDistances[3] = kMaxF32;
+			Array<Plane, 6> planes;
+			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
+			for(U32 i = 0; i < 6; ++i)
+			{
+				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
+			}
+
+			ANKI_ASSERT(kMaxLodCount == 3);
+			unis->m_maxLodDistances[0] = lodDistances[0];
+			unis->m_maxLodDistances[1] = lodDistances[1];
+			unis->m_maxLodDistances[2] = kMaxF32;
+			unis->m_maxLodDistances[3] = kMaxF32;
 
 
-		unis->m_lodReferencePoint = lodReferencePoint;
-		unis->m_viewProjectionMat = viewProjectionMat;
+			unis->m_lodReferencePoint = lodReferencePoint;
+			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;
 
 
-		if(hzbRtCopy.isValid())
+			if(frustumTestData->m_hzbRt.isValid())
+			{
+				rpass.bindColorTexture(0, 8, frustumTestData->m_hzbRt);
+				cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
+			}
+		}
+		else
 		{
 		{
-			rpass.bindColorTexture(0, 8, hzbRtCopy);
-			cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
+			DistanceGpuVisibilityUniforms unis;
+			unis.m_pointOfTest = distTestData->m_pointOfTest;
+			unis.m_testRadius = distTestData->m_testRadius;
+
+			unis.m_maxLodDistances[0] = lodDistances[0];
+			unis.m_maxLodDistances[1] = lodDistances[1];
+			unis.m_maxLodDistances[2] = kMaxF32;
+			unis.m_maxLodDistances[3] = kMaxF32;
+
+			unis.m_lodReferencePoint = lodReferencePoint;
+
+			cmdb.setPushConstants(&unis, sizeof(unis));
 		}
 		}
 
 
 #if ANKI_STATS_ENABLED
 #if ANKI_STATS_ENABLED

+ 35 - 8
AnKi/Renderer/Utils/GpuVisibility.h

@@ -14,19 +14,34 @@ namespace anki {
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
 
 
-/// @memberof GpuVisibility
-class GpuVisibilityInput
+class BaseGpuVisibilityInput
 {
 {
 public:
 public:
 	CString m_passesName;
 	CString m_passesName;
 	RenderingTechnique m_technique = RenderingTechnique::kCount;
 	RenderingTechnique m_technique = RenderingTechnique::kCount;
-	Mat4 m_viewProjectionMatrix = Mat4::getIdentity();
+
 	Vec3 m_lodReferencePoint = Vec3(0.0f);
 	Vec3 m_lodReferencePoint = Vec3(0.0f);
 	Array<F32, kMaxLodCount - 1> m_lodDistances = {};
 	Array<F32, kMaxLodCount - 1> m_lodDistances = {};
-	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
+
 	RenderGraphDescription* m_rgraph = nullptr;
 	RenderGraphDescription* m_rgraph = nullptr;
 };
 };
 
 
+/// @memberof GpuVisibility
+class FrustumGpuVisibilityInput : public BaseGpuVisibilityInput
+{
+public:
+	Mat4 m_viewProjectionMatrix = Mat4::getIdentity();
+	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
+};
+
+/// @memberof GpuVisibility
+class DistanceGpuVisibilityInput : public BaseGpuVisibilityInput
+{
+public:
+	Vec3 m_pointOfTest = Vec3(0.0f);
+	F32 m_testRadius = 1.0f;
+};
+
 /// @memberof GpuVisibility
 /// @memberof GpuVisibility
 class GpuVisibilityOutput
 class GpuVisibilityOutput
 {
 {
@@ -44,17 +59,29 @@ class GpuVisibility : public RendererObject
 public:
 public:
 	Error init();
 	Error init();
 
 
-	/// Populate the rendergraph.
-	void populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOutput& out);
+	/// Perform frustum visibility testing.
+	void populateRenderGraph(FrustumGpuVisibilityInput& in, GpuVisibilityOutput& out)
+	{
+		populateRenderGraphInternal(false, in, out);
+	}
+
+	/// Perform simple distance-based visibility testing.
+	void populateRenderGraph(DistanceGpuVisibilityInput& in, GpuVisibilityOutput& out)
+	{
+		populateRenderGraphInternal(true, in, out);
+	}
 
 
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
-	Array<ShaderProgramPtr, 2> m_grProgs;
+	Array<ShaderProgramPtr, 2> m_frustumGrProgs;
+	ShaderProgramPtr m_distGrProg;
 
 
 #if ANKI_STATS_ENABLED
 #if ANKI_STATS_ENABLED
 	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_readbackMemory;
 	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_readbackMemory;
 	U64 m_lastFrameIdx = kMaxU64;
 	U64 m_lastFrameIdx = kMaxU64;
 #endif
 #endif
+
+	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 };
 };
 
 
 /// @memberof GpuVisibilityNonRenderables
 /// @memberof GpuVisibilityNonRenderables
@@ -64,9 +91,9 @@ public:
 	CString m_passesName;
 	CString m_passesName;
 	GpuSceneNonRenderableObjectType m_objectType = GpuSceneNonRenderableObjectType::kCount;
 	GpuSceneNonRenderableObjectType m_objectType = GpuSceneNonRenderableObjectType::kCount;
 	Mat4 m_viewProjectionMat;
 	Mat4 m_viewProjectionMat;
-	const RenderTargetHandle* m_hzbRt = nullptr;
 	RenderGraphDescription* m_rgraph = nullptr;
 	RenderGraphDescription* m_rgraph = nullptr;
 
 
+	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
 	BufferOffsetRange m_cpuFeedbackBuffer; ///< Optional.
 	BufferOffsetRange m_cpuFeedbackBuffer; ///< Optional.
 };
 };
 
 

+ 1 - 2
AnKi/Renderer/Utils/TraditionalDeferredShading.cpp

@@ -100,8 +100,7 @@ void TraditionalDeferredLightShading::drawLights(TraditionalDeferredLightShading
 	{
 	{
 		const LightComponent* dirLightc = SceneGraph::getSingleton().getDirectionalLight();
 		const LightComponent* dirLightc = SceneGraph::getSingleton().getDirectionalLight();
 
 
-		TraditionalDeferredShadingUniforms* unis =
-			allocateAndBindUniforms<TraditionalDeferredShadingUniforms*>(sizeof(TraditionalDeferredShadingUniforms), cmdb, 0, 0);
+		TraditionalDeferredShadingUniforms* unis = allocateAndBindUniforms<TraditionalDeferredShadingUniforms>(cmdb, 0, 0);
 
 
 		unis->m_inputTexUvScale = info.m_gbufferTexCoordsScale;
 		unis->m_inputTexUvScale = info.m_gbufferTexCoordsScale;
 		unis->m_inputTexUvBias = info.m_gbufferTexCoordsBias;
 		unis->m_inputTexUvBias = info.m_gbufferTexCoordsBias;

+ 6 - 2
AnKi/Renderer/VolumetricLightingAccumulation.cpp

@@ -110,7 +110,9 @@ void VolumetricLightingAccumulation::run(const RenderingContext& ctx, RenderPass
 
 
 	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_rts[0]);
 	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_rts[0]);
 
 
-	bindUniforms(cmdb, 0, 6, getRenderer().getClusterBinning().getClusteredUniformsRebarToken());
+	cmdb.bindUniformBuffer(0, 6, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClusteredUniformsRebarToken().m_range);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 7, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 7, ClusteredObjectType::kPointLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 8, ClusteredObjectType::kSpotLight);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 8, ClusteredObjectType::kSpotLight);
 	rgraphCtx.bindColorTexture(0, 9, getRenderer().getShadowMapping().getShadowmapRt());
 	rgraphCtx.bindColorTexture(0, 9, getRenderer().getShadowMapping().getShadowmapRt());
@@ -118,7 +120,9 @@ void VolumetricLightingAccumulation::run(const RenderingContext& ctx, RenderPass
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 10, ClusteredObjectType::kGlobalIlluminationProbe);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 10, ClusteredObjectType::kGlobalIlluminationProbe);
 
 
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 11, ClusteredObjectType::kFogDensityVolume);
 	getRenderer().getPackVisibleClusteredObjects().bindClusteredObjectBuffer(cmdb, 0, 11, ClusteredObjectType::kFogDensityVolume);
-	bindStorage(cmdb, 0, 12, getRenderer().getClusterBinning().getClustersRebarToken());
+	cmdb.bindStorageBuffer(0, 12, &RebarTransientMemoryPool::getSingleton().getBuffer(),
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_offset,
+						   getRenderer().getClusterBinning().getClustersRebarToken().m_range);
 
 
 	cmdb.bindAllBindless(1);
 	cmdb.bindAllBindless(1);
 
 

+ 21 - 4
AnKi/Shaders/GpuVisibility.ankiprog

@@ -5,6 +5,9 @@
 
 
 #pragma anki mutator HZB_TEST 0 1
 #pragma anki mutator HZB_TEST 0 1
 #pragma anki mutator STATS 0 1
 #pragma anki mutator STATS 0 1
+#pragma anki mutator DISTANCE_TEST 0 1
+
+#pragma anki skip_mutation DISTANCE_TEST 1 HZB_TEST 1
 
 
 #pragma anki start comp
 #pragma anki start comp
 
 
@@ -27,7 +30,11 @@
 // The MDI counts. One for each render state bucket
 // The MDI counts. One for each render state bucket
 [[vk::binding(6)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
 [[vk::binding(6)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
 
 
-[[vk::binding(7)]] ConstantBuffer<GpuVisibilityUniforms> g_unis;
+#if DISTANCE_TEST == 0
+[[vk::binding(7)]] ConstantBuffer<FrustumGpuVisibilityUniforms> g_unis;
+#else
+[[vk::push_constant]] ConstantBuffer<DistanceGpuVisibilityUniforms> g_unis;
+#endif
 
 
 #if HZB_TEST
 #if HZB_TEST
 [[vk::binding(8)]] Texture2D<Vec4> g_hzbTex;
 [[vk::binding(8)]] Texture2D<Vec4> g_hzbTex;
@@ -59,6 +66,7 @@
 
 
 	const GpuSceneRenderableAabb aabb = g_aabbs[aabbIdx];
 	const GpuSceneRenderableAabb aabb = g_aabbs[aabbIdx];
 
 
+#if DISTANCE_TEST == 0
 	// Frustum test
 	// Frustum test
 	//
 	//
 	if(!frustumTest(g_unis.m_clipPlanes, aabb.m_sphereCenter, aabb.m_sphereRadius))
 	if(!frustumTest(g_unis.m_clipPlanes, aabb.m_sphereCenter, aabb.m_sphereRadius))
@@ -96,7 +104,7 @@
 	// HiZ culling
 	// HiZ culling
 	//
 	//
 
 
-#if HZB_TEST
+#	if HZB_TEST
 	// Compute the mip
 	// Compute the mip
 	Vec2 texSize;
 	Vec2 texSize;
 	F32 mipCount;
 	F32 mipCount;
@@ -107,7 +115,7 @@
 	const Vec2 sizeXY = (maxUv - minUv) * texSize;
 	const Vec2 sizeXY = (maxUv - minUv) * texSize;
 	F32 mip = ceil(log2(max(sizeXY.x, sizeXY.y)));
 	F32 mip = ceil(log2(max(sizeXY.x, sizeXY.y)));
 
 
-#	if 0
+#		if 0
 	mip = clamp(mip, 0.0, mipCount - 1.0);
 	mip = clamp(mip, 0.0, mipCount - 1.0);
 	const F32 levelLower = max(mip - 1.0, 0.0);
 	const F32 levelLower = max(mip - 1.0, 0.0);
 	const Vec2 scale = exp2(-levelLower);
 	const Vec2 scale = exp2(-levelLower);
@@ -119,7 +127,7 @@
 	{
 	{
 		mip = levelLower;
 		mip = levelLower;
 	}
 	}
-#	endif
+#		endif
 
 
 	// Sample mip
 	// Sample mip
 	Vec4 depths;
 	Vec4 depths;
@@ -133,6 +141,15 @@
 	{
 	{
 		return;
 		return;
 	}
 	}
+#	endif // HZB_TEST
+#else // DISTANCE_TEST == 1
+	const Vec3 vec = aabb.m_sphereCenter - g_unis.m_pointOfTest;
+	const F32 distFromTestPointSquared = dot(vec, vec);
+	const F32 maxDist = g_unis.m_testRadius + aabb.m_sphereRadius;
+	if(distFromTestPointSquared >= maxDist * maxDist)
+	{
+		return;
+	}
 #endif
 #endif
 
 
 	// Compute the LOD
 	// Compute the LOD

+ 12 - 1
AnKi/Shaders/Include/GpuVisibilityTypes.h

@@ -9,7 +9,7 @@
 
 
 ANKI_BEGIN_NAMESPACE
 ANKI_BEGIN_NAMESPACE
 
 
-struct GpuVisibilityUniforms
+struct FrustumGpuVisibilityUniforms
 {
 {
 	Vec4 m_clipPlanes[6u];
 	Vec4 m_clipPlanes[6u];
 
 
@@ -21,6 +21,17 @@ struct GpuVisibilityUniforms
 	Mat4 m_viewProjectionMat;
 	Mat4 m_viewProjectionMat;
 };
 };
 
 
+struct DistanceGpuVisibilityUniforms
+{
+	Vec3 m_pointOfTest;
+	F32 m_testRadius;
+
+	Vec4 m_maxLodDistances;
+
+	Vec3 m_lodReferencePoint;
+	F32 m_padding3;
+};
+
 struct GpuVisibilityNonRenderableUniforms
 struct GpuVisibilityNonRenderableUniforms
 {
 {
 	Vec4 m_clipPlanes[6u];
 	Vec4 m_clipPlanes[6u];