Browse Source

Enable GPU visibility on shadows

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
1385bf799e

+ 1 - 1
AnKi/Gr/RenderGraph.h

@@ -31,7 +31,7 @@ class RenderGraphDescription;
 /// @{
 constexpr U32 kMaxRenderGraphPasses = 128;
 constexpr U32 kMaxRenderGraphRenderTargets = 64; ///< Max imported or not render targets in RenderGraph.
-constexpr U32 kMaxRenderGraphBuffers = 64;
+constexpr U32 kMaxRenderGraphBuffers = 128;
 constexpr U32 kMaxRenderGraphAccelerationStructures = 32;
 /// @}
 

+ 1 - 1
AnKi/Renderer/Drawer.cpp

@@ -151,7 +151,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 {
 	setState(args, cmdb);
 
-	cmdb.bindVertexBuffer(0, args.m_instaceRateRenderablesBuffer, args.m_instaceRateRenderablesOffset, sizeof(GpuSceneRenderablePacked),
+	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer, args.m_instanceRateRenderablesBufferOffset, sizeof(GpuSceneRenderablePacked),
 						  VertexStepRate::kInstance);
 
 	U32 allUserCount = 0;

+ 17 - 3
AnKi/Renderer/Drawer.h

@@ -7,6 +7,7 @@
 
 #include <AnKi/Renderer/Common.h>
 #include <AnKi/Resource/RenderingKey.h>
+#include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Gr.h>
 
 namespace anki {
@@ -41,9 +42,22 @@ public:
 	PtrSize m_drawIndexedIndirectArgsBufferOffset = 0;
 	PtrSize m_drawIndexedIndirectArgsBufferRange = 0;
 
-	Buffer* m_instaceRateRenderablesBuffer = nullptr;
-	PtrSize m_instaceRateRenderablesOffset = 0;
-	PtrSize m_instaceRateRenderablesRange = 0;
+	Buffer* m_instanceRateRenderablesBuffer = nullptr;
+	PtrSize m_instanceRateRenderablesBufferOffset = 0;
+	PtrSize m_instanceRateRenderablesBufferRange = 0;
+
+	void fillMdi(const GpuVisibilityOutput& visOut) // Copy the three buffer/offset/range triplets out of a GPU visibility result
+	{
+		m_mdiDrawCountsBuffer = visOut.m_mdiDrawCountsBuffer; // MDI draw counts
+		m_mdiDrawCountsBufferOffset = visOut.m_mdiDrawCountsBufferOffset;
+		m_mdiDrawCountsBufferRange = visOut.m_mdiDrawCountsBufferRange;
+		m_drawIndexedIndirectArgsBuffer = visOut.m_drawIndexedIndirectArgsBuffer; // Indexed indirect draw arguments
+		m_drawIndexedIndirectArgsBufferOffset = visOut.m_drawIndexedIndirectArgsBufferOffset;
+		m_drawIndexedIndirectArgsBufferRange = visOut.m_drawIndexedIndirectArgsBufferRange;
+		m_instanceRateRenderablesBuffer = visOut.m_instanceRateRenderablesBuffer; // Instance-rate vertex buffer of renderables
+		m_instanceRateRenderablesBufferOffset = visOut.m_instanceRateRenderablesBufferOffset;
+		m_instanceRateRenderablesBufferRange = visOut.m_instanceRateRenderablesBufferRange;
+	}
 };
 
 /// It uses the render queue to batch and render.

+ 10 - 14
AnKi/Renderer/GBuffer.cpp

@@ -107,7 +107,7 @@ Error GBuffer::initInternal()
 	return Error::kNone;
 }
 
-void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const
+void GBuffer::runInThread(const RenderingContext& ctx, const GpuVisibilityOutput& visOut, RenderPassWorkContext& rgraphCtx) const
 {
 	ANKI_TRACE_SCOPED_EVENT(RGBuffer);
 
@@ -132,14 +132,7 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 	args.m_previousViewProjectionMatrix = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection;
 	args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAnisoResolutionScalingBias.get();
 	args.m_renderingTechinuqe = RenderingTechnique::kGBuffer;
-
-	const GpuVisibility& gpuVis = m_visibility;
-	rgraphCtx.getBufferState(gpuVis.getMdiDrawCountsBufferHandle(), args.m_mdiDrawCountsBuffer, args.m_mdiDrawCountsBufferOffset,
-							 args.m_mdiDrawCountsBufferRange);
-	rgraphCtx.getBufferState(gpuVis.getDrawIndexedIndirectArgsBufferHandle(), args.m_drawIndexedIndirectArgsBuffer,
-							 args.m_drawIndexedIndirectArgsBufferOffset, args.m_drawIndexedIndirectArgsBufferRange);
-	rgraphCtx.getBufferState(gpuVis.getInstanceRateRenderablesBufferHandle(), args.m_instaceRateRenderablesBuffer,
-							 args.m_instaceRateRenderablesOffset, args.m_instaceRateRenderablesRange);
+	args.fillMdi(visOut);
 
 	cmdb.setDepthCompareOperation(CompareOperation::kLessEqual);
 	getRenderer().getSceneDrawer().drawMdi(args, cmdb);
@@ -174,8 +167,11 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
 	const CommonMatrices& matrices = (getRenderer().getFrameCount() <= 1) ? ctx.m_matrices : ctx.m_prevMatrices;
+	const Array<F32, kMaxLodCount - 1> lodDistances = {ConfigSet::getSingleton().getLod0MaxDistance(),
+													   ConfigSet::getSingleton().getLod1MaxDistance()};
+	GpuVisibilityOutput visOut;
 	m_visibility.populateRenderGraph(RenderingTechnique::kGBuffer, matrices.m_viewProjection, matrices.m_cameraTransform.getTranslationPart().xyz(),
-									 m_runCtx.m_hzbRt, rgraph);
+									 lodDistances, &m_runCtx.m_hzbRt, rgraph, visOut);
 
 	const Bool enableVrs =
 		GrManager::getSingleton().getDeviceCapabilities().m_vrs && ConfigSet::getSingleton().getRVrs() && ConfigSet::getSingleton().getRGBufferVrs();
@@ -218,8 +214,8 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 	pass.setFramebufferInfo(m_fbDescr, ConstWeakArray<RenderTargetHandle>(&rts[0], kGBufferColorRenderTargetCount), m_runCtx.m_crntFrameDepthRt,
 							sriRt);
-	pass.setWork(1, [this, &ctx](RenderPassWorkContext& rgraphCtx) {
-		runInThread(ctx, rgraphCtx);
+	pass.setWork(1, [this, &ctx, visOut](RenderPassWorkContext& rgraphCtx) {
+		runInThread(ctx, visOut, rgraphCtx);
 	});
 
 	for(U i = 0; i < kGBufferColorRenderTargetCount; ++i)
@@ -238,9 +234,9 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 
 	// Only add one dependency to the GPU visibility. No need to track all buffers
-	pass.newBufferDependency(m_visibility.getMdiDrawCountsBufferHandle(), BufferUsageBit::kIndirectDraw);
+	pass.newBufferDependency(visOut.m_mdiDrawCountsHandle, BufferUsageBit::kIndirectDraw);
 
-	// HZB generation
+	// HZB generation for the next frame
 	m_hzb.populateRenderGraph(m_runCtx.m_crntFrameDepthRt, getRenderer().getInternalResolution(), m_runCtx.m_hzbRt,
 							  UVec2(m_hzbRt->getWidth(), m_hzbRt->getHeight()), ctx);
 }

+ 1 - 1
AnKi/Renderer/GBuffer.h

@@ -91,7 +91,7 @@ private:
 
 	Error initInternal();
 
-	void runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const;
+	void runInThread(const RenderingContext& ctx, const GpuVisibilityOutput& visOut, RenderPassWorkContext& rgraphCtx) const;
 };
 /// @}
 

+ 60 - 26
AnKi/Renderer/GpuVisibility.cpp

@@ -12,7 +12,6 @@
 #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
 #include <AnKi/Collision/Functions.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
-#include <AnKi/Core/ConfigSet.h>
 
 namespace anki {
 
@@ -37,13 +36,25 @@ static GpuSceneContiguousArrayType techniqueToArrayType(RenderingTechnique techn
 
 Error GpuVisibility::init()
 {
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog, m_grProg));
+	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog));
+
+	for(U32 i = 0; i < 2; ++i) // One variant per HZB_TEST value (0 and 1); m_grProgs is indexed by that same value
+	{
+		ShaderProgramResourceVariantInitInfo variantInit(m_prog);
+		variantInit.addMutation("HZB_TEST", i);
+
+		const ShaderProgramResourceVariant* variant;
+		m_prog->getOrCreateVariant(variantInit, variant);
+
+		m_grProgs[i].reset(&variant->getProgram()); // populateRenderGraph() later selects m_grProgs[hzbRtCopy.isValid()]
+	}
 
 	return Error::kNone;
 }
 
-void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 cameraPosition, RenderTargetHandle hzbRt,
-										RenderGraphDescription& rgraph)
+void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
+										const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt,
+										RenderGraphDescription& rgraph, GpuVisibilityOutput& out)
 {
 	const U32 aabbCount = GpuSceneContiguousArrays::getSingleton().getElementCount(techniqueToArrayType(technique));
 	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
@@ -51,35 +62,55 @@ void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4
 	// Allocate memory for the indirect commands
 	const GpuVisibleTransientMemoryAllocation indirectArgs =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
+	out.m_drawIndexedIndirectArgsBuffer = indirectArgs.m_buffer;
+	out.m_drawIndexedIndirectArgsBufferOffset = indirectArgs.m_offset;
+	out.m_drawIndexedIndirectArgsBufferRange = indirectArgs.m_size;
+
 	const GpuVisibleTransientMemoryAllocation instanceRateRenderables =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(GpuSceneRenderable));
+	out.m_instanceRateRenderablesBuffer = instanceRateRenderables.m_buffer;
+	out.m_instanceRateRenderablesBufferOffset = instanceRateRenderables.m_offset;
+	out.m_instanceRateRenderablesBufferRange = instanceRateRenderables.m_size;
 
 	// Allocate and zero the MDI counts
 	RebarAllocation mdiDrawCounts;
 	U32* atomics = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(bucketCount, mdiDrawCounts);
 	memset(atomics, 0, mdiDrawCounts.m_range);
+	out.m_mdiDrawCountsBuffer = &RebarTransientMemoryPool::getSingleton().getBuffer();
+	out.m_mdiDrawCountsBufferOffset = mdiDrawCounts.m_offset;
+	out.m_mdiDrawCountsBufferRange = mdiDrawCounts.m_range;
 
 	// Import buffers
-	m_runCtx.m_instanceRateRenderables = rgraph.importBuffer(instanceRateRenderables.m_buffer, BufferUsageBit::kNone,
-															 instanceRateRenderables.m_offset, instanceRateRenderables.m_size);
-	m_runCtx.m_drawIndexedIndirectArgs =
+	out.m_instanceRateRenderablesHandle = rgraph.importBuffer(instanceRateRenderables.m_buffer, BufferUsageBit::kNone,
+															  instanceRateRenderables.m_offset, instanceRateRenderables.m_size);
+	out.m_drawIndexedIndirectArgsHandle =
 		rgraph.importBuffer(indirectArgs.m_buffer, BufferUsageBit::kNone, indirectArgs.m_offset, indirectArgs.m_size);
-	m_runCtx.m_mdiDrawCounts = rgraph.importBuffer(&RebarTransientMemoryPool::getSingleton().getBuffer(), BufferUsageBit::kNone,
-												   mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
+	out.m_mdiDrawCountsHandle = rgraph.importBuffer(&RebarTransientMemoryPool::getSingleton().getBuffer(), BufferUsageBit::kNone,
+													mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
 
 	// Create the renderpass
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GPU occlusion");
 
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
-	pass.newTextureDependency(hzbRt, TextureUsageBit::kSampledCompute);
-	pass.newBufferDependency(m_runCtx.m_instanceRateRenderables, BufferUsageBit::kStorageComputeWrite);
-	pass.newBufferDependency(m_runCtx.m_drawIndexedIndirectArgs, BufferUsageBit::kStorageComputeWrite);
-	pass.newBufferDependency(m_runCtx.m_mdiDrawCounts, BufferUsageBit::kStorageComputeWrite);
+	pass.newBufferDependency(out.m_instanceRateRenderablesHandle, BufferUsageBit::kStorageComputeWrite);
+	pass.newBufferDependency(out.m_drawIndexedIndirectArgsHandle, BufferUsageBit::kStorageComputeWrite);
+	pass.newBufferDependency(out.m_mdiDrawCountsHandle, BufferUsageBit::kStorageComputeWrite);
+
+	if(hzbRt)
+	{
+		pass.newTextureDependency(*hzbRt, TextureUsageBit::kSampledCompute);
+	}
 
-	pass.setWork([this, viewProjectionMat, cameraPosition, technique, hzbRt](RenderPassWorkContext& rpass) {
+	const RenderTargetHandle hzbRtCopy =
+		(hzbRt) ? *hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
+
+	pass.setWork([this, viewProjectionMat, lodReferencePoint, lodDistances, technique, hzbRtCopy,
+				  drawIndexedIndirectArgsHandle = out.m_drawIndexedIndirectArgsHandle,
+				  instanceRateRenderablesHandle = out.m_instanceRateRenderablesHandle,
+				  mdiDrawCountsHandle = out.m_mdiDrawCountsHandle](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
-		cmdb.bindShaderProgram(m_grProg.get());
+		cmdb.bindShaderProgram(m_grProgs[hzbRtCopy.isValid()].get());
 
 		const GpuSceneContiguousArrayType type = techniqueToArrayType(technique);
 
@@ -93,13 +124,10 @@ void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4
 
 		cmdb.bindStorageBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 
-		rpass.bindColorTexture(0, 3, hzbRt);
-		cmdb.bindSampler(0, 4, getRenderer().getSamplers().m_nearestNearestClamp.get());
-
-		rpass.bindStorageBuffer(0, 5, m_runCtx.m_instanceRateRenderables);
-		rpass.bindStorageBuffer(0, 6, m_runCtx.m_drawIndexedIndirectArgs);
+		rpass.bindStorageBuffer(0, 3, instanceRateRenderablesHandle);
+		rpass.bindStorageBuffer(0, 4, drawIndexedIndirectArgsHandle);
 
-		U32* offsets = allocateAndBindStorage<U32*>(sizeof(U32) * RenderStateBucketContainer::getSingleton().getBucketCount(technique), cmdb, 0, 7);
+		U32* offsets = allocateAndBindStorage<U32*>(sizeof(U32) * RenderStateBucketContainer::getSingleton().getBucketCount(technique), cmdb, 0, 5);
 		U32 bucketCount = 0;
 		U32 userCount = 0;
 		RenderStateBucketContainer::getSingleton().iterateBuckets(technique, [&](const RenderStateInfo&, U32 userCount_) {
@@ -109,9 +137,9 @@ void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4
 		});
 		ANKI_ASSERT(userCount == RenderStateBucketContainer::getSingleton().getBucketsItemCount(technique));
 
-		rpass.bindStorageBuffer(0, 8, m_runCtx.m_mdiDrawCounts);
+		rpass.bindStorageBuffer(0, 6, mdiDrawCountsHandle);
 
-		GpuVisibilityUniforms* unis = allocateAndBindUniforms<GpuVisibilityUniforms*>(sizeof(GpuVisibilityUniforms), cmdb, 0, 9);
+		GpuVisibilityUniforms* unis = allocateAndBindUniforms<GpuVisibilityUniforms*>(sizeof(GpuVisibilityUniforms), cmdb, 0, 7);
 
 		Array<Plane, 6> planes;
 		extractClipPlanes(viewProjectionMat, planes);
@@ -124,14 +152,20 @@ void GpuVisibility::populateRenderGraph(RenderingTechnique technique, const Mat4
 		unis->m_aabbCount = aabbCount;
 
 		ANKI_ASSERT(kMaxLodCount == 3);
-		unis->m_maxLodDistances[0] = ConfigSet::getSingleton().getLod0MaxDistance();
-		unis->m_maxLodDistances[1] = ConfigSet::getSingleton().getLod1MaxDistance();
+		unis->m_maxLodDistances[0] = lodDistances[0];
+		unis->m_maxLodDistances[1] = lodDistances[1];
 		unis->m_maxLodDistances[2] = kMaxF32;
 		unis->m_maxLodDistances[3] = kMaxF32;
 
-		unis->m_cameraOrigin = cameraPosition;
+		unis->m_lodReferencePoint = lodReferencePoint;
 		unis->m_viewProjectionMat = viewProjectionMat;
 
+		if(hzbRtCopy.isValid())
+		{
+			rpass.bindColorTexture(0, 8, hzbRtCopy);
+			cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
+		}
+
 		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
 	});
 }

+ 24 - 26
AnKi/Renderer/GpuVisibility.h

@@ -13,6 +13,26 @@ namespace anki {
 /// @addtogroup renderer
 /// @{
 
+class GpuVisibilityOutput // Result of one GpuVisibility::populateRenderGraph() call; plain data that is safe to copy into lambdas
+{
+public:
+	BufferHandle m_instanceRateRenderablesHandle; ///< Render graph handle of the imported instance-rate renderables range.
+	BufferHandle m_drawIndexedIndirectArgsHandle; ///< Render graph handle of the imported indexed indirect args range.
+	BufferHandle m_mdiDrawCountsHandle; ///< Render graph handle of the imported MDI draw counts range.
+
+	Buffer* m_instanceRateRenderablesBuffer = nullptr; ///< Null until populateRenderGraph() fills it.
+	Buffer* m_drawIndexedIndirectArgsBuffer = nullptr; ///< Null until populateRenderGraph() fills it.
+	Buffer* m_mdiDrawCountsBuffer = nullptr; ///< Null until populateRenderGraph() fills it.
+
+	PtrSize m_instanceRateRenderablesBufferOffset = 0; ///< Offset of the range inside m_instanceRateRenderablesBuffer.
+	PtrSize m_drawIndexedIndirectArgsBufferOffset = 0; ///< Offset of the range inside m_drawIndexedIndirectArgsBuffer.
+	PtrSize m_mdiDrawCountsBufferOffset = 0; ///< Offset of the range inside m_mdiDrawCountsBuffer.
+
+	PtrSize m_instanceRateRenderablesBufferRange = 0; ///< Size of the range inside m_instanceRateRenderablesBuffer.
+	PtrSize m_drawIndexedIndirectArgsBufferRange = 0; ///< Size of the range inside m_drawIndexedIndirectArgsBuffer.
+	PtrSize m_mdiDrawCountsBufferRange = 0; ///< Size of the range inside m_mdiDrawCountsBuffer.
+};
+
 /// Performs GPU visibility for some pass.
 class GpuVisibility : public RendererObject
 {
@@ -20,35 +40,13 @@ public:
 	Error init();
 
 	/// Populate the rendergraph.
-	void populateRenderGraph(RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 cameraPosition, RenderTargetHandle hzbRt,
-							 RenderGraphDescription& rgraph);
-
-	BufferHandle getMdiDrawCountsBufferHandle() const
-	{
-		return m_runCtx.m_mdiDrawCounts;
-	}
-
-	BufferHandle getDrawIndexedIndirectArgsBufferHandle() const
-	{
-		return m_runCtx.m_drawIndexedIndirectArgs;
-	}
-
-	BufferHandle getInstanceRateRenderablesBufferHandle() const
-	{
-		return m_runCtx.m_instanceRateRenderables;
-	}
+	void populateRenderGraph(RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
+							 const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt, RenderGraphDescription& rgraph,
+							 GpuVisibilityOutput& out);
 
 private:
 	ShaderProgramResourcePtr m_prog;
-	ShaderProgramPtr m_grProg;
-
-	class
-	{
-	public:
-		BufferHandle m_instanceRateRenderables;
-		BufferHandle m_drawIndexedIndirectArgs;
-		BufferHandle m_mdiDrawCounts;
-	} m_runCtx;
+	Array<ShaderProgramPtr, 2> m_grProgs;
 };
 /// @}
 

+ 67 - 202
AnKi/Renderer/ShadowMapping.cpp

@@ -12,24 +12,14 @@
 
 namespace anki {
 
-class ShadowMapping::LightToRenderTempInfo
+class ShadowMapping::ViewportWorkItem // One shadow atlas viewport to render, with the data runShadowMapping() needs to draw it
 {
 public:
 	UVec4 m_viewport; ///< Atlas viewport; runShadowMapping() uses it for both the viewport and the scissor.
-	RenderQueue* m_renderQueue;
-	U32 m_drawcallCount;
-	U32 m_renderQueueElementsLod;
-};
+	Mat4 m_mvp; ///< Copy of RenderQueue::m_viewProjectionMatrix, taken in newWorkItem().
+	Mat3x4 m_viewMatrix; ///< Copy of RenderQueue::m_viewMatrix, taken in newWorkItem().
 
-class ShadowMapping::ThreadWorkItem
-{
-public:
-	UVec4 m_viewport;
-	RenderQueue* m_renderQueue;
-	U32 m_firstRenderableElement;
-	U32 m_renderableElementCount;
-	U32 m_threadPoolTaskIdx;
-	U32 m_renderQueueElementsLod;
+	GpuVisibilityOutput m_visOut; ///< Filled by m_visibility.populateRenderGraph() in newWorkItem().
 };
 
 Error ShadowMapping::init()
@@ -74,6 +64,8 @@ Error ShadowMapping::initInternal()
 	m_clearDepthProg->getOrCreateVariant(variant);
 	m_clearDepthGrProg.reset(&variant->getProgram());
 
+	ANKI_CHECK(m_visibility.init());
+
 	return Error::kNone;
 }
 
@@ -95,8 +87,7 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 	}
 
 	// First process the lights
-	U32 threadCountForPass = 0;
-	processLights(ctx, threadCountForPass);
+	processLights(ctx);
 
 	// Build the render graph
 	if(m_runCtx.m_workItems.getSize())
@@ -111,17 +102,21 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("ShadowMapping");
 
-		pass.setFramebufferInfo(m_fbDescr, {}, m_runCtx.m_rt, {}, minx, miny, width, height);
-		ANKI_ASSERT(threadCountForPass && threadCountForPass <= CoreThreadHive::getSingleton().getThreadCount());
-		pass.setWork(threadCountForPass, [this](RenderPassWorkContext& rgraphCtx) {
-			runShadowMapping(rgraphCtx);
-		});
+		for(const ViewportWorkItem& work : m_runCtx.m_workItems)
+		{
+			pass.newBufferDependency(work.m_visOut.m_mdiDrawCountsHandle, BufferUsageBit::kIndirectDraw);
+		}
 
 		TextureSubresourceInfo subresource = TextureSubresourceInfo(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kAllFramebuffer, subresource);
 
 		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(),
 								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
+
+		pass.setFramebufferInfo(m_fbDescr, {}, m_runCtx.m_rt, {}, minx, miny, width, height);
+		pass.setWork(1, [this](RenderPassWorkContext& rgraphCtx) {
+			runShadowMapping(rgraphCtx);
+		});
 	}
 }
 
@@ -235,50 +230,29 @@ Bool ShadowMapping::allocateAtlasTiles(U64 lightUuid, U32 faceCount, const U64*
 }
 
 template<typename TMemoryPool>
-void ShadowMapping::newWorkItems(const UVec4& atlasViewport, RenderQueue* lightRenderQueue, U32 renderQueueElementsLod,
-								 DynamicArray<LightToRenderTempInfo, TMemoryPool>& workItems, U32& drawcallCount) const
+void ShadowMapping::newWorkItem(const UVec4& atlasViewport, const RenderQueue& queue, RenderGraphDescription& rgraph,
+								DynamicArray<ViewportWorkItem, TMemoryPool>& workItems)
 {
-	LightToRenderTempInfo toRender;
-	toRender.m_renderQueue = lightRenderQueue;
-	toRender.m_viewport = atlasViewport;
-	toRender.m_drawcallCount = lightRenderQueue->m_renderables.getSize();
-	toRender.m_renderQueueElementsLod = renderQueueElementsLod;
-
-	workItems.emplaceBack(toRender);
-	drawcallCount += toRender.m_drawcallCount;
+	ViewportWorkItem& work = *workItems.emplaceBack(); // Append an empty item first; every field is filled in below
+
+	const Array<F32, kMaxLodCount - 1> lodDistances = {ConfigSet::getSingleton().getLod0MaxDistance(),
+													   ConfigSet::getSingleton().getLod1MaxDistance()};
+	m_visibility.populateRenderGraph(RenderingTechnique::kDepth, queue.m_viewProjectionMatrix, queue.m_cameraTransform.getTranslationPart().xyz(),
+									 lodDistances, nullptr, rgraph, work.m_visOut); // nullptr: no HZB render target is supplied here
+
+	work.m_viewport = atlasViewport;
+	work.m_mvp = queue.m_viewProjectionMatrix; // Matrices are copied by value; the work item keeps no pointer to the queue
+	work.m_viewMatrix = queue.m_viewMatrix;
 }
 
-void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass)
+void ShadowMapping::processLights(RenderingContext& ctx)
 {
 	m_runCtx.m_fullViewport = UVec4(kMaxU32, kMaxU32, kMinU32, kMinU32);
 
 	// Vars
 	const Vec4 cameraOrigin = ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz0();
-	DynamicArray<LightToRenderTempInfo, MemoryPoolPtrWrapper<StackMemoryPool>> lightsToRender(ctx.m_tempPool);
-	U32 drawcallCount = 0;
-
-	// First thing, allocate an empty tile for empty faces of point lights
-	UVec4 emptyTileViewport;
-	{
-		Array<U32, 4> tileViewport;
-		[[maybe_unused]] const TileAllocatorResult res =
-			m_tileAlloc.allocate(GlobalFrameIndex::getSingleton().m_value, 1, kMaxU64, 0, 1, kPointLightMaxTileAllocHierarchy, tileViewport);
-
-		emptyTileViewport = UVec4(tileViewport);
-
-#if ANKI_ENABLE_ASSERTIONS
-		static Bool firstRun = true;
-		if(firstRun)
-		{
-			ANKI_ASSERT(res == TileAllocatorResult::kAllocationSucceded);
-			firstRun = false;
-		}
-		else
-		{
-			ANKI_ASSERT(res == TileAllocatorResult::kCached);
-		}
-#endif
-	}
+	DynamicArray<ViewportWorkItem, MemoryPoolPtrWrapper<StackMemoryPool>> workItems(ctx.m_tempPool);
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
 	// Process the directional light first.
 	if(ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount > 0)
@@ -293,57 +267,31 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 		Array<U32, kMaxShadowCascades> hierarchies;
 		Array<U32, kMaxShadowCascades> renderQueueElementsLods;
 
-		U32 activeCascades = 0;
-
 		for(U32 cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
 		{
 			ANKI_ASSERT(light.m_shadowRenderQueues[cascade]);
-			if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
-			{
-				// Cascade with drawcalls, will need tiles
 
-				timestamps[activeCascades] = GlobalFrameIndex::getSingleton().m_value; // This light is always updated
-				cascadeIndices[activeCascades] = cascade;
-				drawcallCounts[activeCascades] = 1; // Doesn't matter
+			timestamps[cascade] = GlobalFrameIndex::getSingleton().m_value; // This light is always updated
+			cascadeIndices[cascade] = cascade;
+			drawcallCounts[cascade] = 1; // Doesn't matter
 
-				// Change the quality per cascade
-				hierarchies[activeCascades] = kTileAllocHierarchyCount - 1 - chooseDirectionalLightShadowCascadeDetail(cascade);
-				renderQueueElementsLods[activeCascades] = (cascade == 0) ? 0 : (kMaxLodCount - 1);
-
-				++activeCascades;
-			}
+			// Change the quality per cascade
+			hierarchies[cascade] = kTileAllocHierarchyCount - 1 - chooseDirectionalLightShadowCascadeDetail(cascade);
+			renderQueueElementsLods[cascade] = (cascade == 0) ? 0 : (kMaxLodCount - 1);
 		}
 
-		const Bool allocationFailed = activeCascades == 0
-									  || !allocateAtlasTiles(light.m_uuid, activeCascades, &timestamps[0], &cascadeIndices[0], &drawcallCounts[0],
-															 &hierarchies[0], &atlasViewports[0], &subResults[0]);
+		const Bool allocationFailed = !allocateAtlasTiles(light.m_uuid, light.m_shadowCascadeCount, &timestamps[0], &cascadeIndices[0],
+														  &drawcallCounts[0], &hierarchies[0], &atlasViewports[0], &subResults[0]);
 
 		if(!allocationFailed)
 		{
-			activeCascades = 0;
-
 			for(U cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
 			{
-				if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
-				{
-					// Cascade with drawcalls, push some work for it
-
-					// Update the texture matrix to point to the correct region in the atlas
-					light.m_textureMatrices[cascade] =
-						createSpotLightTextureMatrix(atlasViewports[activeCascades]) * light.m_textureMatrices[cascade];
-
-					// Push work
-					newWorkItems(atlasViewports[activeCascades], light.m_shadowRenderQueues[cascade], renderQueueElementsLods[activeCascades],
-								 lightsToRender, drawcallCount);
-
-					++activeCascades;
-				}
-				else
-				{
-					// Empty cascade, point it to the empty tile
+				// Update the texture matrix to point to the correct region in the atlas
+				light.m_textureMatrices[cascade] = createSpotLightTextureMatrix(atlasViewports[cascade]) * light.m_textureMatrices[cascade];
 
-					light.m_textureMatrices[cascade] = createSpotLightTextureMatrix(emptyTileViewport) * light.m_textureMatrices[cascade];
-				}
+				// Push work
+				newWorkItem(atlasViewports[cascade], *light.m_shadowRenderQueues[cascade], rgraph, workItems);
 			}
 		}
 		else
@@ -369,7 +317,6 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 		Array<UVec4, 6> atlasViewports;
 		Array<TileAllocatorResult, 6> subResults;
 		Array<U32, 6> hierarchies;
-		U32 numOfFacesThatHaveDrawcalls = 0;
 
 		U32 hierarchy, renderQueueElementsLod;
 		chooseDetail(cameraOrigin, light, hierarchy, renderQueueElementsLod);
@@ -377,24 +324,17 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 		for(U32 face = 0; face < 6; ++face)
 		{
 			ANKI_ASSERT(light.m_shadowRenderQueues[face]);
-			if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
-			{
-				// Has renderables, need to allocate tiles for it so add it to the arrays
-
-				faceIndices[numOfFacesThatHaveDrawcalls] = face;
-				timestamps[numOfFacesThatHaveDrawcalls] = light.m_shadowRenderQueues[face]->m_shadowRenderablesLastUpdateTimestamp;
 
-				drawcallCounts[numOfFacesThatHaveDrawcalls] = light.m_shadowRenderQueues[face]->m_renderables.getSize();
+			faceIndices[face] = face;
+			timestamps[face] = light.m_shadowRenderQueues[face]->m_shadowRenderablesLastUpdateTimestamp;
 
-				hierarchies[numOfFacesThatHaveDrawcalls] = hierarchy;
+			drawcallCounts[face] = light.m_shadowRenderQueues[face]->m_renderables.getSize();
 
-				++numOfFacesThatHaveDrawcalls;
-			}
+			hierarchies[face] = hierarchy;
 		}
 
-		const Bool allocationFailed = numOfFacesThatHaveDrawcalls == 0
-									  || !allocateAtlasTiles(light.m_uuid, numOfFacesThatHaveDrawcalls, &timestamps[0], &faceIndices[0],
-															 &drawcallCounts[0], &hierarchies[0], &atlasViewports[0], &subResults[0]);
+		const Bool allocationFailed = !allocateAtlasTiles(light.m_uuid, 6, &timestamps[0], &faceIndices[0], &drawcallCounts[0], &hierarchies[0],
+														  &atlasViewports[0], &subResults[0]);
 
 		if(!allocationFailed)
 		{
@@ -417,36 +357,17 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 
 			light.m_shadowAtlasTileSize = superTileSize / atlasResolution;
 
-			numOfFacesThatHaveDrawcalls = 0;
 			for(U face = 0; face < 6; ++face)
 			{
-				if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
-				{
-					// Has drawcalls, asigned it to a tile
-
-					const UVec4& atlasViewport = atlasViewports[numOfFacesThatHaveDrawcalls];
-
-					// Add a half texel to the viewport's start to avoid bilinear filtering bleeding
-					light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + texelsBorder) / atlasResolution;
-					light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + texelsBorder) / atlasResolution;
+				const UVec4& atlasViewport = atlasViewports[face];
 
-					if(subResults[numOfFacesThatHaveDrawcalls] != TileAllocatorResult::kCached)
-					{
-						newWorkItems(atlasViewport, light.m_shadowRenderQueues[face], renderQueueElementsLod, lightsToRender, drawcallCount);
-					}
+				// Add a half texel to the viewport's start to avoid bilinear filtering bleeding
+				light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + texelsBorder) / atlasResolution;
+				light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + texelsBorder) / atlasResolution;
 
-					++numOfFacesThatHaveDrawcalls;
-				}
-				else
+				if(subResults[face] != TileAllocatorResult::kCached)
 				{
-					// Doesn't have renderables, point the face to the empty tile
-					UVec4 atlasViewport = emptyTileViewport;
-					ANKI_ASSERT(F32(atlasViewport[2]) <= superTileSize && F32(atlasViewport[3]) <= superTileSize);
-					atlasViewport[2] = U32(superTileSize);
-					atlasViewport[3] = U32(superTileSize);
-
-					light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + texelsBorder) / atlasResolution;
-					light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + texelsBorder) / atlasResolution;
+					newWorkItem(atlasViewport, *light.m_shadowRenderQueues[face], rgraph, workItems);
 				}
 			}
 		}
@@ -475,9 +396,8 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 		U32 hierarchy, renderQueueElementsLod;
 		chooseDetail(cameraOrigin, light, hierarchy, renderQueueElementsLod);
 
-		const Bool allocationFailed = localDrawcallCount == 0
-									  || !allocateAtlasTiles(light.m_uuid, 1, &light.m_shadowRenderQueue->m_shadowRenderablesLastUpdateTimestamp,
-															 &faceIdx, &localDrawcallCount, &hierarchy, &atlasViewport, &subResult);
+		const Bool allocationFailed = !allocateAtlasTiles(light.m_uuid, 1, &light.m_shadowRenderQueue->m_shadowRenderablesLastUpdateTimestamp,
+														  &faceIdx, &localDrawcallCount, &hierarchy, &atlasViewport, &subResult);
 
 		if(!allocationFailed)
 		{
@@ -488,7 +408,7 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 
 			if(subResult != TileAllocatorResult::kCached)
 			{
-				newWorkItems(atlasViewport, light.m_shadowRenderQueue, renderQueueElementsLod, lightsToRender, drawcallCount);
+				newWorkItem(atlasViewport, *light.m_shadowRenderQueue, rgraph, workItems);
 			}
 		}
 		else
@@ -498,57 +418,9 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 		}
 	}
 
-	// Split the work that will happen in the scratch buffer
-	if(lightsToRender.getSize())
+	// Move the work to the context
+	if(workItems.getSize())
 	{
-		DynamicArray<ThreadWorkItem, MemoryPoolPtrWrapper<StackMemoryPool>> workItems(ctx.m_tempPool);
-		LightToRenderTempInfo* lightToRender = lightsToRender.getBegin();
-		U32 lightToRenderDrawcallCount = lightToRender->m_drawcallCount;
-		const LightToRenderTempInfo* lightToRenderEnd = lightsToRender.getEnd();
-
-		const U32 threadCount = computeNumberOfSecondLevelCommandBuffers(drawcallCount);
-		threadCountForPass = threadCount;
-		for(U32 taskId = 0; taskId < threadCount; ++taskId)
-		{
-			U32 start, end;
-			splitThreadedProblem(taskId, threadCount, drawcallCount, start, end);
-
-			// While there are drawcalls in this task emit new work items
-			U32 taskDrawcallCount = end - start;
-			ANKI_ASSERT(taskDrawcallCount > 0 && "Because we used computeNumberOfSecondLevelCommandBuffers()");
-
-			while(taskDrawcallCount)
-			{
-				ANKI_ASSERT(lightToRender != lightToRenderEnd);
-				const U32 workItemDrawcallCount = min(lightToRenderDrawcallCount, taskDrawcallCount);
-
-				ThreadWorkItem workItem;
-				workItem.m_viewport = lightToRender->m_viewport;
-				workItem.m_renderQueue = lightToRender->m_renderQueue;
-				workItem.m_firstRenderableElement = lightToRender->m_drawcallCount - lightToRenderDrawcallCount;
-				workItem.m_renderableElementCount = workItemDrawcallCount;
-				workItem.m_threadPoolTaskIdx = taskId;
-				workItem.m_renderQueueElementsLod = lightToRender->m_renderQueueElementsLod;
-				workItems.emplaceBack(workItem);
-
-				// Decrease the drawcall counts for the task and the light
-				ANKI_ASSERT(taskDrawcallCount >= workItemDrawcallCount);
-				taskDrawcallCount -= workItemDrawcallCount;
-				ANKI_ASSERT(lightToRenderDrawcallCount >= workItemDrawcallCount);
-				lightToRenderDrawcallCount -= workItemDrawcallCount;
-
-				// Move to the next light
-				if(lightToRenderDrawcallCount == 0)
-				{
-					++lightToRender;
-					lightToRenderDrawcallCount = (lightToRender != lightToRenderEnd) ? lightToRender->m_drawcallCount : 0;
-				}
-			}
-		}
-
-		ANKI_ASSERT(lightToRender == lightToRenderEnd);
-		ANKI_ASSERT(lightsToRender.getSize() <= workItems.getSize());
-
 		// All good, store the work items for the threads to pick up
 		workItems.moveAndReset(m_runCtx.m_workItems);
 	}
@@ -564,23 +436,16 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 	ANKI_TRACE_SCOPED_EVENT(RSm);
 
 	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-	const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
 
 	cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
 
-	for(ThreadWorkItem& work : m_runCtx.m_workItems)
+	for(ViewportWorkItem& work : m_runCtx.m_workItems)
 	{
-		if(work.m_threadPoolTaskIdx != threadIdx)
-		{
-			continue;
-		}
-
 		// Set state
 		cmdb.setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
 		cmdb.setScissor(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
 
-		// The 1st drawcall will clear the depth buffer
-		if(work.m_firstRenderableElement == 0)
+		// Clear the depth buffer
 		{
 			cmdb.bindShaderProgram(m_clearDepthGrProg.get());
 			cmdb.setDepthCompareOperation(CompareOperation::kAlways);
@@ -593,15 +458,15 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 		}
 
 		RenderableDrawerArguments args;
-		args.m_viewMatrix = work.m_renderQueue->m_viewMatrix;
+		args.m_renderingTechinuqe = RenderingTechnique::kDepth;
+		args.m_viewMatrix = work.m_viewMatrix;
 		args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
-		args.m_viewProjectionMatrix = work.m_renderQueue->m_viewProjectionMatrix;
+		args.m_viewProjectionMatrix = work.m_mvp;
 		args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAniso.get();
+		args.fillMdi(work.m_visOut);
 
-		getRenderer().getSceneDrawer().drawRange(
-			args, work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement,
-			work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement + work.m_renderableElementCount, cmdb);
+		getRenderer().getSceneDrawer().drawMdi(args, cmdb);
 	}
 }
 

+ 8 - 6
AnKi/Renderer/ShadowMapping.h

@@ -9,6 +9,7 @@
 #include <AnKi/Gr.h>
 #include <AnKi/Resource/ImageResource.h>
 #include <AnKi/Renderer/TileAllocator.h>
+#include <AnKi/Renderer/GpuVisibility.h>
 
 namespace anki {
 
@@ -34,8 +35,7 @@ public:
 	}
 
 private:
-	class LightToRenderTempInfo;
-	class ThreadWorkItem;
+	class ViewportWorkItem;
 
 	TileAllocator m_tileAlloc;
 	static constexpr U32 kTileAllocHierarchyCount = 4;
@@ -53,17 +53,19 @@ private:
 	ShaderProgramResourcePtr m_clearDepthProg;
 	ShaderProgramPtr m_clearDepthGrProg;
 
+	GpuVisibility m_visibility;
+
 	class
 	{
 	public:
 		RenderTargetHandle m_rt;
-		WeakArray<ThreadWorkItem> m_workItems;
+		WeakArray<ViewportWorkItem> m_workItems;
 		UVec4 m_fullViewport; ///< Calculate the viewport that contains all of the work items. Mobile optimization.
 	} m_runCtx;
 
 	Error initInternal();
 
-	void processLights(RenderingContext& ctx, U32& threadCountForScratchPass);
+	void processLights(RenderingContext& ctx);
 
 	Bool allocateAtlasTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps, const U32* faceIndices, const U32* drawcallsCount,
 							const U32* hierarchies, UVec4* atlasTileViewports, TileAllocatorResult* subResults);
@@ -76,8 +78,8 @@ private:
 	void chooseDetail(const Vec4& cameraOrigin, const SpotLightQueueElement& light, U32& tileAllocatorHierarchy, U32& renderQueueElementsLod) const;
 
 	template<typename TMemoryPool>
-	void newWorkItems(const UVec4& atlasViewport, RenderQueue* lightRenderQueue, U32 renderQueueElementsLod,
-					  DynamicArray<LightToRenderTempInfo, TMemoryPool>& workItems, U32& drawcallCount) const;
+	void newWorkItem(const UVec4& atlasViewport, const RenderQueue& queue, RenderGraphDescription& rgraph,
+					 DynamicArray<ViewportWorkItem, TMemoryPool>& workItems);
 
 	void runShadowMapping(RenderPassWorkContext& rgraphCtx);
 };

+ 19 - 14
AnKi/Shaders/GpuVisibility.ankiprog

@@ -3,6 +3,8 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#pragma anki mutator HZB_TEST 0 1
+
 #pragma anki start comp
 
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
@@ -14,19 +16,21 @@
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(2)]] ByteAddressBuffer g_gpuScene;
 
-[[vk::binding(3)]] Texture2D<Vec4> g_hzbTex;
-[[vk::binding(4)]] SamplerState g_nearestAnyClampSampler;
-
 // These 2 have the same size
-[[vk::binding(5)]] RWStructuredBuffer<GpuSceneRenderable> g_instanceRateRenderables;
-[[vk::binding(6)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
+[[vk::binding(3)]] RWStructuredBuffer<GpuSceneRenderable> g_instanceRateRenderables;
+[[vk::binding(4)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
 
 // Index pointing to the above arrays. One for each render state bucket
-[[vk::binding(7)]] StructuredBuffer<U32> g_drawIndirectArgsOffsets;
+[[vk::binding(5)]] StructuredBuffer<U32> g_drawIndirectArgsOffsets;
 // The MDI counts. One for each render state bucket
-[[vk::binding(8)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
+[[vk::binding(6)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
+
+[[vk::binding(7)]] ConstantBuffer<GpuVisibilityUniforms> g_unis;
 
-[[vk::binding(9)]] ConstantBuffer<GpuVisibilityUniforms> g_unis;
+#if HZB_TEST
+[[vk::binding(8)]] Texture2D<Vec4> g_hzbTex;
+[[vk::binding(9)]] SamplerState g_nearestAnyClampSampler;
+#endif
 
 [numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
@@ -82,6 +86,7 @@
 	// HiZ culling
 	//
 
+#if HZB_TEST
 	// Compute the mip
 	Vec2 texSize;
 	F32 mipCount;
@@ -92,7 +97,7 @@
 	const Vec2 sizeXY = (maxUv - minUv) * texSize;
 	F32 mip = ceil(log2(max(sizeXY.x, sizeXY.y)));
 
-#if 0
+#	if 0
 	mip = clamp(mip, 0.0, mipCount - 1.0);
 	const F32 levelLower = max(mip - 1.0, 0.0);
 	const Vec2 scale = exp2(-levelLower);
@@ -104,7 +109,7 @@
 	{
 		mip = levelLower;
 	}
-#endif
+#	endif
 
 	// Sample mip
 	Vec4 depths;
@@ -118,18 +123,18 @@
 	{
 		return;
 	}
+#endif
 
 	// Compute the LOD
 	//
-	const Vec4 nearPlane = g_unis.m_clipPlanes[0];
-	const F32 distFromNearPlane = testPlaneSphere(nearPlane.xyz, nearPlane.w, aabb.m_sphereCenter, -aabb.m_negativeSphereRadius);
+	const F32 distFromLodPoint = length(aabb.m_sphereCenter - g_unis.m_lodReferencePoint) + aabb.m_negativeSphereRadius;
 
 	U32 lod;
-	if(distFromNearPlane < g_unis.m_maxLodDistances[0])
+	if(distFromLodPoint < g_unis.m_maxLodDistances[0])
 	{
 		lod = 0u;
 	}
-	else if(distFromNearPlane < g_unis.m_maxLodDistances[1])
+	else if(distFromLodPoint < g_unis.m_maxLodDistances[1])
 	{
 		lod = 1u;
 	}

+ 1 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -156,7 +156,7 @@ struct GpuVisibilityUniforms
 
 	Vec4 m_maxLodDistances;
 
-	Vec3 m_cameraOrigin;
+	Vec3 m_lodReferencePoint;
 	F32 m_padding2;
 
 	Mat4 m_viewProjectionMat;

+ 5 - 5
Sandbox/Main.cpp

@@ -167,6 +167,11 @@ Error MyApp::userMainLoop(Bool& quit, Second elapsedTime)
 		mousePosOn1stClick = in.getMousePosition();
 	}
 
+	if(in.getKey(KeyCode::kF12) == 1 && ANKI_ENABLE_TRACE)
+	{
+		Tracer::getSingleton().setEnabled(!Tracer::getSingleton().getEnabled());
+	}
+
 	if(in.getMouseButton(MouseButton::kRight) || in.hasTouchDevice())
 	{
 		constexpr F32 ROTATE_ANGLE = toRad(2.5f);
@@ -268,11 +273,6 @@ Error MyApp::userMainLoop(Bool& quit, Second elapsedTime)
 			mover->moveLocalZ(moveDistance);
 		}
 
-		if(in.getKey(KeyCode::kF12) == 1 && ANKI_ENABLE_TRACE)
-		{
-			Tracer::getSingleton().setEnabled(!Tracer::getSingleton().getEnabled());
-		}
-
 		const Vec2 velocity = in.getMousePosition() - mousePosOn1stClick;
 		in.moveCursor(mousePosOn1stClick);
 		if(velocity != Vec2(0.0))