Browse Source

Remove the expensive reprojection

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
598e814edd

+ 27 - 3
AnKi/Renderer/GBuffer.cpp

@@ -9,7 +9,6 @@
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/Scale.h>
 #include <AnKi/Renderer/Scale.h>
 #include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Renderer/GpuVisibility.h>
-#include <AnKi/Renderer/Hzb.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Core/ConfigSet.h>
@@ -29,6 +28,11 @@ Error GBuffer::init()
 		err = m_visibility.init();
 		err = m_visibility.init();
 	}
 	}
 
 
+	if(!err)
+	{
+		err = m_hzb.init();
+	}
+
 	if(err)
 	if(err)
 	{
 	{
 		ANKI_R_LOGE("Failed to initialize g-buffer pass");
 		ANKI_R_LOGE("Failed to initialize g-buffer pass");
@@ -61,6 +65,17 @@ Error GBuffer::initInternal()
 		m_colorRtDescrs[i].bake();
 		m_colorRtDescrs[i].bake();
 	}
 	}
 
 
+	// HZB texture that outlives the frame: it's created once here and imported into the render graph by importRenderTargets()
+	{
+		const TextureUsageBit usage = TextureUsageBit::kSampledCompute | TextureUsageBit::kImageComputeWrite;
+
+		// Use the configured height for the 2nd dimension. Passing the width twice silently forced a square HZB
+		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
+			ConfigSet::getSingleton().getRHzbWidth(), ConfigSet::getSingleton().getRHzbHeight(), Format::kR32_Sfloat, usage, "GBuffer HZB");
+		texinit.m_mipmapCount = U8(computeMaxMipmapCount2d(texinit.m_width, texinit.m_height));
+
+		// Clear to 1.0. NOTE(review): presumably the far plane, so that nothing is treated as occluded before the first HZB is built
+		ClearValue clear;
+		clear.m_colorf = {1.0f, 1.0f, 1.0f, 1.0f};
+		m_hzbRt = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSampledCompute, clear);
+	}
+
 	// FB descr
 	// FB descr
 	AttachmentLoadOperation loadop = AttachmentLoadOperation::kDontCare;
 	AttachmentLoadOperation loadop = AttachmentLoadOperation::kDontCare;
 #if ANKI_EXTRA_CHECKS
 #if ANKI_EXTRA_CHECKS
@@ -139,12 +154,16 @@ void GBuffer::importRenderTargets(RenderingContext& ctx)
 		// Already imported once
 		// Already imported once
 		m_runCtx.m_crntFrameDepthRt = rgraph.importRenderTarget(m_depthRts[getRenderer().getFrameCount() & 1].get(), TextureUsageBit::kNone);
 		m_runCtx.m_crntFrameDepthRt = rgraph.importRenderTarget(m_depthRts[getRenderer().getFrameCount() & 1].get(), TextureUsageBit::kNone);
 		m_runCtx.m_prevFrameDepthRt = rgraph.importRenderTarget(m_depthRts[(getRenderer().getFrameCount() + 1) & 1].get());
 		m_runCtx.m_prevFrameDepthRt = rgraph.importRenderTarget(m_depthRts[(getRenderer().getFrameCount() + 1) & 1].get());
+
+		m_runCtx.m_hzbRt = rgraph.importRenderTarget(m_hzbRt.get());
 	}
 	}
 	else
 	else
 	{
 	{
 		m_runCtx.m_crntFrameDepthRt = rgraph.importRenderTarget(m_depthRts[getRenderer().getFrameCount() & 1].get(), TextureUsageBit::kNone);
 		m_runCtx.m_crntFrameDepthRt = rgraph.importRenderTarget(m_depthRts[getRenderer().getFrameCount() & 1].get(), TextureUsageBit::kNone);
 		m_runCtx.m_prevFrameDepthRt =
 		m_runCtx.m_prevFrameDepthRt =
 			rgraph.importRenderTarget(m_depthRts[(getRenderer().getFrameCount() + 1) & 1].get(), TextureUsageBit::kSampledFragment);
 			rgraph.importRenderTarget(m_depthRts[(getRenderer().getFrameCount() + 1) & 1].get(), TextureUsageBit::kSampledFragment);
+
+		m_runCtx.m_hzbRt = rgraph.importRenderTarget(m_hzbRt.get(), TextureUsageBit::kSampledCompute);
 	}
 	}
 }
 }
 
 
@@ -154,8 +173,9 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
 
-	m_visibility.populateRenderGraph(RenderingTechnique::kGBuffer, ctx.m_matrices.m_viewProjection,
-									 ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz(), getRenderer().getHzb().getHzbRt(), rgraph);
+	const CommonMatrices& matrices = (getRenderer().getFrameCount() <= 1) ? ctx.m_matrices : ctx.m_prevMatrices;
+	m_visibility.populateRenderGraph(RenderingTechnique::kGBuffer, matrices.m_viewProjection, matrices.m_cameraTransform.getTranslationPart().xyz(),
+									 m_runCtx.m_hzbRt, rgraph);
 
 
 	const Bool enableVrs =
 	const Bool enableVrs =
 		GrManager::getSingleton().getDeviceCapabilities().m_vrs && ConfigSet::getSingleton().getRVrs() && ConfigSet::getSingleton().getRGBufferVrs();
 		GrManager::getSingleton().getDeviceCapabilities().m_vrs && ConfigSet::getSingleton().getRVrs() && ConfigSet::getSingleton().getRGBufferVrs();
@@ -219,6 +239,10 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 
 	// Only add one dependency to the GPU visibility. No need to track all buffers
 	// Only add one dependency to the GPU visibility. No need to track all buffers
 	pass.newBufferDependency(m_visibility.getMdiDrawCountsBufferHandle(), BufferUsageBit::kIndirectDraw);
 	pass.newBufferDependency(m_visibility.getMdiDrawCountsBufferHandle(), BufferUsageBit::kIndirectDraw);
+
+	// HZB generation
+	m_hzb.populateRenderGraph(m_runCtx.m_crntFrameDepthRt, getRenderer().getInternalResolution(), m_runCtx.m_hzbRt,
+							  UVec2(m_hzbRt->getWidth(), m_hzbRt->getHeight()), ctx);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 4 - 0
AnKi/Renderer/GBuffer.h

@@ -7,6 +7,7 @@
 
 
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Renderer/GpuVisibility.h>
+#include <AnKi/Renderer/HzbHelper.h>
 #include <AnKi/Gr.h>
 #include <AnKi/Gr.h>
 
 
 namespace anki {
 namespace anki {
@@ -73,8 +74,10 @@ public:
 private:
 private:
 	Array<RenderTargetDescription, kGBufferColorRenderTargetCount> m_colorRtDescrs;
 	Array<RenderTargetDescription, kGBufferColorRenderTargetCount> m_colorRtDescrs;
 	Array<TexturePtr, 2> m_depthRts;
 	Array<TexturePtr, 2> m_depthRts;
+	TexturePtr m_hzbRt;
 	FramebufferDescription m_fbDescr;
 	FramebufferDescription m_fbDescr;
 
 
+	HzbHelper m_hzb;
 	GpuVisibility m_visibility;
 	GpuVisibility m_visibility;
 
 
 	class
 	class
@@ -83,6 +86,7 @@ private:
 		Array<RenderTargetHandle, kGBufferColorRenderTargetCount> m_colorRts;
 		Array<RenderTargetHandle, kGBufferColorRenderTargetCount> m_colorRts;
 		RenderTargetHandle m_crntFrameDepthRt;
 		RenderTargetHandle m_crntFrameDepthRt;
 		RenderTargetHandle m_prevFrameDepthRt;
 		RenderTargetHandle m_prevFrameDepthRt;
+		RenderTargetHandle m_hzbRt;
 	} m_runCtx;
 	} m_runCtx;
 
 
 	Error initInternal();
 	Error initInternal();

+ 0 - 1
AnKi/Renderer/GpuVisibility.cpp

@@ -5,7 +5,6 @@
 
 
 #include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/Renderer.h>
-#include <AnKi/Renderer/Hzb.h>
 #include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Scene/ContiguousArrayAllocator.h>
 #include <AnKi/Scene/ContiguousArrayAllocator.h>
 #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
 #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>

+ 0 - 312
AnKi/Renderer/Hzb.cpp

@@ -1,312 +0,0 @@
-// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#include <AnKi/Renderer/Hzb.h>
-#include <AnKi/Renderer/Renderer.h>
-#include <AnKi/Renderer/GBuffer.h>
-#include <AnKi/Renderer/RenderQueue.h>
-#include <AnKi/Core/ConfigSet.h>
-#include <AnKi/Shaders/Include/MiscRendererTypes.h>
-
-#if ANKI_COMPILER_GCC_COMPATIBLE
-#	pragma GCC diagnostic push
-#	pragma GCC diagnostic ignored "-Wunused-function"
-#	pragma GCC diagnostic ignored "-Wignored-qualifiers"
-#elif ANKI_COMPILER_MSVC
-#	pragma warning(push)
-#	pragma warning(disable : 4505)
-#endif
-#define A_CPU
-#include <ThirdParty/FidelityFX/ffx_a.h>
-#include <ThirdParty/FidelityFX/ffx_spd.h>
-#if ANKI_COMPILER_GCC_COMPATIBLE
-#	pragma GCC diagnostic pop
-#elif ANKI_COMPILER_MSVC
-#	pragma warning(pop)
-#endif
-
-namespace anki {
-
-Error Hzb::init()
-{
-	registerDebugRenderTarget("Hzb");
-
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbReprojection.ankiprogbin", m_reproj.m_prog));
-	const ShaderProgramResourceVariant* variant;
-	ShaderProgramResourceVariantInitInfo variantInit(m_reproj.m_prog);
-	for(U32 i = 0; i < m_reproj.m_grProgs.getSize(); ++i)
-	{
-		variantInit.addMutation("SHADOW_TEXTURE_COUNT", i);
-		m_reproj.m_prog->getOrCreateVariant(variantInit, variant);
-		m_reproj.m_grProgs[i].reset(&variant->getProgram());
-	}
-
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/ClearTextureCompute.ankiprogbin", m_clearHzb.m_prog));
-	ShaderProgramResourceVariantInitInfo variantInit2(m_clearHzb.m_prog);
-	variantInit2.addMutation("TEXTURE_DIMENSIONS", 2);
-	variantInit2.addMutation("COMPONENT_TYPE", 1);
-	m_clearHzb.m_prog->getOrCreateVariant(variantInit2, variant);
-	m_clearHzb.m_grProg.reset(&variant->getProgram());
-
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbGenPyramid.ankiprogbin", m_mipmapping.m_prog));
-	for(U32 i = 0; i < 2; ++i)
-	{
-		ShaderProgramResourceVariantInitInfo variantInit(m_mipmapping.m_prog);
-		variantInit.addMutation("REDUCTION_TYPE", i);
-		m_mipmapping.m_prog->getOrCreateVariant(variantInit, variant);
-		m_mipmapping.m_grProgs[i].reset(&variant->getProgram());
-	}
-
-	m_hzbRtDescr = getRenderer().create2DRenderTargetDescription(ConfigSet::getSingleton().getRHzbWidth(), ConfigSet::getSingleton().getRHzbHeight(),
-																 Format::kR32_Uint, "HZB U32");
-	m_hzbRtDescr.m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbRtDescr.m_width, m_hzbRtDescr.m_height, 1));
-	m_hzbRtDescr.bake();
-
-	for(U32 i = 0; i < kMaxShadowCascades; ++i)
-	{
-		RendererString name;
-		name.sprintf("Shadow HZB U32 #%u", i);
-
-		// Calc the size of the cascade
-		UVec2 smSize(ConfigSet::getSingleton().getRShadowMappingTileResolution());
-		smSize >>= chooseDirectionalLightShadowCascadeDetail(i);
-
-		// No need for the HZB to have the same quality of the cascade
-		const UVec2 hzbSize = smSize >> 1;
-
-		m_hzbShadowRtDescrs[i] = getRenderer().create2DRenderTargetDescription(hzbSize.x(), hzbSize.y(), Format::kR32_Uint, name);
-		m_hzbShadowRtDescrs[i].m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height, 1));
-		m_hzbShadowRtDescrs[i].bake();
-	}
-
-	BufferInitInfo buffInit("HzbCounterBuffer");
-	buffInit.m_size = sizeof(U32);
-	buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
-	m_mipmapping.m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
-
-	for(U32 i = 0; i < kMaxShadowCascades; ++i)
-	{
-		RendererString name;
-		name.sprintf("ShadowHzbCounterBuffer", i);
-
-		BufferInitInfo buffInit(name);
-		buffInit.m_size = sizeof(U32);
-		buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
-		m_mipmapping.m_shadowCounterBuffers[i] = GrManager::getSingleton().newBuffer(buffInit);
-	}
-
-	return Error::kNone;
-}
-
-void Hzb::populateRenderGraph(RenderingContext& ctx)
-{
-	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
-	const U32 cascadeCount = ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount;
-	TextureSubresourceInfo firstMipSubresource;
-
-	// Create RTs
-	m_runCtx.m_hzbRt = rgraph.newRenderTarget(m_hzbRtDescr);
-	for(U32 i = 0; i < cascadeCount; ++i)
-	{
-		m_runCtx.m_hzbShadowRts[i] = rgraph.newRenderTarget(m_hzbShadowRtDescrs[i]);
-	}
-
-	// Clear primary HZB
-	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB clear");
-		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
-
-		pass.setWork([this](RenderPassWorkContext& rctx) {
-			CommandBuffer& cmdb = *rctx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_clearHzb.m_grProg.get());
-
-			TextureSubresourceInfo firstMipSubresource;
-			rctx.bindImage(0, 0, m_runCtx.m_hzbRt, firstMipSubresource);
-
-			// See the comments in the class on what this -0 means
-			const F32 negativeZero = -0.0f;
-			U32 negativeZerou;
-			memcpy(&negativeZerou, &negativeZero, sizeof(U32));
-			ANKI_ASSERT(negativeZerou > 0);
-			UVec4 clearColor(negativeZerou);
-			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
-
-			dispatchPPCompute(cmdb, 8, 8, m_hzbRtDescr.m_width, m_hzbRtDescr.m_height);
-		});
-	}
-
-	// Clear SM HZBs
-	for(U32 i = 0; i < cascadeCount; ++i)
-	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB clear");
-		pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, firstMipSubresource);
-
-		pass.setWork([this, i](RenderPassWorkContext& rctx) {
-			CommandBuffer& cmdb = *rctx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_clearHzb.m_grProg.get());
-
-			TextureSubresourceInfo firstMipSubresource;
-			rctx.bindImage(0, 0, m_runCtx.m_hzbShadowRts[i], firstMipSubresource);
-
-			// See the comments in the class on what this -0 means
-			const F32 negativeZero = -0.0f;
-			U32 negativeZerou;
-			memcpy(&negativeZerou, &negativeZero, sizeof(U32));
-			ANKI_ASSERT(negativeZerou > 0);
-			UVec4 clearColor(negativeZerou);
-			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
-
-			dispatchPPCompute(cmdb, 8, 8, m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height);
-		});
-	}
-
-	// Reproject all HZBs
-	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB reprojection");
-
-		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
-		for(U32 i = 0; i < cascadeCount; ++i)
-		{
-			pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, firstMipSubresource);
-		}
-		pass.newTextureDependency(getRenderer().getGBuffer().getPreviousFrameDepthRt(), TextureUsageBit::kSampledCompute);
-
-		pass.setWork([this, &ctx](RenderPassWorkContext& rctx) {
-			const U32 cascadeCount = ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount;
-			CommandBuffer& cmdb = *rctx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_reproj.m_grProgs[cascadeCount].get());
-
-			rctx.bindTexture(0, 0, getRenderer().getGBuffer().getPreviousFrameDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-
-			cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_nearestNearestClamp.get());
-
-			TextureSubresourceInfo firstMipSubresource;
-			rctx.bindImage(0, 2, m_runCtx.m_hzbRt, firstMipSubresource);
-
-			for(U32 i = 0; i < cascadeCount; ++i)
-			{
-				rctx.bindImage(0, 3, m_runCtx.m_hzbShadowRts[i], firstMipSubresource, i);
-			}
-
-			HzbUniforms* unis = allocateAndBindUniforms<HzbUniforms*>(sizeof(*unis), cmdb, 0, 4);
-			unis->m_reprojectionMatrix = ctx.m_matrices.m_reprojection;
-			unis->m_invertedViewProjectionMatrix = ctx.m_matrices.m_invertedViewProjection;
-			for(U32 i = 0; i < ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount; ++i)
-			{
-				unis->m_shadowCascadeViewProjectionMatrices[i] = ctx.m_renderQueue->m_directionalLight.m_viewProjectionMatrices[i];
-			}
-
-			ANKI_ASSERT((getRenderer().getInternalResolution().x() % 2) == 0 && (getRenderer().getInternalResolution().y() % 2) == 0);
-			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
-		});
-	}
-
-	// Mipmap primary HZB
-	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB mip gen");
-
-		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kSampledCompute, firstMipSubresource);
-
-		for(U32 mip = 1; mip < m_hzbRtDescr.m_mipmapCount; ++mip)
-		{
-			TextureSubresourceInfo subresource;
-			subresource.m_firstMipmap = mip;
-			pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, subresource);
-		}
-
-		pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-			runMipmaping(rgraphCtx, m_hzbRtDescr, m_runCtx.m_hzbRt, m_mipmapping.m_counterBufferZeroed, *m_mipmapping.m_counterBuffer,
-						 *m_mipmapping.m_grProgs[0]);
-		});
-	}
-
-	// Mipmap shadow HZBs
-	for(U32 i = 0; i < cascadeCount; ++i)
-	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB mip gen");
-
-		pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kSampledCompute, firstMipSubresource);
-
-		for(U32 mip = 1; mip < m_hzbShadowRtDescrs[i].m_mipmapCount; ++mip)
-		{
-			TextureSubresourceInfo subresource;
-			subresource.m_firstMipmap = mip;
-			pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, subresource);
-		}
-
-		pass.setWork([this, i](RenderPassWorkContext& rgraphCtx) {
-			runMipmaping(rgraphCtx, m_hzbShadowRtDescrs[i], m_runCtx.m_hzbShadowRts[i], m_mipmapping.m_shadowCounterBufferZeroed[i],
-						 *m_mipmapping.m_shadowCounterBuffers[i], *m_mipmapping.m_grProgs[1]);
-		});
-	}
-}
-
-void Hzb::runMipmaping(RenderPassWorkContext& rgraphCtx, const RenderTargetDescription& rtDescr, RenderTargetHandle rtHandle,
-					   Bool& counterBufferZeroed, Buffer& counterBuffer, ShaderProgram& reductionProgram)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	TextureSubresourceInfo firstMipSubresource;
-	const U32 mipsToCompute = rtDescr.m_mipmapCount - 1;
-
-	// Zero the counter buffer once before everything else
-	if(!counterBufferZeroed) [[unlikely]]
-	{
-		counterBufferZeroed = true;
-
-		cmdb.fillBuffer(&counterBuffer, 0, kMaxPtrSize, 0);
-
-		const BufferBarrierInfo barrier = {&counterBuffer, BufferUsageBit::kTransferDestination, BufferUsageBit::kStorageComputeWrite, 0,
-										   kMaxPtrSize};
-		cmdb.setPipelineBarrier({}, {&barrier, 1}, {});
-	}
-
-	cmdb.bindShaderProgram(&reductionProgram);
-
-	varAU2(dispatchThreadGroupCountXY);
-	varAU2(workGroupOffset); // needed if Left and Top are not 0,0
-	varAU2(numWorkGroupsAndMips);
-	varAU4(rectInfo) = initAU4(0, 0, rtDescr.m_width, rtDescr.m_height);
-	SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
-
-	struct Uniforms
-	{
-		U32 m_threadGroupCount;
-		U32 m_mipmapCount;
-		U32 m_padding0;
-		U32 m_padding1;
-	} pc;
-
-	pc.m_threadGroupCount = numWorkGroupsAndMips[0];
-	pc.m_mipmapCount = numWorkGroupsAndMips[1];
-
-	cmdb.setPushConstants(&pc, sizeof(pc));
-
-	constexpr U32 maxMipsSpdCanProduce = 12;
-	for(U32 mip = 0; mip < maxMipsSpdCanProduce; ++mip)
-	{
-		TextureSubresourceInfo subresource;
-		if(mip < mipsToCompute)
-		{
-			subresource.m_firstMipmap = mip + 1;
-		}
-		else
-		{
-			subresource.m_firstMipmap = 1;
-		}
-
-		rgraphCtx.bindImage(0, 0, rtHandle, subresource, mip);
-	}
-
-	cmdb.bindStorageBuffer(0, 1, &counterBuffer, 0, kMaxPtrSize);
-	rgraphCtx.bindTexture(0, 2, rtHandle, firstMipSubresource);
-
-	cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
-}
-
-} // end namespace anki

+ 0 - 81
AnKi/Renderer/Hzb.h

@@ -1,81 +0,0 @@
-// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma once
-
-#include <AnKi/Renderer/RendererObject.h>
-
-namespace anki {
-
-/// @addtogroup renderer
-/// @{
-
-/// Hierarchical depth generator.
-///
-/// The HZB is cleared with -0.0. Then the reprojection may replace the -0.0 of some texels with something in the range of [0.0, 1.0] where 0.0 is the
-/// far and 1.0 the near. So after reprojection we have -0.0 for untouched, 1.0 for near and 0.0 for far. We do this because reprojection will use
-/// atomic min on integers and -0.0 > 1.0 > 0.0 if seen as U32. Downscaling also uses min becase we want the farthest value. When testing against the
-/// HZB we only need to bring depth back to normal which is 0.0 for near and 1.0 for far. So it's a plain 1.0-x. So far becomes 1.0, near is 0.0 and
-/// untouched becomes 1.0 which transaltes to far and it's what we want.
-///
-/// Shadows are a little bit different. Clear clears to -0.0. Reprojection treats 0 as near and 1 as far and chooses the min value because we want to
-/// cull everything behind the shadow. HZB generation reinterprets the values and uses max. Uppon testing we have 0 for near, 1 for far and -0.0 for
-/// clear which is what we want. If nothing reprojected (=clear color) then cull early.
-class Hzb : public RendererObject
-{
-public:
-	Error init();
-
-	void populateRenderGraph(RenderingContext& ctx);
-
-	const RenderTargetHandle& getHzbRt() const
-	{
-		return m_runCtx.m_hzbRt;
-	}
-
-private:
-	RenderTargetDescription m_hzbRtDescr;
-	Array<RenderTargetDescription, kMaxShadowCascades> m_hzbShadowRtDescrs;
-
-	class
-	{
-	public:
-		ShaderProgramResourcePtr m_prog;
-		ShaderProgramPtr m_grProg;
-	} m_clearHzb;
-
-	class
-	{
-	public:
-		ShaderProgramResourcePtr m_prog;
-		Array<ShaderProgramPtr, kMaxShadowCascades + 1> m_grProgs;
-	} m_reproj;
-
-	class
-	{
-	public:
-		ShaderProgramResourcePtr m_prog;
-		Array<ShaderProgramPtr, 2> m_grProgs;
-
-		BufferPtr m_counterBuffer;
-		Bool m_counterBufferZeroed = false;
-
-		Array<BufferPtr, kMaxShadowCascades> m_shadowCounterBuffers;
-		Array<Bool, kMaxShadowCascades> m_shadowCounterBufferZeroed = {};
-	} m_mipmapping;
-
-	class
-	{
-	public:
-		RenderTargetHandle m_hzbRt;
-		Array<RenderTargetHandle, kMaxShadowCascades> m_hzbShadowRts;
-	} m_runCtx;
-
-	static void runMipmaping(RenderPassWorkContext& rgraphCtx, const RenderTargetDescription& rtDescr, RenderTargetHandle rtHandle,
-							 Bool& counterBufferZeroed, Buffer& counterBuffer, ShaderProgram& reductionProgram);
-};
-/// @}
-
-} // end namespace anki

+ 131 - 0
AnKi/Renderer/HzbHelper.cpp

@@ -0,0 +1,131 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Renderer/HzbHelper.h>
+#include <AnKi/Renderer/Renderer.h>
+
+#if ANKI_COMPILER_GCC_COMPATIBLE
+#	pragma GCC diagnostic push
+#	pragma GCC diagnostic ignored "-Wunused-function"
+#	pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#elif ANKI_COMPILER_MSVC
+#	pragma warning(push)
+#	pragma warning(disable : 4505)
+#endif
+#define A_CPU
+#include <ThirdParty/FidelityFX/ffx_a.h>
+#include <ThirdParty/FidelityFX/ffx_spd.h>
+#if ANKI_COMPILER_GCC_COMPATIBLE
+#	pragma GCC diagnostic pop
+#elif ANKI_COMPILER_MSVC
+#	pragma warning(pop)
+#endif
+
+namespace anki {
+
+Error HzbHelper::init() // Creates the (optional) max sampler, the shader variant and the counter buffer used by populateRenderGraph()
+{
+	if(GrManager::getSingleton().getDeviceCapabilities().m_samplingFilterMinMax) // Sampler is only created when the device reports this capability
+	{
+		SamplerInitInfo sinit("HzbReductionMax");
+		sinit.m_addressing = SamplingAddressing::kClamp;
+		sinit.m_mipmapFilter = SamplingFilter::kMax;
+		sinit.m_minMagFilter = SamplingFilter::kMax;
+		m_maxSampler = GrManager::getSingleton().newSampler(sinit);
+	}
+
+	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbGenPyramid.ankiprogbin", m_prog));
+	// Pick the variant: REDUCTION_TYPE 1 is "max" in HzbGenPyramid.ankiprog, MIN_MAX_SAMPLER follows whether the sampler above exists
+	ShaderProgramResourceVariantInitInfo variantInit(m_prog);
+	variantInit.addMutation("REDUCTION_TYPE", 1);
+	variantInit.addMutation("MIN_MAX_SAMPLER", m_maxSampler.isCreated());
+	const ShaderProgramResourceVariant* variant;
+	m_prog->getOrCreateVariant(variantInit, variant);
+	m_grProg.reset(&variant->getProgram());
+	// Single U32 counter; it is zero-filled lazily by the first recorded HZB pass (see m_counterBufferZeroed)
+	BufferInitInfo buffInit("HzbCounterBuffer");
+	buffInit.m_size = sizeof(U32);
+	buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination; // Transfer destination is needed by the fillBuffer() that zeroes it
+	m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
+
+	return Error::kNone;
+}
+
+void HzbHelper::populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
+									RenderingContext& ctx) // Adds one compute pass ("HZB generation") that writes the mips of dstHzbRt from srcDepthRt
+{
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+	TextureSubresourceInfo firstMipSubresource;
+
+	constexpr U32 kMaxSpdMips = 12; // Matches the 12 destination UAVs (g_dstUavs[12u]) declared by HzbGenPyramid.ankiprog
+	const U32 hzbMipCount = min(kMaxSpdMips, computeMaxMipmapCount2d(dstHzbRtSize.x(), dstHzbRtSize.y()));
+
+	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB generation");
+
+	pass.newTextureDependency(srcDepthRt, TextureUsageBit::kSampledCompute, firstMipSubresource); // Source depth is only sampled
+	pass.newTextureDependency(dstHzbRt, TextureUsageBit::kImageComputeWrite); // Destination HZB is written as a storage image
+	// Work recorded for the pass. NOTE(review): srcDepthRtSize is captured but never read inside the lambda
+	pass.setWork([this, hzbMipCount, srcDepthRt, srcDepthRtSize, dstHzbRt, dstHzbRtSize](RenderPassWorkContext& rgraphCtx) {
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+		const U32 mipsToCompute = hzbMipCount;
+
+		// Zero the counter buffer once before everything else
+		if(!m_counterBufferZeroed) [[unlikely]]
+		{
+			m_counterBufferZeroed = true;
+
+			cmdb.fillBuffer(m_counterBuffer.get(), 0, kMaxPtrSize, 0);
+
+			const BufferBarrierInfo barrier = {m_counterBuffer.get(), BufferUsageBit::kTransferDestination, BufferUsageBit::kStorageComputeWrite, 0,
+											   kMaxPtrSize};
+			cmdb.setPipelineBarrier({}, {&barrier, 1}, {}); // Move the counter from transfer-destination to storage-compute-write usage
+		}
+
+		cmdb.bindShaderProgram(m_grProg.get());
+		// SPD setup. The rectangle handed to SpdSetup is twice the HZB size in each dimension
+		varAU2(dispatchThreadGroupCountXY);
+		varAU2(workGroupOffset); // needed if Left and Top are not 0,0
+		varAU2(numWorkGroupsAndMips);
+		varAU4(rectInfo) = initAU4(0, 0, dstHzbRtSize.x() * 2, dstHzbRtSize.y() * 2);
+		SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
+		// Push constants. Same members and order as the Uniforms struct of HzbGenPyramid.ankiprog
+		struct Uniforms
+		{
+			Vec2 m_invSrcTexSize;
+			U32 m_threadGroupCount;
+			U32 m_mipmapCount;
+		} pc;
+
+		pc.m_invSrcTexSize = 1.0f / Vec2(dstHzbRtSize * 2); // Same 2x size that was used for rectInfo
+		pc.m_threadGroupCount = numWorkGroupsAndMips[0];
+		pc.m_mipmapCount = numWorkGroupsAndMips[1];
+
+		cmdb.setPushConstants(&pc, sizeof(pc));
+		// Bind every one of the kMaxSpdMips array elements of binding 0, slots past mipsToCompute just re-bind mip 0
+		for(U32 mip = 0; mip < kMaxSpdMips; ++mip)
+		{
+			TextureSubresourceInfo subresource;
+			if(mip < mipsToCompute)
+			{
+				subresource.m_firstMipmap = mip;
+			}
+			else
+			{
+				subresource.m_firstMipmap = 0; // Put something random
+			}
+
+			rgraphCtx.bindImage(0, 0, dstHzbRt, subresource, mip);
+		}
+		// Remaining bindings: counter buffer, depth aspect of the source and the reduction sampler (trilinear clamp when there is no max sampler)
+		cmdb.bindStorageBuffer(0, 1, m_counterBuffer.get(), 0, kMaxPtrSize);
+		rgraphCtx.bindTexture(0, 2, srcDepthRt, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
+		cmdb.bindSampler(0, 3, m_maxSampler.isCreated() ? m_maxSampler.get() : getRenderer().getSamplers().m_trilinearClamp.get());
+
+		cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
+	});
+}
+
+} // end namespace anki

+ 35 - 0
AnKi/Renderer/HzbHelper.h

@@ -0,0 +1,35 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Renderer/RendererObject.h>
+
+namespace anki {
+
+/// @addtogroup renderer
+/// @{
+
+/// Hierarchical depth (HZB) generator. It writes the mips of a caller-provided HZB render target from a depth render target using one compute pass.
+class HzbHelper : public RendererObject
+{
+public:
+	Error init(); ///< Creates the sampler (if supported), the shader program variant and the counter buffer.
+
+	void populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
+							 RenderingContext& ctx); ///< Adds the "HZB generation" compute pass that reads srcDepthRt and writes dstHzbRt.
+
+private:
+	ShaderProgramResourcePtr m_prog; ///< The HzbGenPyramid program resource.
+	ShaderProgramPtr m_grProg; ///< The variant of m_prog that init() selected.
+
+	SamplerPtr m_maxSampler; ///< Max-filter sampler. Only created when the device reports m_samplingFilterMinMax.
+
+	BufferPtr m_counterBuffer; ///< U32-sized buffer bound as a storage buffer by the generation pass.
+	Bool m_counterBufferZeroed = false; ///< Becomes true when the first recorded pass zero-fills m_counterBuffer.
+};
+/// @}
+
+} // end namespace anki

+ 0 - 5
AnKi/Renderer/Renderer.cpp

@@ -44,7 +44,6 @@
 #include <AnKi/Renderer/IndirectDiffuse.h>
 #include <AnKi/Renderer/IndirectDiffuse.h>
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/PackVisibleClusteredObjects.h>
 #include <AnKi/Renderer/PackVisibleClusteredObjects.h>
-#include <AnKi/Renderer/Hzb.h>
 
 
 namespace anki {
 namespace anki {
 
 
@@ -246,9 +245,6 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 	m_packVisibleClustererObjects.reset(newInstance<PackVisibleClusteredObjects>(RendererMemoryPool::getSingleton()));
 	m_packVisibleClustererObjects.reset(newInstance<PackVisibleClusteredObjects>(RendererMemoryPool::getSingleton()));
 	ANKI_CHECK(m_packVisibleClustererObjects->init());
 	ANKI_CHECK(m_packVisibleClustererObjects->init());
 
 
-	m_hzb.reset(newInstance<Hzb>(RendererMemoryPool::getSingleton()));
-	ANKI_CHECK(m_hzb->init());
-
 	// Init samplers
 	// Init samplers
 	{
 	{
 		SamplerInitInfo sinit("NearestNearestClamp");
 		SamplerInitInfo sinit("NearestNearestClamp");
@@ -343,7 +339,6 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_gbuffer->importRenderTargets(ctx);
 	m_gbuffer->importRenderTargets(ctx);
 
 
 	// Populate render graph. WARNING Watch the order
 	// Populate render graph. WARNING Watch the order
-	m_hzb->populateRenderGraph(ctx);
 	gpuSceneCopy(ctx);
 	gpuSceneCopy(ctx);
 	m_packVisibleClustererObjects->populateRenderGraph(ctx);
 	m_packVisibleClustererObjects->populateRenderGraph(ctx);
 	m_genericCompute->populateRenderGraph(ctx);
 	m_genericCompute->populateRenderGraph(ctx);

+ 0 - 1
AnKi/Renderer/RendererObject.defs.h

@@ -32,4 +32,3 @@ ANKI_RENDERER_OBJECT_DEF(Scale, scale)
 ANKI_RENDERER_OBJECT_DEF(IndirectDiffuse, indirectDiffuse)
 ANKI_RENDERER_OBJECT_DEF(IndirectDiffuse, indirectDiffuse)
 ANKI_RENDERER_OBJECT_DEF(VrsSriGeneration, vrsSriGeneration)
 ANKI_RENDERER_OBJECT_DEF(VrsSriGeneration, vrsSriGeneration)
 ANKI_RENDERER_OBJECT_DEF(PackVisibleClusteredObjects, packVisibleClustererObjects)
 ANKI_RENDERER_OBJECT_DEF(PackVisibleClusteredObjects, packVisibleClustererObjects)
-ANKI_RENDERER_OBJECT_DEF(Hzb, hzb)

+ 6 - 7
AnKi/Shaders/GpuVisibility.ankiprog

@@ -14,7 +14,7 @@
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(1)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(2)]] ByteAddressBuffer g_gpuScene;
 [[vk::binding(2)]] ByteAddressBuffer g_gpuScene;
 
 
-[[vk::binding(3)]] Texture2D<U32> g_hzbTex;
+[[vk::binding(3)]] Texture2D<Vec4> g_hzbTex;
 [[vk::binding(4)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(4)]] SamplerState g_nearestAnyClampSampler;
 
 
 // These 2 have the same size
 // These 2 have the same size
@@ -108,12 +108,11 @@
 
 
 	// Sample mip
 	// Sample mip
 	Vec4 depths;
 	Vec4 depths;
-	depths[0] = asfloat(g_hzbTex.SampleLevel(g_nearestAnyClampSampler, minUv, mip));
-	depths[1] = asfloat(g_hzbTex.SampleLevel(g_nearestAnyClampSampler, maxUv, mip));
-	depths[2] = asfloat(g_hzbTex.SampleLevel(g_nearestAnyClampSampler, Vec2(minUv.x, maxUv.y), mip));
-	depths[3] = asfloat(g_hzbTex.SampleLevel(g_nearestAnyClampSampler, Vec2(maxUv.x, minUv.y), mip));
-
-	const F32 maxDepth = 1.0 - min(depths[0], min(depths[1], min(depths[2], depths[3])));
+	depths[0] = g_hzbTex.SampleLevel(g_nearestAnyClampSampler, minUv, mip);
+	depths[1] = g_hzbTex.SampleLevel(g_nearestAnyClampSampler, maxUv, mip);
+	depths[2] = g_hzbTex.SampleLevel(g_nearestAnyClampSampler, Vec2(minUv.x, maxUv.y), mip);
+	depths[3] = g_hzbTex.SampleLevel(g_nearestAnyClampSampler, Vec2(maxUv.x, minUv.y), mip);
+	const F32 maxDepth = max4(depths);
 
 
 	if(aabbMinDepth > maxDepth)
 	if(aabbMinDepth > maxDepth)
 	{
 	{

+ 30 - 11
AnKi/Shaders/HzbGenPyramid.ankiprog

@@ -4,23 +4,29 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #pragma anki mutator REDUCTION_TYPE 0 1 // 0: min 1: max
 #pragma anki mutator REDUCTION_TYPE 0 1 // 0: min 1: max
+#pragma anki mutator MIN_MAX_SAMPLER 0 1
 
 
 #pragma anki start comp
 #pragma anki start comp
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 
 
 struct Uniforms
 struct Uniforms
 {
 {
+	Vec2 m_invSrcTexSize;
 	U32 m_threadGroupCount;
 	U32 m_threadGroupCount;
 	U32 m_mipmapCount;
 	U32 m_mipmapCount;
-	U32 m_padding0;
-	U32 m_padding1;
 };
 };
 
 
 [[vk::push_constant]] ConstantBuffer<Uniforms> g_uniforms;
 [[vk::push_constant]] ConstantBuffer<Uniforms> g_uniforms;
 
 
-[[vk::binding(0)]] globallycoherent RWTexture2D<UVec4> g_dstUavs[12u];
+[[vk::binding(0)]] globallycoherent RWTexture2D<Vec4> g_dstUavs[12u];
 [[vk::binding(1)]] globallycoherent RWStructuredBuffer<U32> g_spdCounter;
 [[vk::binding(1)]] globallycoherent RWStructuredBuffer<U32> g_spdCounter;
-[[vk::binding(2)]] Texture2D<UVec4> g_srcTex;
+[[vk::binding(2)]] Texture2D<Vec4> g_srcTex;
+
+#if MIN_MAX_SAMPLER
+[[vk::binding(3)]] SamplerState g_minMaxAnyClampSampler;
+#else
+[[vk::binding(3)]] SamplerState g_linearAnyClampSampler;
+#endif
 
 
 // Include SPD
 // Include SPD
 #define A_GPU 1
 #define A_GPU 1
@@ -33,21 +39,33 @@ groupshared AF1 s_spdIntermediateR[16][16];
 AF4 SpdLoadSourceImage(AU2 p, AU1 slice)
 AF4 SpdLoadSourceImage(AU2 p, AU1 slice)
 {
 {
 	ANKI_MAYBE_UNUSED(slice);
 	ANKI_MAYBE_UNUSED(slice);
-	const U32 u = g_srcTex[p].r;
-	return AF4(asfloat(u), 0.0, 0.0, 0.0);
+	const AF2 uv = p * g_uniforms.m_invSrcTexSize + g_uniforms.m_invSrcTexSize;
+
+#if MIN_MAX_SAMPLER
+	const F32 f = g_srcTex.SampleLevel(g_minMaxAnyClampSampler, uv, 0.0).r;
+#else
+	const Vec4 samples = g_srcTex.GatherRed(g_linearAnyClampSampler, uv);
+#	if REDUCTION_TYPE == 0
+	const F32 f = min4(samples);
+#	else
+	const F32 f = max4(samples);
+#	endif
+#endif
+
+	return AF4(f, 0.0, 0.0, 0.0);
 }
 }
 
 
 AF4 SpdLoad(AU2 p, AU1 slice)
 AF4 SpdLoad(AU2 p, AU1 slice)
 {
 {
 	ANKI_MAYBE_UNUSED(slice);
 	ANKI_MAYBE_UNUSED(slice);
-	const U32 u = g_dstUavs[5][p].r;
-	return AF4(asfloat(u), 0.0, 0.0, 0.0);
+	const F32 f = g_dstUavs[5][p].r;
+	return AF4(f, 0.0, 0.0, 0.0);
 }
 }
 
 
 void SpdStore(AU2 p, AF4 value, AU1 mip, AU1 slice)
 void SpdStore(AU2 p, AF4 value, AU1 mip, AU1 slice)
 {
 {
 	ANKI_MAYBE_UNUSED(slice);
 	ANKI_MAYBE_UNUSED(slice);
-	g_dstUavs[mip][p] = Vec4(asuint(value.x), 0.0, 0.0, 0.0);
+	g_dstUavs[mip][p] = Vec4(value.x, 0.0, 0.0, 0.0);
 }
 }
 
 
 void SpdIncreaseAtomicCounter(AU1 slice)
 void SpdIncreaseAtomicCounter(AU1 slice)
@@ -80,13 +98,14 @@ void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value)
 AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
 AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
 {
 {
 #if REDUCTION_TYPE == 0
 #if REDUCTION_TYPE == 0
-	const F32 value = min(v0.x, min(v1.x, min(v2.x, v3.x)));
+	const F32 value = min4(v0.x, v1.x, v2.x, v3.x);
 #else
 #else
-	const F32 value = max(v0.x, max(v1.x, max(v2.x, v3.x)));
+	const F32 value = max4(v0.x, v1.x, v2.x, v3.x);
 #endif
 #endif
 	return AF4(value, 0.0, 0.0, 0.0);
 	return AF4(value, 0.0, 0.0, 0.0);
 }
 }
 
 
+#define SPD_LINEAR_SAMPLER 1
 #include <ThirdParty/FidelityFX/ffx_spd.h>
 #include <ThirdParty/FidelityFX/ffx_spd.h>
 
 
 [numthreads(256, 1, 1)] void main(UVec3 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX)
 [numthreads(256, 1, 1)] void main(UVec3 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX)