Browse Source

Add VRS on SSR. Not working great ATM

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
6d3bddd004

+ 4 - 2
AnKi/Renderer/ConfigVars.defs.h

@@ -28,11 +28,13 @@ ANKI_CONFIG_VAR_U32(RVolumetricLightingAccumulationFinalZSplit, 26, 1, 256,
 
 
 // SSR
 // SSR
 ANKI_CONFIG_VAR_U32(RSsrFirstStepPixels, 32, 1, 256, "The 1st step in ray marching")
 ANKI_CONFIG_VAR_U32(RSsrFirstStepPixels, 32, 1, 256, "The 1st step in ray marching")
-ANKI_CONFIG_VAR_U32(RSsrDepthLod, 2, 0, 1000, "Texture LOD of the depth texture that will be raymarched")
+ANKI_CONFIG_VAR_U32(RSsrDepthLod, ((ANKI_PLATFORM_MOBILE) ? 2 : 0), 0, 1000,
+					"Texture LOD of the depth texture that will be raymarched")
 ANKI_CONFIG_VAR_U32(RSsrMaxSteps, 64, 1, 256, "Max SSR raymarching steps")
 ANKI_CONFIG_VAR_U32(RSsrMaxSteps, 64, 1, 256, "Max SSR raymarching steps")
 ANKI_CONFIG_VAR_BOOL(RSsrStochastic, false, "Stochastic reflections")
 ANKI_CONFIG_VAR_BOOL(RSsrStochastic, false, "Stochastic reflections")
-ANKI_CONFIG_VAR_F32(RSsrRoughnessCutoff, ((ANKI_PLATFORM_MOBILE) ? 0.7f : 0.9f), 0.0f, 1.0f,
+ANKI_CONFIG_VAR_F32(RSsrRoughnessCutoff, ((ANKI_PLATFORM_MOBILE) ? 0.7f : 1.0f), 0.0f, 1.0f,
 					"Materials with roughness higher that this value will fallback to probe reflections")
 					"Materials with roughness higher that this value will fallback to probe reflections")
+ANKI_CONFIG_VAR_F32(RSsrVrsThreshold, 0.05f, 0.0f, 1.0f, "Threshold under which a lower shading rate will be applied")
 
 
 // GI probes
 // GI probes
 ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeTileResolution, ((ANKI_PLATFORM_MOBILE) ? 16 : 32), 8, 32,
 ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeTileResolution, ((ANKI_PLATFORM_MOBILE) ? 16 : 32), 8, 32,

+ 2 - 2
AnKi/Renderer/IndirectDiffuse.cpp

@@ -62,7 +62,7 @@ Error IndirectDiffuse::initInternal()
 
 
 		const UVec2 rez = (size + m_vrs.m_sriTexelDimension - 1) / m_vrs.m_sriTexelDimension;
 		const UVec2 rez = (size + m_vrs.m_sriTexelDimension - 1) / m_vrs.m_sriTexelDimension;
 		m_vrs.m_rtHandle =
 		m_vrs.m_rtHandle =
-			m_r->create2DRenderTargetDescription(rez.x(), rez.y(), Format::R8_UINT, "IndirectDiffuse VRS SRI");
+			m_r->create2DRenderTargetDescription(rez.x(), rez.y(), Format::R8_UINT, "IndirectDiffuseVrsSri");
 		m_vrs.m_rtHandle.bake();
 		m_vrs.m_rtHandle.bake();
 
 
 		ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/IndirectDiffuseVrsSriGeneration.ankiprogbin",
 		ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/IndirectDiffuseVrsSriGeneration.ankiprogbin",
@@ -164,7 +164,7 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 	{
 	{
 		m_runCtx.m_sriRt = rgraph.newRenderTarget(m_vrs.m_rtHandle);
 		m_runCtx.m_sriRt = rgraph.newRenderTarget(m_vrs.m_rtHandle);
 
 
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("VRS SRI generation");
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("IndirectDiffuse VRS SRI gen");
 
 
 		pass.newDependency(RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
 		pass.newDependency(RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
 		pass.newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE,
 		pass.newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE,

+ 159 - 34
AnKi/Renderer/IndirectSpecular.cpp

@@ -31,11 +31,10 @@ Error IndirectSpecular::init()
 
 
 Error IndirectSpecular::initInternal()
 Error IndirectSpecular::initInternal()
 {
 {
-	const U32 width = m_r->getInternalResolution().x() / 2;
-	const U32 height = m_r->getInternalResolution().y() / 2;
+	const UVec2 size = m_r->getInternalResolution() / 2;
 	const Bool preferCompute = getConfig().getRPreferCompute();
 	const Bool preferCompute = getConfig().getRPreferCompute();
 
 
-	ANKI_R_LOGV("Initializing indirect specular. Resolution %ux%u", width, height);
+	ANKI_R_LOGV("Initializing indirect specular. Resolution %ux%u", size.x(), size.y());
 
 
 	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
 	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
 
 
@@ -44,7 +43,8 @@ Error IndirectSpecular::initInternal()
 
 
 	usage |= (preferCompute) ? TextureUsageBit::IMAGE_COMPUTE_WRITE : TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
 	usage |= (preferCompute) ? TextureUsageBit::IMAGE_COMPUTE_WRITE : TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
 
 
-	TextureInitInfo texInit = m_r->create2DRenderTargetInitInfo(width, height, m_r->getHdrFormat(), usage, "SSR #1");
+	TextureInitInfo texInit =
+		m_r->create2DRenderTargetInitInfo(size.x(), size.y(), m_r->getHdrFormat(), usage, "SSR #1");
 	m_rts[0] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
 	m_rts[0] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
 	texInit.setName("SSR #2");
 	texInit.setName("SSR #2");
 	m_rts[1] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
 	m_rts[1] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
@@ -65,6 +65,51 @@ Error IndirectSpecular::initInternal()
 	m_prog->getOrCreateVariant(variantInit, variant);
 	m_prog->getOrCreateVariant(variantInit, variant);
 	m_grProg = variant->getProgram();
 	m_grProg = variant->getProgram();
 
 
+	// Init VRS SRI generation
+	const Bool enableVrs = getGrManager().getDeviceCapabilities().m_vrs && getConfig().getRVrs() && !preferCompute;
+	if(enableVrs)
+	{
+		m_vrs.m_sriTexelDimension = getGrManager().getDeviceCapabilities().m_minShadingRateImageTexelSize;
+		ANKI_ASSERT(m_vrs.m_sriTexelDimension == 8 || m_vrs.m_sriTexelDimension == 16);
+
+		const UVec2 rez = (size + m_vrs.m_sriTexelDimension - 1) / m_vrs.m_sriTexelDimension;
+		m_vrs.m_rtHandle =
+			m_r->create2DRenderTargetDescription(rez.x(), rez.y(), Format::R8_UINT, "IndirectSpecularVrsSri");
+		m_vrs.m_rtHandle.bake();
+
+		ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/IndirectSpecularVrsSriGeneration.ankiprogbin",
+													 m_vrs.m_prog));
+
+		ShaderProgramResourceVariantInitInfo variantInit(m_vrs.m_prog);
+		variantInit.addMutation("SRI_TEXEL_DIMENSION", m_vrs.m_sriTexelDimension);
+
+		if(m_vrs.m_sriTexelDimension == 16 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 32)
+		{
+			// Algorithm's workgroup size is 32, GPU's subgroup size is min 32 -> each workgroup has 1 subgroup -> No
+			// need for shared mem
+			variantInit.addMutation("SHARED_MEMORY", 0);
+		}
+		else if(m_vrs.m_sriTexelDimension == 8 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 16)
+		{
+			// Algorithm's workgroup size is 16, GPU's subgroup size is min 16 -> each workgroup has 1 subgroup -> No
+			// need for shared mem
+			variantInit.addMutation("SHARED_MEMORY", 0);
+		}
+		else
+		{
+			variantInit.addMutation("SHARED_MEMORY", 1);
+		}
+
+		const ShaderProgramResourceVariant* variant;
+		m_vrs.m_prog->getOrCreateVariant(variantInit, variant);
+		m_vrs.m_grProg = variant->getProgram();
+
+		ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/VrsSriVisualizeRenderTarget.ankiprogbin",
+													 m_vrs.m_visualizeProg));
+		m_vrs.m_visualizeProg->getOrCreateVariant(variant);
+		m_vrs.m_visualizeGrProg = variant->getProgram();
+	}
+
 	return Error::NONE;
 	return Error::NONE;
 }
 }
 
 
@@ -72,6 +117,8 @@ void IndirectSpecular::populateRenderGraph(RenderingContext& ctx)
 {
 {
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	const Bool preferCompute = getConfig().getRPreferCompute();
 	const Bool preferCompute = getConfig().getRPreferCompute();
+	const Bool enableVrs = getGrManager().getDeviceCapabilities().m_vrs && getConfig().getRVrs() && !preferCompute;
+	const Bool fbDescrHasVrs = m_fbDescr.m_shadingRateAttachmentTexelWidth > 0;
 
 
 	// Create/import RTs
 	// Create/import RTs
 	const U32 readRtIdx = m_r->getFrameCount() & 1;
 	const U32 readRtIdx = m_r->getFrameCount() & 1;
@@ -88,45 +135,108 @@ void IndirectSpecular::populateRenderGraph(RenderingContext& ctx)
 		m_rtsImportedOnce = true;
 		m_rtsImportedOnce = true;
 	}
 	}
 
 
-	// Create pass
-	RenderPassDescriptionBase* ppass;
-	TextureUsageBit readUsage;
-	TextureUsageBit writeUsage;
-	if(preferCompute)
+	// Re-bake FB descriptor
+	if(!preferCompute && enableVrs != fbDescrHasVrs)
 	{
 	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SSR");
-
-		ppass = &pass;
-		readUsage = TextureUsageBit::SAMPLED_COMPUTE;
-		writeUsage = TextureUsageBit::IMAGE_COMPUTE_WRITE;
+		// Re-bake the FB descriptor if the VRS state has changed
+
+		if(enableVrs)
+		{
+			m_fbDescr.m_shadingRateAttachmentTexelWidth = m_vrs.m_sriTexelDimension;
+			m_fbDescr.m_shadingRateAttachmentTexelHeight = m_vrs.m_sriTexelDimension;
+		}
+		else
+		{
+			m_fbDescr.m_shadingRateAttachmentTexelWidth = 0;
+			m_fbDescr.m_shadingRateAttachmentTexelHeight = 0;
+		}
+
+		m_fbDescr.bake();
 	}
 	}
-	else
+
+	// VRS SRI
+	if(enableVrs)
 	{
 	{
-		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSR");
-		pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_rts[WRITE]});
+		m_runCtx.m_sriRt = rgraph.newRenderTarget(m_vrs.m_rtHandle);
 
 
-		ppass = &pass;
-		readUsage = TextureUsageBit::SAMPLED_FRAGMENT;
-		writeUsage = TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
-	}
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("IndirectSpecular VRS SRI gen");
 
 
-	ppass->newDependency(RenderPassDependency(m_runCtx.m_rts[WRITE], writeUsage));
-	ppass->newDependency(RenderPassDependency(m_runCtx.m_rts[READ], readUsage));
-	ppass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(1), readUsage));
-	ppass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), readUsage));
+		pass.newDependency(RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
+		pass.newDependency(RenderPassDependency(m_runCtx.m_rts[READ], TextureUsageBit::SAMPLED_COMPUTE));
 
 
-	TextureSubresourceInfo hizSubresource;
-	hizSubresource.m_mipmapCount = min(getConfig().getRSsrDepthLod() + 1, m_r->getDepthDownscale().getMipmapCount());
-	ppass->newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
+		pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+			const UVec2 viewport = m_r->getInternalResolution() / 2u;
+
+			CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+
+			cmdb->bindShaderProgram(m_vrs.m_grProg);
+
+			rgraphCtx.bindColorTexture(0, 0, m_runCtx.m_rts[READ]);
+			cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
+			rgraphCtx.bindImage(0, 2, m_runCtx.m_sriRt);
+
+			class
+			{
+			public:
+				Vec4 m_v4;
+			} pc;
 
 
-	ppass->newDependency(RenderPassDependency(m_r->getProbeReflections().getReflectionRt(), readUsage));
+			pc.m_v4 = Vec4(1.0f / Vec2(viewport), getConfig().getRSsrVrsThreshold(), 0.0f);
 
 
-	ppass->newDependency(RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), readUsage));
-	ppass->newDependency(RenderPassDependency(m_r->getMotionVectors().getHistoryLengthRt(), readUsage));
+			cmdb->setPushConstants(&pc, sizeof(pc));
 
 
-	ppass->setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-		run(ctx, rgraphCtx);
-	});
+			dispatchPPCompute(cmdb, m_vrs.m_sriTexelDimension, m_vrs.m_sriTexelDimension, viewport.x(), viewport.y());
+		});
+	}
+
+	// Create pass
+	{
+		RenderPassDescriptionBase* ppass;
+		TextureUsageBit readUsage;
+		TextureUsageBit writeUsage;
+		if(preferCompute)
+		{
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SSR");
+
+			ppass = &pass;
+			readUsage = TextureUsageBit::SAMPLED_COMPUTE;
+			writeUsage = TextureUsageBit::IMAGE_COMPUTE_WRITE;
+		}
+		else
+		{
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSR");
+			pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_rts[WRITE]}, {},
+									(enableVrs) ? m_runCtx.m_sriRt : RenderTargetHandle());
+
+			ppass = &pass;
+			readUsage = TextureUsageBit::SAMPLED_FRAGMENT;
+			writeUsage = TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
+
+			if(enableVrs)
+			{
+				ppass->newDependency(RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::FRAMEBUFFER_SHADING_RATE));
+			}
+		}
+
+		ppass->newDependency(RenderPassDependency(m_runCtx.m_rts[WRITE], writeUsage));
+		ppass->newDependency(RenderPassDependency(m_runCtx.m_rts[READ], readUsage));
+		ppass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(1), readUsage));
+		ppass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), readUsage));
+
+		TextureSubresourceInfo hizSubresource;
+		hizSubresource.m_mipmapCount =
+			min(getConfig().getRSsrDepthLod() + 1, m_r->getDepthDownscale().getMipmapCount());
+		ppass->newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
+
+		ppass->newDependency(RenderPassDependency(m_r->getProbeReflections().getReflectionRt(), readUsage));
+
+		ppass->newDependency(RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), readUsage));
+		ppass->newDependency(RenderPassDependency(m_r->getMotionVectors().getHistoryLengthRt(), readUsage));
+
+		ppass->setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+			run(ctx, rgraphCtx);
+		});
+	}
 }
 }
 
 
 void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
@@ -191,4 +301,19 @@ void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& r
 	}
 	}
 }
 }
 
 
+/// Resolve one of the debug render targets of this pass. "SSR" maps to the render target written this frame, while
+/// "IndirectSpecularVrsSri" maps to the shading rate image and additionally sets the program used to visualize it.
+void IndirectSpecular::getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+											ShaderProgramPtr& optionalShaderProgram) const
+{
+	// The SSR output is returned as is, optionalShaderProgram is left untouched
+	if(rtName == "SSR")
+	{
+		handle = m_runCtx.m_rts[WRITE];
+		return;
+	}
+
+	// Anything else is expected to be the shading rate image
+	ANKI_ASSERT(rtName == "IndirectSpecularVrsSri");
+	handle = m_runCtx.m_sriRt;
+	optionalShaderProgram = m_vrs.m_visualizeGrProg;
+}
+
 } // end namespace anki
 } // end namespace anki

+ 16 - 5
AnKi/Renderer/IndirectSpecular.h

@@ -20,6 +20,7 @@ public:
 		: RendererObject(r)
 		: RendererObject(r)
 	{
 	{
 		registerDebugRenderTarget("SSR");
 		registerDebugRenderTarget("SSR");
+		registerDebugRenderTarget("IndirectSpecularVrsSri");
 	}
 	}
 
 
 	~IndirectSpecular();
 	~IndirectSpecular();
@@ -48,10 +49,24 @@ private:
 
 
 	ImageResourcePtr m_noiseImage;
 	ImageResourcePtr m_noiseImage;
 
 
+	/// Resources used to generate and visualize the VRS shading rate image (SRI) of the SSR pass. They are only
+	/// initialized by initInternal() when VRS is enabled.
+	class
+	{
+	public:
+		ShaderProgramResourcePtr m_prog; ///< Loaded from IndirectSpecularVrsSriGeneration.ankiprogbin.
+		ShaderProgramPtr m_grProg; ///< Variant of m_prog picked by SRI_TEXEL_DIMENSION and SHARED_MEMORY.
+		RenderTargetDescription m_rtHandle; ///< Baked description of the R8_UINT SRI render target.
+
+		ShaderProgramResourcePtr m_visualizeProg; ///< Loaded from VrsSriVisualizeRenderTarget.ankiprogbin.
+		ShaderProgramPtr m_visualizeGrProg; ///< Handed out by getDebugRenderTarget() to visualize the SRI.
+
+		/// Overwritten in initInternal() with the device's m_minShadingRateImageTexelSize. Asserted to be 8 or 16.
+		U32 m_sriTexelDimension = 16;
+	} m_vrs;
+
 	class
 	class
 	{
 	{
 	public:
 	public:
 		Array<RenderTargetHandle, 2> m_rts;
 		Array<RenderTargetHandle, 2> m_rts;
+		RenderTargetHandle m_sriRt;
 	} m_runCtx;
 	} m_runCtx;
 
 
 	ANKI_USE_RESULT Error initInternal();
 	ANKI_USE_RESULT Error initInternal();
@@ -59,11 +74,7 @@ private:
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 
 
 	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
 	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
-							  ShaderProgramPtr& optionalShaderProgram) const override
-	{
-		ANKI_ASSERT(rtName == "SSR");
-		handle = m_runCtx.m_rts[WRITE];
-	}
+							  ShaderProgramPtr& optionalShaderProgram) const override;
 };
 };
 /// @}
 /// @}
 
 

+ 168 - 0
AnKi/Shaders/IndirectSpecularVrsSriGeneration.ankiprog

@@ -0,0 +1,168 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki mutator SRI_TEXEL_DIMENSION 8 16
+#pragma anki mutator SHARED_MEMORY 0 1
+
+#pragma anki start comp
+
+#include <AnKi/Shaders/Functions.glsl>
+#include <AnKi/Shaders/TonemappingFunctions.glsl>
+
+layout(set = 0, binding = 0) uniform ANKI_RP texture2D u_inputTex;
+layout(set = 0, binding = 1) uniform sampler u_nearestClampSampler;
+
+#if SRI_TEXEL_DIMENSION == 8
+const UVec2 REGION_SIZE = UVec2(2u, 2u);
+#else
+const UVec2 REGION_SIZE = UVec2(2u, 4u);
+#endif
+
+const UVec2 WORKGROUP_SIZE = UVec2(SRI_TEXEL_DIMENSION) / REGION_SIZE;
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+layout(set = 0, binding = 2) uniform writeonly uimage2D u_sriImg;
+
+layout(push_constant, std140, row_major) uniform b_pc
+{
+	Vec2 u_oneOverViewportSize;
+	F32 u_threshold;
+	F32 u_padding0;
+};
+
+#if SHARED_MEMORY
+// Ideally, we'd be able to calculate the min/max/average using subgroup operations, but there's no guarantee
+// subgroupSize is large enough so we need shared memory as a fallback. We need gl_NumSubgroups entries, but it is not a
+// constant, so estimate it assuming a subgroupSize of at least 8.
+const U32 SHARED_MEMORY_ENTRIES = WORKGROUP_SIZE.x * WORKGROUP_SIZE.y / 8u;
+shared F32 s_averageLuma[SHARED_MEMORY_ENTRIES];
+shared Vec2 s_maxDerivative[SHARED_MEMORY_ENTRIES];
+#endif
+
+// Sample the input texture at uv, displaced by (offsetX, offsetY) texels, and return its luminance compressed with
+// 2*luma/(1+luma) and clamped to a maximum of 1.0.
+F32 sampleLuma(Vec2 uv, I32 offsetX, I32 offsetY)
+{
+	// u_oneOverViewportSize converts the texel offset to UV space
+	uv += Vec2(offsetX, offsetY) * u_oneOverViewportSize;
+	const Vec3 color = textureLod(sampler2D(u_inputTex, u_nearestClampSampler), uv, 0.0).xyz;
+	const F32 luma = computeLuminance(color);
+
+	return min(1.0, (2.0f * luma) / (1.0f + luma));
+}
+
+// Every invocation samples the luminance of a REGION_SIZE region of the input and computes the max horizontal and
+// vertical luminance differences and the average luminance of that region. The results are then reduced across the
+// whole workgroup (a workgroup covers a single SRI texel) and the 1st invocation converts the max luminance
+// difference, relative to the average luminance, to a shading rate and writes it to u_sriImg.
+void main()
+{
+	// UV of the center of the 1st texel of this invocation's region
+	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) * Vec2(REGION_SIZE) + 0.5) * u_oneOverViewportSize;
+
+#if SRI_TEXEL_DIMENSION == 8
+	// Get luminance.
+	// l0.z  l0.w
+	// l0.x  l0.y
+	Vec4 l0;
+	l0.x = sampleLuma(uv, 0, 0);
+	l0.y = sampleLuma(uv, 1, 0);
+	l0.z = sampleLuma(uv, 0, 1);
+	l0.w = sampleLuma(uv, 1, 1);
+
+	// Calculate derivatives.
+	Vec2 a = Vec2(l0.y, l0.z);
+	Vec2 b = Vec2(l0.x, l0.w);
+	const Vec2 dx = abs(a - b);
+
+	a = Vec2(l0.z, l0.w);
+	b = Vec2(l0.x, l0.y);
+	const Vec2 dy = abs(a - b);
+
+	F32 maxDerivativeX = max(dx.x, dx.y);
+	F32 maxDerivativeY = max(dy.x, dy.y);
+
+	F32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
+#else
+	// Get luminance.
+	//       l2.z
+	// l1.z  l1.w  l2.y
+	// l1.x  l1.y
+	// l0.z  l0.w  l2.x
+	// l0.x  l0.y
+	Vec4 l0;
+	l0.x = sampleLuma(uv, 0, 0);
+	l0.y = sampleLuma(uv, 1, 0);
+	l0.z = sampleLuma(uv, 0, 1);
+	l0.w = sampleLuma(uv, 1, 1);
+
+	Vec4 l1;
+	l1.x = sampleLuma(uv, 0, 2);
+	l1.y = sampleLuma(uv, 1, 2);
+	l1.z = sampleLuma(uv, 0, 3);
+	l1.w = sampleLuma(uv, 1, 3);
+
+	Vec3 l2;
+	l2.x = sampleLuma(uv, 2, 1);
+	l2.y = sampleLuma(uv, 2, 3);
+	l2.z = sampleLuma(uv, 1, 4);
+
+	// Calculate derivatives.
+	Vec4 a = Vec4(l0.y, l2.x, l1.y, l2.y);
+	Vec4 b = Vec4(l0.x, l0.w, l1.x, l1.w);
+	const Vec4 dx = abs(a - b);
+
+	a = Vec4(l0.z, l0.w, l1.z, l2.z);
+	b = Vec4(l0.x, l0.y, l1.x, l1.w);
+	const Vec4 dy = abs(a - b);
+
+	F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
+	F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
+
+	const Vec4 sumL0L1 = l0 + l1;
+	F32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
+#endif
+
+	// Reduce inside the subgroup. averageLuma becomes the sum of the per-invocation averages, it is divided by the
+	// workgroup size at the end
+	maxDerivativeX = subgroupMax(maxDerivativeX);
+	maxDerivativeY = subgroupMax(maxDerivativeY);
+	averageLuma = subgroupAdd(averageLuma);
+
+#if SHARED_MEMORY
+	// Store the per-subgroup results in shared memory. Both the luma sum and the max derivatives need to be stored
+	// because the workgroup may consist of multiple subgroups.
+	ANKI_BRANCH if(subgroupElect())
+	{
+		s_averageLuma[gl_SubgroupID] = averageLuma;
+		s_maxDerivative[gl_SubgroupID] = Vec2(maxDerivativeX, maxDerivativeY);
+	}
+
+	memoryBarrierShared();
+	barrier();
+#endif
+
+	// Write the result
+	ANKI_BRANCH if(gl_LocalInvocationIndex == 0u)
+	{
+		// Get the luma sum and the max derivatives across all subgroups.
+#if SHARED_MEMORY
+		averageLuma = s_averageLuma[0];
+		Vec2 maxDerivative = s_maxDerivative[0];
+
+		for(U32 i = 1u; i < gl_NumSubgroups; ++i)
+		{
+			averageLuma += s_averageLuma[i];
+			maxDerivative = max(maxDerivative, s_maxDerivative[i]);
+		}
+#else
+		const Vec2 maxDerivative = Vec2(maxDerivativeX, maxDerivativeY);
+#endif
+
+		// Determine shading rate.
+		const F32 avgLuma = averageLuma / F32(WORKGROUP_SIZE.x * WORKGROUP_SIZE.y);
+		const Vec2 lumaDiff = maxDerivative / avgLuma;
+		const F32 threshold1 = u_threshold;
+		const F32 threshold2 = threshold1 * 0.4;
+
+		UVec2 rate;
+		rate.x = (lumaDiff.x > threshold1) ? 1u : ((lumaDiff.x > threshold2) ? 2u : 4u);
+		rate.y = (lumaDiff.y > threshold1) ? 1u : ((lumaDiff.y > threshold2) ? 2u : 4u);
+
+		// One workgroup produces one SRI texel
+		const UVec2 outTexelCoord = gl_WorkGroupID.xy;
+		imageStore(u_sriImg, IVec2(outTexelCoord), UVec4(encodeVrsRate(rate)));
+	}
+}
+
+#pragma anki end

+ 6 - 2
Samples/Common/SampleApp.cpp

@@ -86,14 +86,18 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 
 
 	if(in.getKey(KeyCode::P) == 1)
 	if(in.getKey(KeyCode::P) == 1)
 	{
 	{
-		static U32 idx = 2;
+		static U32 idx = 3;
 		++idx;
 		++idx;
-		idx %= 3;
+		idx %= 4;
 		if(idx == 0)
 		if(idx == 0)
 		{
 		{
 			renderer.setCurrentDebugRenderTarget("IndirectDiffuseVrsSri");
 			renderer.setCurrentDebugRenderTarget("IndirectDiffuseVrsSri");
 		}
 		}
 		else if(idx == 1)
 		else if(idx == 1)
+		{
+			renderer.setCurrentDebugRenderTarget("IndirectSpecularVrsSri");
+		}
+		else if(idx == 2)
 		{
 		{
 			renderer.setCurrentDebugRenderTarget("VRS");
 			renderer.setCurrentDebugRenderTarget("VRS");
 		}
 		}

+ 28 - 0
Sandbox/Main.cpp

@@ -374,6 +374,34 @@ Error MyApp::userMainLoop(Bool& quit, Second elapsedTime)
 												 : "MotionVectorsHistoryLength");
 												 : "MotionVectorsHistoryLength");
 	}
 	}
 
 
+	if(in.getKey(KeyCode::P) == 1)
+	{
+		static U32 idx = 3;
+		++idx;
+		idx %= 4;
+		if(idx == 0)
+		{
+			renderer.setCurrentDebugRenderTarget("IndirectDiffuseVrsSri");
+		}
+		else if(idx == 1)
+		{
+			renderer.setCurrentDebugRenderTarget("IndirectSpecularVrsSri");
+		}
+		else if(idx == 2)
+		{
+			renderer.setCurrentDebugRenderTarget("VRS");
+		}
+		else
+		{
+			renderer.setCurrentDebugRenderTarget("");
+		}
+	}
+
+	if(in.getKey(KeyCode::J) == 1)
+	{
+		m_config.setRVrs(!m_config.getRVrs());
+	}
+
 	if(in.getEvent(InputEvent::WINDOW_CLOSED))
 	if(in.getEvent(InputEvent::WINDOW_CLOSED))
 	{
 	{
 		quit = true;
 		quit = true;