Browse Source

Remove the remaining spec consts from the shaders

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
655f66f99c

+ 7 - 1
AnKi/Renderer/ClusterBinning.cpp

@@ -35,7 +35,6 @@ Error ClusterBinning::init()
 	{
 		ShaderProgramResourceVariantInitInfo inf(m_binningProg);
 		inf.addMutation("OBJECT_TYPE", MutatorValue(type));
-		inf.addConstant("kZSplitCount", getRenderer().getZSplitCount());
 		const ShaderProgramResourceVariant* variant;
 		m_binningProg->getOrCreateVariant(inf, variant);
 		m_binningGrProgs[type].reset(&variant->getProgram());
@@ -177,6 +176,11 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 
 					Vec4 m_nearPlaneWorld;
 
+					I32 m_zSplitCountMinusOne;
+					I32 m_padding0;
+					I32 m_padding1;
+					I32 m_padding2;
+
 					Mat4 m_invertedViewProjMat;
 				} consts;
 
@@ -190,6 +194,8 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 				extractClipPlane(ctx.m_matrices.m_viewProjection, FrustumPlaneType::kNear, nearPlane);
 				consts.m_nearPlaneWorld = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
 
+				consts.m_zSplitCountMinusOne = getRenderer().getZSplitCount() - 1;
+
 				consts.m_invertedViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 
 				cmdb.setPushConstants(&consts, sizeof(consts));

+ 3 - 7
AnKi/Renderer/FinalComposite.cpp

@@ -38,9 +38,6 @@ Error FinalComposite::initInternal()
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 	variantInitInfo.addMutation("FILM_GRAIN", (g_filmGrainStrengthCVar.get() > 0.0) ? 1 : 0);
 	variantInitInfo.addMutation("BLOOM_ENABLED", 1);
-	variantInitInfo.addConstant("kLutSize", U32(kLutSize));
-	variantInitInfo.addConstant("kFramebufferSize", getRenderer().getPostProcessResolution());
-	variantInitInfo.addConstant("kMotionBlurSamples", g_motionBlurSamplesCVar.get());
 
 	for(U32 dbg = 0; dbg < 2; ++dbg)
 	{
@@ -73,9 +70,8 @@ Error FinalComposite::loadColorGradingTextureImage(CString filename)
 {
 	m_lut.reset(nullptr);
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource(filename, m_lut));
-	ANKI_ASSERT(m_lut->getWidth() == kLutSize);
-	ANKI_ASSERT(m_lut->getHeight() == kLutSize);
-	ANKI_ASSERT(m_lut->getDepth() == kLutSize);
+	ANKI_ASSERT(m_lut->getWidth() == m_lut->getHeight());
+	ANKI_ASSERT(m_lut->getWidth() == m_lut->getDepth());
 
 	return Error::kNone;
 }
@@ -167,7 +163,7 @@ void FinalComposite::run(RenderPassWorkContext& rgraphCtx)
 
 		if(g_filmGrainStrengthCVar.get() > 0.0f)
 		{
-			const UVec4 pc(0, 0, floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32);
+			const UVec4 pc(g_motionBlurSamplesCVar.get(), floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32, 0);
 			cmdb.setPushConstants(&pc, sizeof(pc));
 		}
 	}

+ 0 - 2
AnKi/Renderer/FinalComposite.h

@@ -30,8 +30,6 @@ public:
 	Error loadColorGradingTextureImage(CString filename);
 
 private:
-	static constexpr U kLutSize = 16;
-
 	FramebufferDescription m_fbDescr;
 
 	ShaderProgramResourcePtr m_prog;

+ 1 - 9
AnKi/Renderer/GBufferPost.cpp

@@ -26,15 +26,7 @@ Error GBufferPost::initInternal()
 	ANKI_R_LOGV("Initializing GBufferPost pass");
 
 	// Load shaders
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GBufferPost.ankiprogbin", m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GBufferPost.ankiprogbin", m_prog, m_grProg));
 
 	// Create FB descr
 	m_fbDescr.m_colorAttachmentCount = 2;

+ 1 - 21
AnKi/Renderer/LensFlare.cpp

@@ -33,30 +33,10 @@ Error LensFlare::initInternal()
 {
 	ANKI_R_LOGV("Initializing lens flare");
 
-	ANKI_CHECK(initSprite());
-	ANKI_CHECK(initOcclusion());
-
-	return Error::kNone;
-}
-
-Error LensFlare::initSprite()
-{
 	m_maxSpritesPerFlare = g_lensFlareMaxSpritesPerFlareCVar.get();
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareSprite.ankiprogbin", m_realProg, m_realGrProg));
 
-	return Error::kNone;
-}
-
-Error LensFlare::initOcclusion()
-{
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LensFlareUpdateIndirectInfo.ankiprogbin", m_updateIndirectBuffProg));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_updateIndirectBuffProg);
-	variantInitInfo.addConstant("kInDepthMapSize",
-								UVec2(getRenderer().getInternalResolution().x() / 2 / 2, getRenderer().getInternalResolution().y() / 2 / 2));
-	const ShaderProgramResourceVariant* variant;
-	m_updateIndirectBuffProg->getOrCreateVariant(variantInitInfo, variant);
-	m_updateIndirectBuffGrProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareUpdateIndirectInfo.ankiprogbin", m_updateIndirectBuffProg, m_updateIndirectBuffGrProg));
 
 	return Error::kNone;
 }

+ 0 - 3
AnKi/Renderer/LensFlare.h

@@ -47,9 +47,6 @@ private:
 		BufferHandle m_indirectBuffHandle;
 	} m_runCtx;
 
-	Error initSprite();
-	Error initOcclusion();
-
 	Error initInternal();
 };
 /// @}

+ 6 - 19
AnKi/Renderer/LightShading.cpp

@@ -55,15 +55,7 @@ Error LightShading::init()
 Error LightShading::initLightShading()
 {
 	// Load shaders and programs
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LightShading.ankiprogbin", m_lightShading.m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_lightShading.m_prog);
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-	const ShaderProgramResourceVariant* variant;
-
-	m_lightShading.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_lightShading.m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LightShading.ankiprogbin", m_lightShading.m_prog, m_lightShading.m_grProg));
 
 	// Create RT descr
 	const UVec2 internalResolution = getRenderer().getInternalResolution();
@@ -111,15 +103,7 @@ Error LightShading::initSkybox()
 Error LightShading::initApplyFog()
 {
 	// Load shaders and programs
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LightShadingApplyFog.ankiprogbin", m_applyFog.m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_applyFog.m_prog);
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-	variantInitInfo.addConstant("kFinalZSplit", getRenderer().getVolumetricFog().getFinalClusterInZ());
-
-	const ShaderProgramResourceVariant* variant;
-	m_applyFog.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_applyFog.m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LightShadingApplyFog.ankiprogbin", m_applyFog.m_prog, m_applyFog.m_grProg));
 
 	return Error::kNone;
 }
@@ -253,10 +237,13 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		class PushConsts
 		{
 		public:
-			Vec2 m_padding;
+			F32 m_zSplitCount;
+			F32 m_finalZSplit;
 			F32 m_near;
 			F32 m_far;
 		} regs;
+		regs.m_zSplitCount = F32(getRenderer().getZSplitCount());
+		regs.m_finalZSplit = F32(getRenderer().getVolumetricFog().getFinalClusterInZ());
 		regs.m_near = ctx.m_cameraNear;
 		regs.m_far = ctx.m_cameraFar;
 

+ 7 - 7
AnKi/Renderer/MotionVectors.cpp

@@ -26,14 +26,9 @@ Error MotionVectors::initInternal()
 	ANKI_R_LOGV("Initializing motion vectors");
 
 	// Prog
-	CString progFname =
+	const CString progFname =
 		(g_preferComputeCVar.get()) ? "ShaderBinaries/MotionVectorsCompute.ankiprogbin" : "ShaderBinaries/MotionVectorsRaster.ankiprogbin";
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource(progFname, m_prog));
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kFramebufferSize", UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()));
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram(progFname, m_prog, m_grProg));
 
 	// RTs
 	m_motionVectorsRtDescr = getRenderer().create2DRenderTargetDescription(
@@ -127,12 +122,17 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 			Mat4 m_reprojectionMat;
 			Mat4 m_viewProjectionInvMat;
 			Mat4 m_prevViewProjectionInvMat;
+
+			Vec2 m_viewportSize;
+			F32 m_padding0;
+			F32 m_padding1;
 		} * pc;
 		pc = allocateAndBindConstants<Constants>(cmdb, 0, 5);
 
 		pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
 		pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 		pc->m_prevViewProjectionInvMat = ctx.m_prevMatrices.m_invertedViewProjectionJitter;
+		pc->m_viewportSize = Vec2(getRenderer().getInternalResolution());
 
 		if(g_preferComputeCVar.get())
 		{

+ 6 - 6
AnKi/Renderer/RtShadows.cpp

@@ -78,8 +78,6 @@ Error RtShadows::initInternal()
 	{
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsDenoise.ankiprogbin", m_denoiseProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
-		variantInitInfo.addConstant("kMinSampleCount", 8u);
-		variantInitInfo.addConstant("kMaxSampleCount", 32u);
 		variantInitInfo.addMutation("BLUR_ORIENTATION", 0);
 
 		const ShaderProgramResourceVariant* variant;
@@ -545,10 +543,12 @@ void RtShadows::runDenoise(const RenderingContext& ctx, RenderPassWorkContext& r
 
 	rgraphCtx.bindUavTexture(0, 6, (horizontal) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
 
-	RtShadowsDenoiseConstants unis;
-	unis.m_invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
-	unis.m_time = F32(GlobalFrameIndex::getSingleton().m_value % 0xFFFFu);
-	cmdb.setPushConstants(&unis, sizeof(unis));
+	RtShadowsDenoiseConstants consts;
+	consts.m_invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
+	consts.m_time = F32(GlobalFrameIndex::getSingleton().m_value % 0xFFFFu);
+	consts.m_minSampleCount = 8;
+	consts.m_maxSampleCount = 32;
+	cmdb.setPushConstants(&consts, sizeof(consts));
 
 	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
 }

+ 3 - 3
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -48,9 +48,6 @@ Error ShadowmapsResolve::initInternal()
 																						: "ShaderBinaries/ShadowmapsResolveRaster.ankiprogbin",
 															m_prog));
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kFramebufferSize", UVec2(width, height));
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
 	variantInitInfo.addMutation("PCF", g_shadowMappingPcfCVar.get() != 0);
 	variantInitInfo.addMutation("DIRECTIONAL_LIGHT_SHADOW_RESOLVED", getRenderer().getRtShadowsEnabled());
 	const ShaderProgramResourceVariant* variant;
@@ -147,6 +144,9 @@ void ShadowmapsResolve::run(RenderPassWorkContext& rgraphCtx)
 		rgraphCtx.bindColorTexture(0, 9, getRenderer().getRtShadows().getRt());
 	}
 
+	const Vec4 consts(F32(m_rtDescr.m_width), F32(m_rtDescr.m_height), 0.0f, 0.0f);
+	cmdb.setPushConstants(&consts, sizeof(consts));
+
 	if(g_preferComputeCVar.get())
 	{
 		rgraphCtx.bindUavTexture(0, 10, m_runCtx.m_rt, TextureSubresourceInfo());

+ 0 - 8
AnKi/Renderer/TemporalAA.cpp

@@ -34,17 +34,9 @@ Error TemporalAA::initInternal()
 
 	{
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-		variantInitInfo.addConstant("kVarianceClippingGamma", 2.7f); // Variance clipping paper proposes 1.0
-		variantInitInfo.addConstant("kBlendFactor", 1.0f / 16.0f);
 		variantInitInfo.addMutation("VARIANCE_CLIPPING", 1);
 		variantInitInfo.addMutation("YCBCR", 0);
 
-		if(g_preferComputeCVar.get())
-		{
-			variantInitInfo.addConstant("kFramebufferSize",
-										UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()));
-		}
-
 		const ShaderProgramResourceVariant* variant;
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
 		m_grProg.reset(&variant->getProgram());

+ 2 - 10
AnKi/Renderer/Tonemapping.cpp

@@ -30,18 +30,10 @@ Error Tonemapping::initInternal()
 	ANKI_R_LOGV("Initializing tonemapping. Resolution %ux%u", width, height);
 
 	// Create program
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/TonemappingAverageLuminance.ankiprogbin", m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kInputTexSize", UVec2(width, height));
-
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/TonemappingAverageLuminance.ankiprogbin", m_prog, m_grProg));
 
 	// Create exposure texture.
-	// WARNING: Use it only as IMAGE and nothing else. It will not be tracked by the rendergraph. No tracking means no
-	// automatic image transitions
+	// WARNING: Use it only as IMAGE and nothing else. It will not be tracked by the rendergraph. No tracking means no automatic image transitions
 	const TextureUsageBit usage = TextureUsageBit::kAllUav;
 	const TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(1, 1, Format::kR16G16_Sfloat, usage, "ExposureAndAvgLum1x1");
 	ClearValue clearValue;

+ 6 - 3
AnKi/Shaders/ClusterBinning.ankiprog

@@ -12,8 +12,6 @@
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 1u);
-
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 typedef GpuSceneLight GpuSceneType;
 #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
@@ -39,6 +37,11 @@ struct ClusterBinningConstants
 
 	Vec4 m_nearPlaneWorld;
 
+	I32 m_zSplitCountMinusOne;
+	I32 m_padding0;
+	I32 m_padding1;
+	I32 m_padding2;
+
 	Mat4 m_invertedViewProjMat;
 };
 
@@ -196,7 +199,7 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 		}
 
 		const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
-		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
+		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT

+ 5 - 9
AnKi/Shaders/ClusteredShadingFunctions.hlsl

@@ -71,7 +71,7 @@ U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileCountX)
 /// Merge the tiles with z splits into a single cluster.
 Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 {
-// #define ANKI_OR_MASKS(x) WaveActiveBitOr(x)
+//#define ANKI_OR_MASKS(x) WaveActiveBitOr(x)
 #define ANKI_OR_MASKS(x) (x)
 
 	Cluster outCluster;
@@ -97,18 +97,14 @@ Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 }
 
 /// Get the final cluster after ORing and ANDing the masks.
-Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, Vec3 fragCoord, UVec2 tileCounts, U32 zSplitCount, F32 a, F32 b)
+Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, ClusteredShadingConstants consts, Vec3 fragCoord)
 {
-	const Cluster tileCluster = clusters[computeTileClusterIndexFragCoord(fragCoord.xy, tileCounts.x)];
-	const Cluster zCluster = clusters[computeZSplitClusterIndex(fragCoord.z, zSplitCount, a, b) + tileCounts.x * tileCounts.y];
+	const Cluster tileCluster = clusters[computeTileClusterIndexFragCoord(fragCoord.xy, consts.m_tileCounts.x)];
+	const Cluster zCluster = clusters[computeZSplitClusterIndex(fragCoord.z, consts.m_zSplitCount, consts.m_zSplitMagic.x, consts.m_zSplitMagic.y)
+									  + consts.m_tileCounts.x * consts.m_tileCounts.y];
 	return mergeClusters(tileCluster, zCluster);
 }
 
-Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, ClusteredShadingConstants unis, Vec3 fragCoord)
-{
-	return getClusterFragCoord(clusters, fragCoord, unis.m_tileCounts, unis.m_zSplitCount, unis.m_zSplitMagic.x, unis.m_zSplitMagic.y);
-}
-
 U32 iteratePointLights(inout Cluster cluster)
 {
 	for(U32 block = 0; block < kMaxVisibleLights / 32; ++block)

+ 10 - 12
AnKi/Shaders/FinalComposite.ankiprog

@@ -15,10 +15,6 @@
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/MotionBlur.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kLutSize, 0u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 1u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kMotionBlurSamples, 3u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
@@ -33,21 +29,24 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kMotionBlurSamples, 3u);
 
 struct PushConstants
 {
-	Vec2 m_padding0;
+	U32 m_motionBlurSamples; // Sample budget handed to motionBlur(); zero skips the motion blur path in main()
 	F32 m_filmGrainStrength;
 	U32 m_frameCount;
+	U32 m_padding; // Unused; the host side writes 0 into this fourth component
 };
 
 [[vk::push_constant]] ConstantBuffer<PushConstants> g_pc;
 
 RVec3 colorGrading(RVec3 color)
 {
-	constexpr RF32 kLutSizef = RF32(kLutSize);
-	constexpr RVec3 kLutScale = ((kLutSizef - 1.0) / kLutSizef).xxx;
-	constexpr RVec3 kLutOffset = (1.0 / (2.0 * kLutSize)).xxx;
+	Vec3 lutSize; // LUT extent in texels, queried from the bound texture instead of a specialization constant
+	g_lut.GetDimensions(lutSize.x, lutSize.y, lutSize.z); // One output per axis; passing .y twice would leave .z unwritten
+
+	const RVec3 lutScale = ((lutSize.x - 1.0) / lutSize.x).xxx; // Only the X extent is used for the remap
+	const RVec3 lutOffset = (1.0 / (2.0 * lutSize.x)).xxx; // Half a texel, so a color of 0 lands on the first texel center
 
 	color = min(color, RVec3(1.0, 1.0, 1.0));
-	const RVec3 lutCoords = color * kLutScale + kLutOffset;
+	const RVec3 lutCoords = color * lutScale + lutOffset;
 	return g_lut.SampleLevel(g_trilinearRepeatSampler, lutCoords, 0.0).rgb;
 }
 
@@ -55,10 +54,9 @@ RVec3 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 {
 	RVec3 outColor;
 
-	if(kMotionBlurSamples > 0u)
+	if(g_pc.m_motionBlurSamples > 0u)
 	{
-		outColor = motionBlur(g_motionVectorsRt, g_nearestAnyClampSampler, g_lightShadingRt, Vec2(kFramebufferSize), g_linearAnyClampSampler, uv,
-							  kMotionBlurSamples);
+		outColor = motionBlur(g_motionVectorsRt, g_nearestAnyClampSampler, g_lightShadingRt, g_linearAnyClampSampler, uv, g_pc.m_motionBlurSamples);
 	}
 	else
 	{

+ 1 - 5
AnKi/Shaders/GBufferPost.ankiprog

@@ -12,9 +12,6 @@
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 2u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] Texture2D g_depthTex;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
@@ -50,8 +47,7 @@ FragOut main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITI
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(svPosition.xy, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(svPosition.xy, depth));
 
 	// Make the decalsMask uniform across the wave because we are accessing bindless textures later on
 	U32 decalsMask = cluster.m_decalsMask[0];

+ 2 - 2
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -15,8 +15,8 @@ struct RtShadowsDenoiseConstants
 	Mat4 m_invViewProjMat;
 
 	F32 m_time;
-	F32 m_padding0;
-	F32 m_padding1;
+	U32 m_minSampleCount;
+	U32 m_maxSampleCount;
 	F32 m_padding2;
 };
 

+ 0 - 2
AnKi/Shaders/IndirectDiffuse.hlsl

@@ -18,8 +18,6 @@
 #define REPROJECT_LIGHTBUFFER false
 #define SSGI_PROBE_COMBINE(ssgiColor, probeColor) ((ssgiColor) + (probeColor))
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kSampleCount, 0u);
-
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(1)]] StructuredBuffer<GlobalIlluminationProbe> g_giProbes;
 [[vk::binding(2)]] StructuredBuffer<Cluster> g_clusters;

+ 1 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -20,6 +20,7 @@
 #define outputtopology(x) [nodiscard]
 #define unroll [nodiscard]
 #define loop [nodiscard]
+#define branch [nodiscard]
 #define out
 #define in
 #define inout

+ 3 - 2
AnKi/Shaders/LensFlareUpdateIndirectInfo.ankiprog

@@ -6,7 +6,6 @@
 #pragma anki start comp
 #include <AnKi/Shaders/Common.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kInDepthMapSize, 0u);
 #define THREAD_COUNT_SQRT 8
 
 [[vk::push_constant]] ConstantBuffer<Mat4> g_mvp;
@@ -36,8 +35,10 @@ groupshared U32 s_maxDepth;
 
 	// Compute the UVs to sample the depth map
 	// Belongs to [-THREAD_COUNT_SQRT, THREAD_COUNT_SQRT]
+	Vec2 depthMapSize;
+	g_depthMap.GetDimensions(depthMapSize.x, depthMapSize.y);
 	const Vec2 displacement = Vec2(svGroupThreadId.xy) - (THREAD_COUNT_SQRT / 2u);
-	const Vec2 texelSize = 1.0 / Vec2(kInDepthMapSize);
+	const Vec2 texelSize = 1.0 / depthMapSize;
 	const Vec2 uv = ndcToUv(posNdc.xy) + displacement * texelSize;
 
 	// Sample and store depth

+ 1 - 5
AnKi/Shaders/LightShading.ankiprog

@@ -13,9 +13,6 @@
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 2u);
-
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(1)]] StructuredBuffer<PointLight> g_pointLights;
 [[vk::binding(1)]] StructuredBuffer<SpotLight> g_spotLights;
@@ -55,8 +52,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(svPosition.xy, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(svPosition.xy, depth));
 
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 

+ 3 - 5
AnKi/Shaders/LightShadingApplyFog.ankiprog

@@ -11,9 +11,6 @@
 
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kFinalZSplit, 1u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(2)]] Texture2D g_depthRt;
@@ -21,7 +18,8 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kFinalZSplit, 1u);
 
 struct Constants
 {
-	Vec2 m_padding;
+	F32 m_zSplitCount; // Z split count as a float; main() uses it directly in the uvw.z scale
+	F32 m_finalZSplit; // Filled by the host from the volumetric fog's final cluster in Z; main() divides by (m_finalZSplit + 1)
 	F32 m_near;
 	F32 m_far;
 };
@@ -35,7 +33,7 @@ RVec4 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 	// Compute W coordinate
 	const F32 depth = g_depthRt.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	const F32 linearDepth = linearizeDepth(depth, g_consts.m_near, g_consts.m_far);
-	uvw.z = linearDepth * (F32(kZSplitCount) / F32(kFinalZSplit + 1u));
+	uvw.z = linearDepth * (g_consts.m_zSplitCount / (g_consts.m_finalZSplit + 1.0f));
 
 	// Compute UV coordinates
 	uvw.xy = uv;

+ 5 - 2
AnKi/Shaders/MotionBlur.hlsl

@@ -8,9 +8,12 @@
 #include <AnKi/Shaders/Common.hlsl>
 
 // Perform motion blur.
-RVec3 motionBlur(Texture2D motionVectorsRt, SamplerState motionVectorsRtSampler, Texture2D<RVec4> toBlurRt, Vec2 toBlurRtSize,
-				 SamplerState toBlurRtSampler, Vec2 uv, U32 maxSamples)
+RVec3 motionBlur(Texture2D<Vec4> motionVectorsRt, SamplerState motionVectorsRtSampler, Texture2D<RVec4> toBlurRt, SamplerState toBlurRtSampler,
+				 Vec2 uv, U32 maxSamples)
 {
+	Vec2 toBlurRtSize;
+	toBlurRt.GetDimensions(toBlurRtSize.x, toBlurRtSize.y);
+
 	// Compute velocity. Get the max velocity around the curent sample to avoid outlines. TAA's result and the motion
 	// vectors RT do not quite overlap
 	Vec2 velocityMin = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0).rg;

+ 10 - 12
AnKi/Shaders/MotionVectors.hlsl

@@ -7,7 +7,6 @@
 
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 constexpr F32 kMaxRejectionDistance = 0.1; // In meters
 constexpr F32 kMaxHistoryLength = 16.0;
 
@@ -22,6 +21,10 @@ struct Constants
 	Mat4 m_reprojectionMat;
 	Mat4 m_viewProjectionInvMat;
 	Mat4 m_prevViewProjectionInvMat;
+
+	Vec2 m_viewportSize;
+	F32 m_padding0;
+	F32 m_padding1;
 };
 
 [[vk::binding(5)]] ConstantBuffer<Constants> g_consts;
@@ -40,7 +43,7 @@ Vec3 clipToWorld(Vec4 clip, Mat4 clipToWorldMat)
 /// Average the some depth values and unproject.
 Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
 {
-	const Vec2 halfTexel = (1.0 / Vec2(kFramebufferSize)) / 2.0;
+	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
 
 	Vec4 depths = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	depths += tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
@@ -53,7 +56,7 @@ Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
 /// Get the depths of some neighbour texels, unproject and find the AABB in world space that encloses them.
 void getMinMaxWorldPositions(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat, out Vec3 aabbMin, out Vec3 aabbMax)
 {
-	const Vec2 halfTexel = (1.0 / Vec2(kFramebufferSize)) / 2.0;
+	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
 
 	const Vec4 depths1 = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	const Vec4 depths2 = tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
@@ -107,7 +110,7 @@ F32 computeRejectionFactor(Vec2 uv, Vec2 historyUv)
 }
 
 #if defined(ANKI_COMPUTE_SHADER)
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #else
 struct FragOut
 {
@@ -119,12 +122,7 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 {
 #if defined(ANKI_COMPUTE_SHADER)
-	if(skipOutOfBoundsInvocations(UVec2(8, 8), kFramebufferSize, svDispatchThreadId.xy))
-	{
-		return;
-	}
-
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / g_consts.m_viewportSize;
 #endif
 	const F32 depth = g_currentDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 
@@ -158,8 +156,8 @@ FragOut main(Vec2 uv : TEXCOORD)
 
 	// Write out
 #if defined(ANKI_COMPUTE_SHADER)
-	g_motionVectorsUav[svDispatchThreadId.xy] = historyUv - uv;
-	g_historyLengthUav[svDispatchThreadId.xy] = historyLength;
+	g_motionVectorsUav[svDispatchThreadId] = historyUv - uv;
+	g_historyLengthUav[svDispatchThreadId] = historyLength;
 #else
 	FragOut output;
 	output.m_motionVectors = historyUv - uv;

+ 3 - 6
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -12,9 +12,6 @@
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
-
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] Texture2D<Vec4> g_inTex;
 [[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
@@ -85,12 +82,12 @@ F32 computeVarianceCenter(Vec2 uv)
 	if(historyLength < 2.0)
 	{
 		// Worst case
-		sampleCount = kMaxSampleCount;
+		sampleCount = g_consts.m_maxSampleCount;
 	}
 	else if(historyLength > 4.0 && varianceCenter < 0.0001)
 	{
 		// Best case
-		sampleCount = kMinSampleCount;
+		sampleCount = g_consts.m_minSampleCount;
 	}
 	else
 	{
@@ -99,7 +96,7 @@ F32 computeVarianceCenter(Vec2 uv)
 		F32 blur = varianceCenter * 100.0;
 		blur = min(1.0, blur);
 
-		const F32 sampleCountf = lerp(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
+		const F32 sampleCountf = lerp(F32(g_consts.m_minSampleCount), F32(g_consts.m_maxSampleCount), blur);
 
 		sampleCount = U32(sampleCountf);
 	}

+ 18 - 16
AnKi/Shaders/ShadowmapsResolve.hlsl

@@ -8,10 +8,6 @@
 
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 2u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 4u);
-
 #define DEBUG_CASCADES 0
 
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
@@ -34,6 +30,15 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 4u);
 [[vk::binding(10)]] RWTexture2D<RVec4> g_outUav;
 #endif
 
+struct Constants
+{
+	Vec2 m_framebufferSize; // Output size stored as floats; used to derive UVs and to clamp the compute thread ID
+	F32 m_padding0; // Unused
+	F32 m_padding1; // Unused
+};
+
+[[vk::push_constant]] ConstantBuffer<Constants> g_consts;
+
 Vec3 computeDebugShadowCascadeColor(U32 cascade)
 {
 	if(cascade == 0u)
@@ -55,24 +60,22 @@ Vec3 computeDebugShadowCascadeColor(U32 cascade)
 }
 
 #if defined(ANKI_COMPUTE_SHADER)
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #else
 RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif
 {
 #if defined(ANKI_COMPUTE_SHADER)
-	if(any(svDispatchThreadId.xy >= kFramebufferSize))
-	{
-		return;
-	}
-
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	svDispatchThreadId = min(svDispatchThreadId, UVec2(g_consts.m_framebufferSize - 1.0f)); // Just to be sure
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_consts.m_framebufferSize;
 #endif
 
 #if PCF
 	// Noise
-	const Vec2 kNoiseTexSize = 64.0f;
-	const Vec2 noiseUv = Vec2(kFramebufferSize) / kNoiseTexSize * uv;
+	Vec2 noiseTexSize;
+	g_noiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
+
+	const Vec2 noiseUv = g_consts.m_framebufferSize / noiseTexSize * uv;
 	RVec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
 	noise = animateBlueNoise(noise, g_clusteredShading.m_frame % 16u);
 	const RF32 randFactor = noise.x;
@@ -86,8 +89,7 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 
 	// Cluster
 	const Vec2 fragCoord = uv * g_clusteredShading.m_renderingSize;
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(fragCoord, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(fragCoord, depth));
 
 	// Layers
 	U32 shadowCasterCountPerFragment = 0u;
@@ -204,7 +206,7 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 
 	// Store
 #if defined(ANKI_COMPUTE_SHADER)
-	g_outUav[svDispatchThreadId.xy] = shadowFactors;
+	g_outUav[svDispatchThreadId] = shadowFactors;
 #else
 	return shadowFactors;
 #endif

+ 9 - 12
AnKi/Shaders/TemporalAA.hlsl

@@ -10,9 +10,8 @@
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_F32(kVarianceClippingGamma, 0u);
-ANKI_SPECIALIZATION_CONSTANT_F32(kBlendFactor, 1u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 2u);
+constexpr F32 kVarianceClippingGamma = 2.7f; // Variance clipping paper proposes 1.0
+constexpr F32 kBlendFactor = 1.0f / 16.0f;
 
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] Texture2D<RVec4> g_inputRt;
@@ -23,8 +22,8 @@ constexpr U32 kTonemappingBinding = 4u;
 #include <AnKi/Shaders/TonemappingResources.hlsl>
 
 #if defined(ANKI_COMPUTE_SHADER)
-[[vk::binding(5)]] RWTexture2D<RVec4> g_outImg;
-[[vk::binding(6)]] RWTexture2D<RVec4> g_tonemappedImg;
+[[vk::binding(5)]] RWTexture2D<RVec4> g_uavTex;
+[[vk::binding(6)]] RWTexture2D<RVec4> g_tonemappedUavTex;
 #else
 struct FragOut
 {
@@ -48,12 +47,10 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 {
 #if defined(ANKI_COMPUTE_SHADER)
-	if(any(svDispatchThreadId.xy >= kFramebufferSize))
-	{
-		return;
-	}
+	Vec2 outUavSize;
+	g_uavTex.GetDimensions(outUavSize.x, outUavSize.y);
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / outUavSize;
 #endif
 
 	// Get prev uv coords
@@ -108,8 +105,8 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 	const Vec3 tonemapped = linearToSRgb(tonemap(outColor, readExposureAndAverageLuminance().x));
 #if defined(ANKI_COMPUTE_SHADER)
-	g_outImg[svDispatchThreadId.xy] = RVec4(outColor, 0.0);
-	g_tonemappedImg[svDispatchThreadId.xy] = RVec4(tonemapped, 0.0);
+	g_uavTex[svDispatchThreadId.xy] = RVec4(outColor, 0.0);
+	g_tonemappedUavTex[svDispatchThreadId.xy] = RVec4(tonemapped, 0.0);
 #else
 	FragOut output;
 	output.m_color = outColor;

+ 18 - 24
AnKi/Shaders/TonemappingAverageLuminance.ankiprog

@@ -7,18 +7,10 @@
 
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
-#define LOG_AVG 0
-
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kInputTexSize, 0u);
-
 #define THREAD_COUNT_X 32u
 #define THREAD_COUNT_Y 16u
 #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
 
-// Align the tex size to workgroup size
-constexpr UVec2 kAlignedInputTexSize = THREAD_COUNT * ((kInputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
-constexpr UVec2 kPixelsPerTile = kAlignedInputTexSize / THREAD_COUNT;
-
 [[vk::binding(0)]] Texture2D<RVec4> g_tex;
 
 #define TONEMAPPING_RESOURCE_AS_WRITE_IMAGE 1
@@ -29,28 +21,34 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 
 [numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
 {
+	UVec2 inputTexSize;
+	g_tex.GetDimensions(inputTexSize.x, inputTexSize.y);
+
+	// Align the tex size to workgroup size
+	const UVec2 alignedInputTexSize = THREAD_COUNT * ((inputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
+	const UVec2 pixelsPerTile = alignedInputTexSize / THREAD_COUNT;
+
 	// Gather the log-average luminance of a tile. It will miss some pixels but not too many
-	const U32 yStart = svGroupThreadId.y * kPixelsPerTile.y;
-	const U32 xStart = svGroupThreadId.x * kPixelsPerTile.x;
+	const U32 yStart = svGroupThreadId.y * pixelsPerTile.y;
+	const U32 xStart = svGroupThreadId.x * pixelsPerTile.x;
+
+	const F32 weight = (1.0 / F32(inputTexSize.x * inputTexSize.y));
 
 	F32 avgLum = 0.0;
-	[unroll] for(U32 y = 0u; y < kPixelsPerTile.y; ++y)
+	for(U32 y = 0u; y < pixelsPerTile.y; ++y)
 	{
-		[unroll] for(U32 x = 0u; x < kPixelsPerTile.x; ++x)
+		for(U32 x = 0u; x < pixelsPerTile.x; ++x)
 		{
 			const UVec2 uv = UVec2(xStart, yStart) + UVec2(x, y);
-			if(uv.x >= kInputTexSize.x || uv.y >= kInputTexSize.y)
+			if(uv.x >= F32(inputTexSize.x) || uv.y >= F32(inputTexSize.y))
 			{
 				continue;
 			}
 
 			const Vec3 color = g_tex.Load(IVec3(uv, 0)).rgb;
 			const F32 lum = computeLuminance(color);
-#if LOG_AVG
-			avgLum += log(max(kEpsilonf, lum));
-#else
-			avgLum += lum;
-#endif
+
+			avgLum += lum * weight;
 		}
 	}
 
@@ -67,7 +65,7 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 		}
 
 #if ANKI_PLATFORM_MOBILE
-		if(s > 16u)
+		if(s > WaveGetLaneCount())
 		{
 			GroupMemoryBarrierWithGroupSync();
 		}
@@ -77,11 +75,7 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 	// Write the result
 	[branch] if(svGroupIndex == 0u)
 	{
-#if LOG_AVG
-		const F32 crntLum = exp(s_avgLum[0] * (1.0 / F32(kInputTexSize.x * kInputTexSize.y)));
-#else
-		const F32 crntLum = s_avgLum[0] * (1.0 / F32(kInputTexSize.x * kInputTexSize.y));
-#endif
+		const F32 crntLum = s_avgLum[0];
 
 #if 1
 		const F32 prevLum = readExposureAndAverageLuminance().y;