Browse Source

Remove the remaining spec consts from the shaders

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
655f66f99c

+ 7 - 1
AnKi/Renderer/ClusterBinning.cpp

@@ -35,7 +35,6 @@ Error ClusterBinning::init()
 	{
 	{
 		ShaderProgramResourceVariantInitInfo inf(m_binningProg);
 		ShaderProgramResourceVariantInitInfo inf(m_binningProg);
 		inf.addMutation("OBJECT_TYPE", MutatorValue(type));
 		inf.addMutation("OBJECT_TYPE", MutatorValue(type));
-		inf.addConstant("kZSplitCount", getRenderer().getZSplitCount());
 		const ShaderProgramResourceVariant* variant;
 		const ShaderProgramResourceVariant* variant;
 		m_binningProg->getOrCreateVariant(inf, variant);
 		m_binningProg->getOrCreateVariant(inf, variant);
 		m_binningGrProgs[type].reset(&variant->getProgram());
 		m_binningGrProgs[type].reset(&variant->getProgram());
@@ -177,6 +176,11 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 
 
 					Vec4 m_nearPlaneWorld;
 					Vec4 m_nearPlaneWorld;
 
 
+					I32 m_zSplitCountMinusOne;
+					I32 m_padding0;
+					I32 m_padding1;
+					I32 m_padding2;
+
 					Mat4 m_invertedViewProjMat;
 					Mat4 m_invertedViewProjMat;
 				} consts;
 				} consts;
 
 
@@ -190,6 +194,8 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 				extractClipPlane(ctx.m_matrices.m_viewProjection, FrustumPlaneType::kNear, nearPlane);
 				extractClipPlane(ctx.m_matrices.m_viewProjection, FrustumPlaneType::kNear, nearPlane);
 				consts.m_nearPlaneWorld = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
 				consts.m_nearPlaneWorld = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
 
 
+				consts.m_zSplitCountMinusOne = getRenderer().getZSplitCount() - 1;
+
 				consts.m_invertedViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 				consts.m_invertedViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 
 
 				cmdb.setPushConstants(&consts, sizeof(consts));
 				cmdb.setPushConstants(&consts, sizeof(consts));

+ 3 - 7
AnKi/Renderer/FinalComposite.cpp

@@ -38,9 +38,6 @@ Error FinalComposite::initInternal()
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 	variantInitInfo.addMutation("FILM_GRAIN", (g_filmGrainStrengthCVar.get() > 0.0) ? 1 : 0);
 	variantInitInfo.addMutation("FILM_GRAIN", (g_filmGrainStrengthCVar.get() > 0.0) ? 1 : 0);
 	variantInitInfo.addMutation("BLOOM_ENABLED", 1);
 	variantInitInfo.addMutation("BLOOM_ENABLED", 1);
-	variantInitInfo.addConstant("kLutSize", U32(kLutSize));
-	variantInitInfo.addConstant("kFramebufferSize", getRenderer().getPostProcessResolution());
-	variantInitInfo.addConstant("kMotionBlurSamples", g_motionBlurSamplesCVar.get());
 
 
 	for(U32 dbg = 0; dbg < 2; ++dbg)
 	for(U32 dbg = 0; dbg < 2; ++dbg)
 	{
 	{
@@ -73,9 +70,8 @@ Error FinalComposite::loadColorGradingTextureImage(CString filename)
 {
 {
 	m_lut.reset(nullptr);
 	m_lut.reset(nullptr);
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource(filename, m_lut));
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource(filename, m_lut));
-	ANKI_ASSERT(m_lut->getWidth() == kLutSize);
-	ANKI_ASSERT(m_lut->getHeight() == kLutSize);
-	ANKI_ASSERT(m_lut->getDepth() == kLutSize);
+	ANKI_ASSERT(m_lut->getWidth() == m_lut->getHeight());
+	ANKI_ASSERT(m_lut->getWidth() == m_lut->getDepth());
 
 
 	return Error::kNone;
 	return Error::kNone;
 }
 }
@@ -167,7 +163,7 @@ void FinalComposite::run(RenderPassWorkContext& rgraphCtx)
 
 
 		if(g_filmGrainStrengthCVar.get() > 0.0f)
 		if(g_filmGrainStrengthCVar.get() > 0.0f)
 		{
 		{
-			const UVec4 pc(0, 0, floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32);
+			const UVec4 pc(g_motionBlurSamplesCVar.get(), floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32, 0);
 			cmdb.setPushConstants(&pc, sizeof(pc));
 			cmdb.setPushConstants(&pc, sizeof(pc));
 		}
 		}
 	}
 	}

+ 0 - 2
AnKi/Renderer/FinalComposite.h

@@ -30,8 +30,6 @@ public:
 	Error loadColorGradingTextureImage(CString filename);
 	Error loadColorGradingTextureImage(CString filename);
 
 
 private:
 private:
-	static constexpr U kLutSize = 16;
-
 	FramebufferDescription m_fbDescr;
 	FramebufferDescription m_fbDescr;
 
 
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;

+ 1 - 9
AnKi/Renderer/GBufferPost.cpp

@@ -26,15 +26,7 @@ Error GBufferPost::initInternal()
 	ANKI_R_LOGV("Initializing GBufferPost pass");
 	ANKI_R_LOGV("Initializing GBufferPost pass");
 
 
 	// Load shaders
 	// Load shaders
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GBufferPost.ankiprogbin", m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GBufferPost.ankiprogbin", m_prog, m_grProg));
 
 
 	// Create FB descr
 	// Create FB descr
 	m_fbDescr.m_colorAttachmentCount = 2;
 	m_fbDescr.m_colorAttachmentCount = 2;

+ 1 - 21
AnKi/Renderer/LensFlare.cpp

@@ -33,30 +33,10 @@ Error LensFlare::initInternal()
 {
 {
 	ANKI_R_LOGV("Initializing lens flare");
 	ANKI_R_LOGV("Initializing lens flare");
 
 
-	ANKI_CHECK(initSprite());
-	ANKI_CHECK(initOcclusion());
-
-	return Error::kNone;
-}
-
-Error LensFlare::initSprite()
-{
 	m_maxSpritesPerFlare = g_lensFlareMaxSpritesPerFlareCVar.get();
 	m_maxSpritesPerFlare = g_lensFlareMaxSpritesPerFlareCVar.get();
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareSprite.ankiprogbin", m_realProg, m_realGrProg));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareSprite.ankiprogbin", m_realProg, m_realGrProg));
 
 
-	return Error::kNone;
-}
-
-Error LensFlare::initOcclusion()
-{
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LensFlareUpdateIndirectInfo.ankiprogbin", m_updateIndirectBuffProg));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_updateIndirectBuffProg);
-	variantInitInfo.addConstant("kInDepthMapSize",
-								UVec2(getRenderer().getInternalResolution().x() / 2 / 2, getRenderer().getInternalResolution().y() / 2 / 2));
-	const ShaderProgramResourceVariant* variant;
-	m_updateIndirectBuffProg->getOrCreateVariant(variantInitInfo, variant);
-	m_updateIndirectBuffGrProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareUpdateIndirectInfo.ankiprogbin", m_updateIndirectBuffProg, m_updateIndirectBuffGrProg));
 
 
 	return Error::kNone;
 	return Error::kNone;
 }
 }

+ 0 - 3
AnKi/Renderer/LensFlare.h

@@ -47,9 +47,6 @@ private:
 		BufferHandle m_indirectBuffHandle;
 		BufferHandle m_indirectBuffHandle;
 	} m_runCtx;
 	} m_runCtx;
 
 
-	Error initSprite();
-	Error initOcclusion();
-
 	Error initInternal();
 	Error initInternal();
 };
 };
 /// @}
 /// @}

+ 6 - 19
AnKi/Renderer/LightShading.cpp

@@ -55,15 +55,7 @@ Error LightShading::init()
 Error LightShading::initLightShading()
 Error LightShading::initLightShading()
 {
 {
 	// Load shaders and programs
 	// Load shaders and programs
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LightShading.ankiprogbin", m_lightShading.m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_lightShading.m_prog);
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-	const ShaderProgramResourceVariant* variant;
-
-	m_lightShading.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_lightShading.m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LightShading.ankiprogbin", m_lightShading.m_prog, m_lightShading.m_grProg));
 
 
 	// Create RT descr
 	// Create RT descr
 	const UVec2 internalResolution = getRenderer().getInternalResolution();
 	const UVec2 internalResolution = getRenderer().getInternalResolution();
@@ -111,15 +103,7 @@ Error LightShading::initSkybox()
 Error LightShading::initApplyFog()
 Error LightShading::initApplyFog()
 {
 {
 	// Load shaders and programs
 	// Load shaders and programs
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LightShadingApplyFog.ankiprogbin", m_applyFog.m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_applyFog.m_prog);
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
-	variantInitInfo.addConstant("kFinalZSplit", getRenderer().getVolumetricFog().getFinalClusterInZ());
-
-	const ShaderProgramResourceVariant* variant;
-	m_applyFog.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_applyFog.m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LightShadingApplyFog.ankiprogbin", m_applyFog.m_prog, m_applyFog.m_grProg));
 
 
 	return Error::kNone;
 	return Error::kNone;
 }
 }
@@ -253,10 +237,13 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		class PushConsts
 		class PushConsts
 		{
 		{
 		public:
 		public:
-			Vec2 m_padding;
+			F32 m_zSplitCount;
+			F32 m_finalZSplit;
 			F32 m_near;
 			F32 m_near;
 			F32 m_far;
 			F32 m_far;
 		} regs;
 		} regs;
+		regs.m_zSplitCount = F32(getRenderer().getZSplitCount());
+		regs.m_finalZSplit = F32(getRenderer().getVolumetricFog().getFinalClusterInZ());
 		regs.m_near = ctx.m_cameraNear;
 		regs.m_near = ctx.m_cameraNear;
 		regs.m_far = ctx.m_cameraFar;
 		regs.m_far = ctx.m_cameraFar;
 
 

+ 7 - 7
AnKi/Renderer/MotionVectors.cpp

@@ -26,14 +26,9 @@ Error MotionVectors::initInternal()
 	ANKI_R_LOGV("Initializing motion vectors");
 	ANKI_R_LOGV("Initializing motion vectors");
 
 
 	// Prog
 	// Prog
-	CString progFname =
+	const CString progFname =
 		(g_preferComputeCVar.get()) ? "ShaderBinaries/MotionVectorsCompute.ankiprogbin" : "ShaderBinaries/MotionVectorsRaster.ankiprogbin";
 		(g_preferComputeCVar.get()) ? "ShaderBinaries/MotionVectorsCompute.ankiprogbin" : "ShaderBinaries/MotionVectorsRaster.ankiprogbin";
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource(progFname, m_prog));
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kFramebufferSize", UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()));
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram(progFname, m_prog, m_grProg));
 
 
 	// RTs
 	// RTs
 	m_motionVectorsRtDescr = getRenderer().create2DRenderTargetDescription(
 	m_motionVectorsRtDescr = getRenderer().create2DRenderTargetDescription(
@@ -127,12 +122,17 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 			Mat4 m_reprojectionMat;
 			Mat4 m_reprojectionMat;
 			Mat4 m_viewProjectionInvMat;
 			Mat4 m_viewProjectionInvMat;
 			Mat4 m_prevViewProjectionInvMat;
 			Mat4 m_prevViewProjectionInvMat;
+
+			Vec2 m_viewportSize;
+			F32 m_padding0;
+			F32 m_padding1;
 		} * pc;
 		} * pc;
 		pc = allocateAndBindConstants<Constants>(cmdb, 0, 5);
 		pc = allocateAndBindConstants<Constants>(cmdb, 0, 5);
 
 
 		pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
 		pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
 		pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 		pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;
 		pc->m_prevViewProjectionInvMat = ctx.m_prevMatrices.m_invertedViewProjectionJitter;
 		pc->m_prevViewProjectionInvMat = ctx.m_prevMatrices.m_invertedViewProjectionJitter;
+		pc->m_viewportSize = Vec2(getRenderer().getInternalResolution());
 
 
 		if(g_preferComputeCVar.get())
 		if(g_preferComputeCVar.get())
 		{
 		{

+ 6 - 6
AnKi/Renderer/RtShadows.cpp

@@ -78,8 +78,6 @@ Error RtShadows::initInternal()
 	{
 	{
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsDenoise.ankiprogbin", m_denoiseProg));
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsDenoise.ankiprogbin", m_denoiseProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
-		variantInitInfo.addConstant("kMinSampleCount", 8u);
-		variantInitInfo.addConstant("kMaxSampleCount", 32u);
 		variantInitInfo.addMutation("BLUR_ORIENTATION", 0);
 		variantInitInfo.addMutation("BLUR_ORIENTATION", 0);
 
 
 		const ShaderProgramResourceVariant* variant;
 		const ShaderProgramResourceVariant* variant;
@@ -545,10 +543,12 @@ void RtShadows::runDenoise(const RenderingContext& ctx, RenderPassWorkContext& r
 
 
 	rgraphCtx.bindUavTexture(0, 6, (horizontal) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
 	rgraphCtx.bindUavTexture(0, 6, (horizontal) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
 
 
-	RtShadowsDenoiseConstants unis;
-	unis.m_invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
-	unis.m_time = F32(GlobalFrameIndex::getSingleton().m_value % 0xFFFFu);
-	cmdb.setPushConstants(&unis, sizeof(unis));
+	RtShadowsDenoiseConstants consts;
+	consts.m_invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
+	consts.m_time = F32(GlobalFrameIndex::getSingleton().m_value % 0xFFFFu);
+	consts.m_minSampleCount = 8;
+	consts.m_maxSampleCount = 32;
+	cmdb.setPushConstants(&consts, sizeof(consts));
 
 
 	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
 	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
 }
 }

+ 3 - 3
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -48,9 +48,6 @@ Error ShadowmapsResolve::initInternal()
 																						: "ShaderBinaries/ShadowmapsResolveRaster.ankiprogbin",
 																						: "ShaderBinaries/ShadowmapsResolveRaster.ankiprogbin",
 															m_prog));
 															m_prog));
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kFramebufferSize", UVec2(width, height));
-	variantInitInfo.addConstant("kTileCount", getRenderer().getTileCounts());
-	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
 	variantInitInfo.addMutation("PCF", g_shadowMappingPcfCVar.get() != 0);
 	variantInitInfo.addMutation("PCF", g_shadowMappingPcfCVar.get() != 0);
 	variantInitInfo.addMutation("DIRECTIONAL_LIGHT_SHADOW_RESOLVED", getRenderer().getRtShadowsEnabled());
 	variantInitInfo.addMutation("DIRECTIONAL_LIGHT_SHADOW_RESOLVED", getRenderer().getRtShadowsEnabled());
 	const ShaderProgramResourceVariant* variant;
 	const ShaderProgramResourceVariant* variant;
@@ -147,6 +144,9 @@ void ShadowmapsResolve::run(RenderPassWorkContext& rgraphCtx)
 		rgraphCtx.bindColorTexture(0, 9, getRenderer().getRtShadows().getRt());
 		rgraphCtx.bindColorTexture(0, 9, getRenderer().getRtShadows().getRt());
 	}
 	}
 
 
+	const Vec4 consts(F32(m_rtDescr.m_width), F32(m_rtDescr.m_height), 0.0f, 0.0f);
+	cmdb.setPushConstants(&consts, sizeof(consts));
+
 	if(g_preferComputeCVar.get())
 	if(g_preferComputeCVar.get())
 	{
 	{
 		rgraphCtx.bindUavTexture(0, 10, m_runCtx.m_rt, TextureSubresourceInfo());
 		rgraphCtx.bindUavTexture(0, 10, m_runCtx.m_rt, TextureSubresourceInfo());

+ 0 - 8
AnKi/Renderer/TemporalAA.cpp

@@ -34,17 +34,9 @@ Error TemporalAA::initInternal()
 
 
 	{
 	{
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-		variantInitInfo.addConstant("kVarianceClippingGamma", 2.7f); // Variance clipping paper proposes 1.0
-		variantInitInfo.addConstant("kBlendFactor", 1.0f / 16.0f);
 		variantInitInfo.addMutation("VARIANCE_CLIPPING", 1);
 		variantInitInfo.addMutation("VARIANCE_CLIPPING", 1);
 		variantInitInfo.addMutation("YCBCR", 0);
 		variantInitInfo.addMutation("YCBCR", 0);
 
 
-		if(g_preferComputeCVar.get())
-		{
-			variantInitInfo.addConstant("kFramebufferSize",
-										UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()));
-		}
-
 		const ShaderProgramResourceVariant* variant;
 		const ShaderProgramResourceVariant* variant;
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
 		m_grProg.reset(&variant->getProgram());
 		m_grProg.reset(&variant->getProgram());

+ 2 - 10
AnKi/Renderer/Tonemapping.cpp

@@ -30,18 +30,10 @@ Error Tonemapping::initInternal()
 	ANKI_R_LOGV("Initializing tonemapping. Resolution %ux%u", width, height);
 	ANKI_R_LOGV("Initializing tonemapping. Resolution %ux%u", width, height);
 
 
 	// Create program
 	// Create program
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/TonemappingAverageLuminance.ankiprogbin", m_prog));
-
-	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("kInputTexSize", UVec2(width, height));
-
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/TonemappingAverageLuminance.ankiprogbin", m_prog, m_grProg));
 
 
 	// Create exposure texture.
 	// Create exposure texture.
-	// WARNING: Use it only as IMAGE and nothing else. It will not be tracked by the rendergraph. No tracking means no
-	// automatic image transitions
+	// WARNING: Use it only as IMAGE and nothing else. It will not be tracked by the rendergraph. No tracking means no automatic image transitions
 	const TextureUsageBit usage = TextureUsageBit::kAllUav;
 	const TextureUsageBit usage = TextureUsageBit::kAllUav;
 	const TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(1, 1, Format::kR16G16_Sfloat, usage, "ExposureAndAvgLum1x1");
 	const TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(1, 1, Format::kR16G16_Sfloat, usage, "ExposureAndAvgLum1x1");
 	ClearValue clearValue;
 	ClearValue clearValue;

+ 6 - 3
AnKi/Shaders/ClusterBinning.ankiprog

@@ -12,8 +12,6 @@
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 1u);
-
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 typedef GpuSceneLight GpuSceneType;
 typedef GpuSceneLight GpuSceneType;
 #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
 #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
@@ -39,6 +37,11 @@ struct ClusterBinningConstants
 
 
 	Vec4 m_nearPlaneWorld;
 	Vec4 m_nearPlaneWorld;
 
 
+	I32 m_zSplitCountMinusOne;
+	I32 m_padding0;
+	I32 m_padding1;
+	I32 m_padding2;
+
 	Mat4 m_invertedViewProjMat;
 	Mat4 m_invertedViewProjMat;
 };
 };
 
 
@@ -196,7 +199,7 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 		}
 		}
 
 
 		const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
 		const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
-		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
+		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 		{
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT

+ 5 - 9
AnKi/Shaders/ClusteredShadingFunctions.hlsl

@@ -71,7 +71,7 @@ U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileCountX)
 /// Merge the tiles with z splits into a single cluster.
 /// Merge the tiles with z splits into a single cluster.
 Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 {
 {
-// #define ANKI_OR_MASKS(x) WaveActiveBitOr(x)
+//#define ANKI_OR_MASKS(x) WaveActiveBitOr(x)
 #define ANKI_OR_MASKS(x) (x)
 #define ANKI_OR_MASKS(x) (x)
 
 
 	Cluster outCluster;
 	Cluster outCluster;
@@ -97,18 +97,14 @@ Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 }
 }
 
 
 /// Get the final cluster after ORing and ANDing the masks.
 /// Get the final cluster after ORing and ANDing the masks.
-Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, Vec3 fragCoord, UVec2 tileCounts, U32 zSplitCount, F32 a, F32 b)
+Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, ClusteredShadingConstants consts, Vec3 fragCoord)
 {
 {
-	const Cluster tileCluster = clusters[computeTileClusterIndexFragCoord(fragCoord.xy, tileCounts.x)];
-	const Cluster zCluster = clusters[computeZSplitClusterIndex(fragCoord.z, zSplitCount, a, b) + tileCounts.x * tileCounts.y];
+	const Cluster tileCluster = clusters[computeTileClusterIndexFragCoord(fragCoord.xy, consts.m_tileCounts.x)];
+	const Cluster zCluster = clusters[computeZSplitClusterIndex(fragCoord.z, consts.m_zSplitCount, consts.m_zSplitMagic.x, consts.m_zSplitMagic.y)
+									  + consts.m_tileCounts.x * consts.m_tileCounts.y];
 	return mergeClusters(tileCluster, zCluster);
 	return mergeClusters(tileCluster, zCluster);
 }
 }
 
 
-Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, ClusteredShadingConstants unis, Vec3 fragCoord)
-{
-	return getClusterFragCoord(clusters, fragCoord, unis.m_tileCounts, unis.m_zSplitCount, unis.m_zSplitMagic.x, unis.m_zSplitMagic.y);
-}
-
 U32 iteratePointLights(inout Cluster cluster)
 U32 iteratePointLights(inout Cluster cluster)
 {
 {
 	for(U32 block = 0; block < kMaxVisibleLights / 32; ++block)
 	for(U32 block = 0; block < kMaxVisibleLights / 32; ++block)

+ 10 - 12
AnKi/Shaders/FinalComposite.ankiprog

@@ -15,10 +15,6 @@
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/MotionBlur.hlsl>
 #include <AnKi/Shaders/MotionBlur.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kLutSize, 0u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 1u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kMotionBlurSamples, 3u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
@@ -33,21 +29,24 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kMotionBlurSamples, 3u);
 
 
 struct PushConstants
 struct PushConstants
 {
 {
-	Vec2 m_padding0;
+	U32 m_motionBlurSamples;
 	F32 m_filmGrainStrength;
 	F32 m_filmGrainStrength;
 	U32 m_frameCount;
 	U32 m_frameCount;
+	U32 m_padding;
 };
 };
 
 
 [[vk::push_constant]] ConstantBuffer<PushConstants> g_pc;
 [[vk::push_constant]] ConstantBuffer<PushConstants> g_pc;
 
 
 RVec3 colorGrading(RVec3 color)
 RVec3 colorGrading(RVec3 color)
 {
 {
-	constexpr RF32 kLutSizef = RF32(kLutSize);
-	constexpr RVec3 kLutScale = ((kLutSizef - 1.0) / kLutSizef).xxx;
-	constexpr RVec3 kLutOffset = (1.0 / (2.0 * kLutSize)).xxx;
+	Vec3 lutSize;
+	g_lut.GetDimensions(lutSize.x, lutSize.y, lutSize.y);
+
+	const RVec3 lutScale = ((lutSize.x - 1.0) / lutSize.x).xxx;
+	const RVec3 lutOffset = (1.0 / (2.0 * lutSize.x)).xxx;
 
 
 	color = min(color, RVec3(1.0, 1.0, 1.0));
 	color = min(color, RVec3(1.0, 1.0, 1.0));
-	const RVec3 lutCoords = color * kLutScale + kLutOffset;
+	const RVec3 lutCoords = color * lutScale + lutOffset;
 	return g_lut.SampleLevel(g_trilinearRepeatSampler, lutCoords, 0.0).rgb;
 	return g_lut.SampleLevel(g_trilinearRepeatSampler, lutCoords, 0.0).rgb;
 }
 }
 
 
@@ -55,10 +54,9 @@ RVec3 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 {
 {
 	RVec3 outColor;
 	RVec3 outColor;
 
 
-	if(kMotionBlurSamples > 0u)
+	if(g_pc.m_motionBlurSamples > 0u)
 	{
 	{
-		outColor = motionBlur(g_motionVectorsRt, g_nearestAnyClampSampler, g_lightShadingRt, Vec2(kFramebufferSize), g_linearAnyClampSampler, uv,
-							  kMotionBlurSamples);
+		outColor = motionBlur(g_motionVectorsRt, g_nearestAnyClampSampler, g_lightShadingRt, g_linearAnyClampSampler, uv, g_pc.m_motionBlurSamples);
 	}
 	}
 	else
 	else
 	{
 	{

+ 1 - 5
AnKi/Shaders/GBufferPost.ankiprog

@@ -12,9 +12,6 @@
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 2u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] Texture2D g_depthTex;
 [[vk::binding(1)]] Texture2D g_depthTex;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
 [[vk::binding(2)]] SamplerState g_trilinearRepeatSampler;
@@ -50,8 +47,7 @@ FragOut main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITI
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 
 	// Get the cluster
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(svPosition.xy, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(svPosition.xy, depth));
 
 
 	// Make the decalsMask uniform across the wave because we are accessing bindless textures later on
 	// Make the decalsMask uniform across the wave because we are accessing bindless textures later on
 	U32 decalsMask = cluster.m_decalsMask[0];
 	U32 decalsMask = cluster.m_decalsMask[0];

+ 2 - 2
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -15,8 +15,8 @@ struct RtShadowsDenoiseConstants
 	Mat4 m_invViewProjMat;
 	Mat4 m_invViewProjMat;
 
 
 	F32 m_time;
 	F32 m_time;
-	F32 m_padding0;
-	F32 m_padding1;
+	U32 m_minSampleCount;
+	U32 m_maxSampleCount;
 	F32 m_padding2;
 	F32 m_padding2;
 };
 };
 
 

+ 0 - 2
AnKi/Shaders/IndirectDiffuse.hlsl

@@ -18,8 +18,6 @@
 #define REPROJECT_LIGHTBUFFER false
 #define REPROJECT_LIGHTBUFFER false
 #define SSGI_PROBE_COMBINE(ssgiColor, probeColor) ((ssgiColor) + (probeColor))
 #define SSGI_PROBE_COMBINE(ssgiColor, probeColor) ((ssgiColor) + (probeColor))
 
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kSampleCount, 0u);
-
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(1)]] StructuredBuffer<GlobalIlluminationProbe> g_giProbes;
 [[vk::binding(1)]] StructuredBuffer<GlobalIlluminationProbe> g_giProbes;
 [[vk::binding(2)]] StructuredBuffer<Cluster> g_clusters;
 [[vk::binding(2)]] StructuredBuffer<Cluster> g_clusters;

+ 1 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -20,6 +20,7 @@
 #define outputtopology(x) [nodiscard]
 #define outputtopology(x) [nodiscard]
 #define unroll [nodiscard]
 #define unroll [nodiscard]
 #define loop [nodiscard]
 #define loop [nodiscard]
+#define branch [nodiscard]
 #define out
 #define out
 #define in
 #define in
 #define inout
 #define inout

+ 3 - 2
AnKi/Shaders/LensFlareUpdateIndirectInfo.ankiprog

@@ -6,7 +6,6 @@
 #pragma anki start comp
 #pragma anki start comp
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kInDepthMapSize, 0u);
 #define THREAD_COUNT_SQRT 8
 #define THREAD_COUNT_SQRT 8
 
 
 [[vk::push_constant]] ConstantBuffer<Mat4> g_mvp;
 [[vk::push_constant]] ConstantBuffer<Mat4> g_mvp;
@@ -36,8 +35,10 @@ groupshared U32 s_maxDepth;
 
 
 	// Compute the UVs to sample the depth map
 	// Compute the UVs to sample the depth map
 	// Belongs to [-THREAD_COUNT_SQRT, THREAD_COUNT_SQRT]
 	// Belongs to [-THREAD_COUNT_SQRT, THREAD_COUNT_SQRT]
+	Vec2 depthMapSize;
+	g_depthMap.GetDimensions(depthMapSize.x, depthMapSize.y);
 	const Vec2 displacement = Vec2(svGroupThreadId.xy) - (THREAD_COUNT_SQRT / 2u);
 	const Vec2 displacement = Vec2(svGroupThreadId.xy) - (THREAD_COUNT_SQRT / 2u);
-	const Vec2 texelSize = 1.0 / Vec2(kInDepthMapSize);
+	const Vec2 texelSize = 1.0 / depthMapSize;
 	const Vec2 uv = ndcToUv(posNdc.xy) + displacement * texelSize;
 	const Vec2 uv = ndcToUv(posNdc.xy) + displacement * texelSize;
 
 
 	// Sample and store depth
 	// Sample and store depth

+ 1 - 5
AnKi/Shaders/LightShading.ankiprog

@@ -13,9 +13,6 @@
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 2u);
-
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(1)]] StructuredBuffer<PointLight> g_pointLights;
 [[vk::binding(1)]] StructuredBuffer<PointLight> g_pointLights;
 [[vk::binding(1)]] StructuredBuffer<SpotLight> g_spotLights;
 [[vk::binding(1)]] StructuredBuffer<SpotLight> g_spotLights;
@@ -55,8 +52,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 
 	// Get the cluster
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(svPosition.xy, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(svPosition.xy, depth));
 
 
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 
 

+ 3 - 5
AnKi/Shaders/LightShadingApplyFog.ankiprog

@@ -11,9 +11,6 @@
 
 
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 0u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kFinalZSplit, 1u);
-
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(2)]] Texture2D g_depthRt;
 [[vk::binding(2)]] Texture2D g_depthRt;
@@ -21,7 +18,8 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kFinalZSplit, 1u);
 
 
 struct Constants
 struct Constants
 {
 {
-	Vec2 m_padding;
+	F32 m_zSplitCount;
+	F32 m_finalZSplit;
 	F32 m_near;
 	F32 m_near;
 	F32 m_far;
 	F32 m_far;
 };
 };
@@ -35,7 +33,7 @@ RVec4 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 	// Compute W coordinate
 	// Compute W coordinate
 	const F32 depth = g_depthRt.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	const F32 depth = g_depthRt.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	const F32 linearDepth = linearizeDepth(depth, g_consts.m_near, g_consts.m_far);
 	const F32 linearDepth = linearizeDepth(depth, g_consts.m_near, g_consts.m_far);
-	uvw.z = linearDepth * (F32(kZSplitCount) / F32(kFinalZSplit + 1u));
+	uvw.z = linearDepth * (g_consts.m_zSplitCount / (g_consts.m_finalZSplit + 1.0f));
 
 
 	// Compute UV coordinates
 	// Compute UV coordinates
 	uvw.xy = uv;
 	uvw.xy = uv;

+ 5 - 2
AnKi/Shaders/MotionBlur.hlsl

@@ -8,9 +8,12 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 
 
 // Perform motion blur.
 // Perform motion blur.
-RVec3 motionBlur(Texture2D motionVectorsRt, SamplerState motionVectorsRtSampler, Texture2D<RVec4> toBlurRt, Vec2 toBlurRtSize,
-				 SamplerState toBlurRtSampler, Vec2 uv, U32 maxSamples)
+RVec3 motionBlur(Texture2D<Vec4> motionVectorsRt, SamplerState motionVectorsRtSampler, Texture2D<RVec4> toBlurRt, SamplerState toBlurRtSampler,
+				 Vec2 uv, U32 maxSamples)
 {
 {
+	Vec2 toBlurRtSize;
+	toBlurRt.GetDimensions(toBlurRtSize.x, toBlurRtSize.y);
+
 	// Compute velocity. Get the max velocity around the curent sample to avoid outlines. TAA's result and the motion
 	// Compute velocity. Get the max velocity around the curent sample to avoid outlines. TAA's result and the motion
 	// vectors RT do not quite overlap
 	// vectors RT do not quite overlap
 	Vec2 velocityMin = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0).rg;
 	Vec2 velocityMin = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0).rg;

+ 10 - 12
AnKi/Shaders/MotionVectors.hlsl

@@ -7,7 +7,6 @@
 
 
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 constexpr F32 kMaxRejectionDistance = 0.1; // In meters
 constexpr F32 kMaxRejectionDistance = 0.1; // In meters
 constexpr F32 kMaxHistoryLength = 16.0;
 constexpr F32 kMaxHistoryLength = 16.0;
 
 
@@ -22,6 +21,10 @@ struct Constants
 	Mat4 m_reprojectionMat;
 	Mat4 m_reprojectionMat;
 	Mat4 m_viewProjectionInvMat;
 	Mat4 m_viewProjectionInvMat;
 	Mat4 m_prevViewProjectionInvMat;
 	Mat4 m_prevViewProjectionInvMat;
+
+	Vec2 m_viewportSize;
+	F32 m_padding0;
+	F32 m_padding1;
 };
 };
 
 
 [[vk::binding(5)]] ConstantBuffer<Constants> g_consts;
 [[vk::binding(5)]] ConstantBuffer<Constants> g_consts;
@@ -40,7 +43,7 @@ Vec3 clipToWorld(Vec4 clip, Mat4 clipToWorldMat)
 /// Average the some depth values and unproject.
 /// Average the some depth values and unproject.
 Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
 Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
 {
 {
-	const Vec2 halfTexel = (1.0 / Vec2(kFramebufferSize)) / 2.0;
+	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
 
 
 	Vec4 depths = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	Vec4 depths = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	depths += tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
 	depths += tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
@@ -53,7 +56,7 @@ Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
 /// Get the depths of some neighbour texels, unproject and find the AABB in world space that encloses them.
 /// Get the depths of some neighbour texels, unproject and find the AABB in world space that encloses them.
 void getMinMaxWorldPositions(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat, out Vec3 aabbMin, out Vec3 aabbMax)
 void getMinMaxWorldPositions(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat, out Vec3 aabbMin, out Vec3 aabbMax)
 {
 {
-	const Vec2 halfTexel = (1.0 / Vec2(kFramebufferSize)) / 2.0;
+	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
 
 
 	const Vec4 depths1 = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	const Vec4 depths1 = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
 	const Vec4 depths2 = tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
 	const Vec4 depths2 = tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
@@ -107,7 +110,7 @@ F32 computeRejectionFactor(Vec2 uv, Vec2 historyUv)
 }
 }
 
 
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #else
 #else
 struct FragOut
 struct FragOut
 {
 {
@@ -119,12 +122,7 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 #endif
 {
 {
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	if(skipOutOfBoundsInvocations(UVec2(8, 8), kFramebufferSize, svDispatchThreadId.xy))
-	{
-		return;
-	}
-
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / g_consts.m_viewportSize;
 #endif
 #endif
 	const F32 depth = g_currentDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	const F32 depth = g_currentDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 
 
@@ -158,8 +156,8 @@ FragOut main(Vec2 uv : TEXCOORD)
 
 
 	// Write out
 	// Write out
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	g_motionVectorsUav[svDispatchThreadId.xy] = historyUv - uv;
-	g_historyLengthUav[svDispatchThreadId.xy] = historyLength;
+	g_motionVectorsUav[svDispatchThreadId] = historyUv - uv;
+	g_historyLengthUav[svDispatchThreadId] = historyLength;
 #else
 #else
 	FragOut output;
 	FragOut output;
 	output.m_motionVectors = historyUv - uv;
 	output.m_motionVectors = historyUv - uv;

+ 3 - 6
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -12,9 +12,6 @@
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
-
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] Texture2D<Vec4> g_inTex;
 [[vk::binding(1)]] Texture2D<Vec4> g_inTex;
 [[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
 [[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
@@ -85,12 +82,12 @@ F32 computeVarianceCenter(Vec2 uv)
 	if(historyLength < 2.0)
 	if(historyLength < 2.0)
 	{
 	{
 		// Worst case
 		// Worst case
-		sampleCount = kMaxSampleCount;
+		sampleCount = g_consts.m_maxSampleCount;
 	}
 	}
 	else if(historyLength > 4.0 && varianceCenter < 0.0001)
 	else if(historyLength > 4.0 && varianceCenter < 0.0001)
 	{
 	{
 		// Best case
 		// Best case
-		sampleCount = kMinSampleCount;
+		sampleCount = g_consts.m_minSampleCount;
 	}
 	}
 	else
 	else
 	{
 	{
@@ -99,7 +96,7 @@ F32 computeVarianceCenter(Vec2 uv)
 		F32 blur = varianceCenter * 100.0;
 		F32 blur = varianceCenter * 100.0;
 		blur = min(1.0, blur);
 		blur = min(1.0, blur);
 
 
-		const F32 sampleCountf = lerp(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
+		const F32 sampleCountf = lerp(F32(g_consts.m_minSampleCount), F32(g_consts.m_maxSampleCount), blur);
 
 
 		sampleCount = U32(sampleCountf);
 		sampleCount = U32(sampleCountf);
 	}
 	}

+ 18 - 16
AnKi/Shaders/ShadowmapsResolve.hlsl

@@ -8,10 +8,6 @@
 
 
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kTileCount, 2u);
-ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 4u);
-
 #define DEBUG_CASCADES 0
 #define DEBUG_CASCADES 0
 
 
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
 [[vk::binding(0)]] ConstantBuffer<ClusteredShadingConstants> g_clusteredShading;
@@ -34,6 +30,15 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 4u);
 [[vk::binding(10)]] RWTexture2D<RVec4> g_outUav;
 [[vk::binding(10)]] RWTexture2D<RVec4> g_outUav;
 #endif
 #endif
 
 
+struct Constants
+{
+	Vec2 m_framebufferSize;
+	F32 m_padding0;
+	F32 m_padding1;
+};
+
+[[vk::push_constant]] ConstantBuffer<Constants> g_consts;
+
 Vec3 computeDebugShadowCascadeColor(U32 cascade)
 Vec3 computeDebugShadowCascadeColor(U32 cascade)
 {
 {
 	if(cascade == 0u)
 	if(cascade == 0u)
@@ -55,24 +60,22 @@ Vec3 computeDebugShadowCascadeColor(U32 cascade)
 }
 }
 
 
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #else
 #else
 RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif
 #endif
 {
 {
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	if(any(svDispatchThreadId.xy >= kFramebufferSize))
-	{
-		return;
-	}
-
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	svDispatchThreadId = min(svDispatchThreadId, UVec2(g_consts.m_framebufferSize - 1.0f)); // Just to be sure
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_consts.m_framebufferSize;
 #endif
 #endif
 
 
 #if PCF
 #if PCF
 	// Noise
 	// Noise
-	const Vec2 kNoiseTexSize = 64.0f;
-	const Vec2 noiseUv = Vec2(kFramebufferSize) / kNoiseTexSize * uv;
+	Vec2 noiseTexSize;
+	g_noiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
+
+	const Vec2 noiseUv = g_consts.m_framebufferSize / noiseTexSize * uv;
 	RVec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
 	RVec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
 	noise = animateBlueNoise(noise, g_clusteredShading.m_frame % 16u);
 	noise = animateBlueNoise(noise, g_clusteredShading.m_frame % 16u);
 	const RF32 randFactor = noise.x;
 	const RF32 randFactor = noise.x;
@@ -86,8 +89,7 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 
 
 	// Cluster
 	// Cluster
 	const Vec2 fragCoord = uv * g_clusteredShading.m_renderingSize;
 	const Vec2 fragCoord = uv * g_clusteredShading.m_renderingSize;
-	Cluster cluster = getClusterFragCoord(g_clusters, Vec3(fragCoord, depth), kTileCount, kZSplitCount, g_clusteredShading.m_zSplitMagic.x,
-										  g_clusteredShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(fragCoord, depth));
 
 
 	// Layers
 	// Layers
 	U32 shadowCasterCountPerFragment = 0u;
 	U32 shadowCasterCountPerFragment = 0u;
@@ -204,7 +206,7 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 
 
 	// Store
 	// Store
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	g_outUav[svDispatchThreadId.xy] = shadowFactors;
+	g_outUav[svDispatchThreadId] = shadowFactors;
 #else
 #else
 	return shadowFactors;
 	return shadowFactors;
 #endif
 #endif

+ 9 - 12
AnKi/Shaders/TemporalAA.hlsl

@@ -10,9 +10,8 @@
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
 
-ANKI_SPECIALIZATION_CONSTANT_F32(kVarianceClippingGamma, 0u);
-ANKI_SPECIALIZATION_CONSTANT_F32(kBlendFactor, 1u);
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 2u);
+constexpr F32 kVarianceClippingGamma = 2.7f; // Variance clipping paper proposes 1.0
+constexpr F32 kBlendFactor = 1.0f / 16.0f;
 
 
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(1)]] Texture2D<RVec4> g_inputRt;
 [[vk::binding(1)]] Texture2D<RVec4> g_inputRt;
@@ -23,8 +22,8 @@ constexpr U32 kTonemappingBinding = 4u;
 #include <AnKi/Shaders/TonemappingResources.hlsl>
 #include <AnKi/Shaders/TonemappingResources.hlsl>
 
 
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-[[vk::binding(5)]] RWTexture2D<RVec4> g_outImg;
-[[vk::binding(6)]] RWTexture2D<RVec4> g_tonemappedImg;
+[[vk::binding(5)]] RWTexture2D<RVec4> g_uavTex;
+[[vk::binding(6)]] RWTexture2D<RVec4> g_tonemappedUavTex;
 #else
 #else
 struct FragOut
 struct FragOut
 {
 {
@@ -48,12 +47,10 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 #endif
 {
 {
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	if(any(svDispatchThreadId.xy >= kFramebufferSize))
-	{
-		return;
-	}
+	Vec2 outUavSize;
+	g_uavTex.GetDimensions(outUavSize.x, outUavSize.y);
 
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / outUavSize;
 #endif
 #endif
 
 
 	// Get prev uv coords
 	// Get prev uv coords
@@ -108,8 +105,8 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif
 #endif
 	const Vec3 tonemapped = linearToSRgb(tonemap(outColor, readExposureAndAverageLuminance().x));
 	const Vec3 tonemapped = linearToSRgb(tonemap(outColor, readExposureAndAverageLuminance().x));
 #if defined(ANKI_COMPUTE_SHADER)
 #if defined(ANKI_COMPUTE_SHADER)
-	g_outImg[svDispatchThreadId.xy] = RVec4(outColor, 0.0);
-	g_tonemappedImg[svDispatchThreadId.xy] = RVec4(tonemapped, 0.0);
+	g_uavTex[svDispatchThreadId.xy] = RVec4(outColor, 0.0);
+	g_tonemappedUavTex[svDispatchThreadId.xy] = RVec4(tonemapped, 0.0);
 #else
 #else
 	FragOut output;
 	FragOut output;
 	output.m_color = outColor;
 	output.m_color = outColor;

+ 18 - 24
AnKi/Shaders/TonemappingAverageLuminance.ankiprog

@@ -7,18 +7,10 @@
 
 
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
 
-#define LOG_AVG 0
-
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kInputTexSize, 0u);
-
 #define THREAD_COUNT_X 32u
 #define THREAD_COUNT_X 32u
 #define THREAD_COUNT_Y 16u
 #define THREAD_COUNT_Y 16u
 #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
 #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
 
 
-// Align the tex size to workgroup size
-constexpr UVec2 kAlignedInputTexSize = THREAD_COUNT * ((kInputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
-constexpr UVec2 kPixelsPerTile = kAlignedInputTexSize / THREAD_COUNT;
-
 [[vk::binding(0)]] Texture2D<RVec4> g_tex;
 [[vk::binding(0)]] Texture2D<RVec4> g_tex;
 
 
 #define TONEMAPPING_RESOURCE_AS_WRITE_IMAGE 1
 #define TONEMAPPING_RESOURCE_AS_WRITE_IMAGE 1
@@ -29,28 +21,34 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 
 
 [numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
 [numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
 {
 {
+	UVec2 inputTexSize;
+	g_tex.GetDimensions(inputTexSize.x, inputTexSize.y);
+
+	// Align the tex size to workgroup size
+	const UVec2 alignedInputTexSize = THREAD_COUNT * ((inputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
+	const UVec2 pixelsPerTile = alignedInputTexSize / THREAD_COUNT;
+
 	// Gather the log-average luminance of a tile. It will miss some pixels but not too many
 	// Gather the log-average luminance of a tile. It will miss some pixels but not too many
-	const U32 yStart = svGroupThreadId.y * kPixelsPerTile.y;
-	const U32 xStart = svGroupThreadId.x * kPixelsPerTile.x;
+	const U32 yStart = svGroupThreadId.y * pixelsPerTile.y;
+	const U32 xStart = svGroupThreadId.x * pixelsPerTile.x;
+
+	const F32 weight = (1.0 / F32(inputTexSize.x * inputTexSize.y));
 
 
 	F32 avgLum = 0.0;
 	F32 avgLum = 0.0;
-	[unroll] for(U32 y = 0u; y < kPixelsPerTile.y; ++y)
+	for(U32 y = 0u; y < pixelsPerTile.y; ++y)
 	{
 	{
-		[unroll] for(U32 x = 0u; x < kPixelsPerTile.x; ++x)
+		for(U32 x = 0u; x < pixelsPerTile.x; ++x)
 		{
 		{
 			const UVec2 uv = UVec2(xStart, yStart) + UVec2(x, y);
 			const UVec2 uv = UVec2(xStart, yStart) + UVec2(x, y);
-			if(uv.x >= kInputTexSize.x || uv.y >= kInputTexSize.y)
+			if(uv.x >= F32(inputTexSize.x) || uv.y >= F32(inputTexSize.y))
 			{
 			{
 				continue;
 				continue;
 			}
 			}
 
 
 			const Vec3 color = g_tex.Load(IVec3(uv, 0)).rgb;
 			const Vec3 color = g_tex.Load(IVec3(uv, 0)).rgb;
 			const F32 lum = computeLuminance(color);
 			const F32 lum = computeLuminance(color);
-#if LOG_AVG
-			avgLum += log(max(kEpsilonf, lum));
-#else
-			avgLum += lum;
-#endif
+
+			avgLum += lum * weight;
 		}
 		}
 	}
 	}
 
 
@@ -67,7 +65,7 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 		}
 		}
 
 
 #if ANKI_PLATFORM_MOBILE
 #if ANKI_PLATFORM_MOBILE
-		if(s > 16u)
+		if(s > WaveGetLaneCount())
 		{
 		{
 			GroupMemoryBarrierWithGroupSync();
 			GroupMemoryBarrierWithGroupSync();
 		}
 		}
@@ -77,11 +75,7 @@ groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
 	// Write the result
 	// Write the result
 	[branch] if(svGroupIndex == 0u)
 	[branch] if(svGroupIndex == 0u)
 	{
 	{
-#if LOG_AVG
-		const F32 crntLum = exp(s_avgLum[0] * (1.0 / F32(kInputTexSize.x * kInputTexSize.y)));
-#else
-		const F32 crntLum = s_avgLum[0] * (1.0 / F32(kInputTexSize.x * kInputTexSize.y));
-#endif
+		const F32 crntLum = s_avgLum[0];
 
 
 #if 1
 #if 1
 		const F32 prevLum = readExposureAndAverageLuminance().y;
 		const F32 prevLum = readExposureAndAverageLuminance().y;