Browse Source

Add the new cluster shading in light shading

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
dbc1f15d3a

+ 4 - 2
AnKi/Gr/GrObject.cpp

@@ -20,8 +20,10 @@ GrObject::GrObject(GrManager* manager, GrObjectType type, CString name)
 		name = "N/A";
 	}
 
-	m_name = static_cast<char*>(manager->getAllocator().getMemoryPool().allocate(name.getLength() + 1, alignof(char)));
-	memcpy(const_cast<char*>(&m_name[0]), &name[0], name.getLength() + 1);
+	char* pname =
+		static_cast<char*>(manager->getAllocator().getMemoryPool().allocate(name.getLength() + 1, alignof(char)));
+	memcpy(pname, &name[0], name.getLength() + 1);
+	m_name = pname;
 }
 
 GrObject::~GrObject()

+ 5 - 2
AnKi/Renderer/ClusterBinning.cpp

@@ -217,8 +217,10 @@ void ClusterBinning::writeClustererBuffers(RenderingContext& ctx)
 
 	cs.m_clustersAddress =
 		stagingMem.allocateFrame(sizeof(Cluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, cs.m_clustersToken);
+}
 
-	// Fire the async job
+void ClusterBinning::writeClusterBuffersAsync()
+{
 	m_r->getThreadHive().submitTask(
 		[](void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore) {
 			static_cast<ClusterBinning*>(userData)->writeClustererBuffersTask();
@@ -248,14 +250,15 @@ void ClusterBinning::writeClustererBuffersTask()
 			out.m_diffuseColor = in.m_diffuseColor;
 			out.m_radius = in.m_radius;
 			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
-			out.m_shadowLayer = in.m_shadowLayer;
 
 			if(in.m_shadowRenderQueues[0] == nullptr)
 			{
 				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
+				out.m_shadowLayer = MAX_U32;
 			}
 			else
 			{
+				out.m_shadowLayer = in.m_shadowLayer;
 				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
 				static_assert(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets), "See file");
 				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],

+ 4 - 0
AnKi/Renderer/ClusterBinning.h

@@ -26,6 +26,10 @@ public:
 	/// Populate the rendergraph.
 	void populateRenderGraph(RenderingContext& ctx);
 
+	/// Populates the clusters and the rest of the objects (lights, probes, etc.) in an async job. Must be called
+	/// after the render queue is finalized.
+	void writeClusterBuffersAsync();
+
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;

+ 22 - 24
AnKi/Renderer/LightShading.cpp

@@ -57,10 +57,9 @@ Error LightShading::initLightShading(const ConfigSet& config)
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/LightShading.ankiprog", m_lightShading.m_prog));
 
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_lightShading.m_prog);
-	variantInitInfo.addConstant("CLUSTER_COUNT_X", U32(m_r->getClusterCount()[0]));
-	variantInitInfo.addConstant("CLUSTER_COUNT_Y", U32(m_r->getClusterCount()[1]));
-	variantInitInfo.addConstant("CLUSTER_COUNT_Z", U32(m_r->getClusterCount()[2]));
-	variantInitInfo.addConstant("CLUSTER_COUNT", U32(m_r->getClusterCount()[3]));
+	variantInitInfo.addConstant("TILE_COUNT", m_r->getTileCounts());
+	variantInitInfo.addConstant("Z_SPLIT_COUNT", m_r->getZSplitCount());
+	variantInitInfo.addConstant("TILE_SIZE", m_r->getTileSize());
 	variantInitInfo.addConstant("IR_MIPMAP_COUNT", U32(m_r->getProbeReflections().getReflectionTextureMipmapCount()));
 	const ShaderProgramResourceVariant* variant;
 
@@ -107,7 +106,6 @@ void LightShading::run(RenderPassWorkContext& rgraphCtx)
 {
 	const RenderingContext& ctx = *m_runCtx.m_ctx;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
-	const ClusterBinOut& rsrc = ctx.m_clusterBinOut;
 
 	cmdb->setViewport(0, 0, m_r->getWidth(), m_r->getHeight());
 
@@ -118,40 +116,40 @@ void LightShading::run(RenderPassWorkContext& rgraphCtx)
 		cmdb->setDepthWrite(false);
 
 		// Bind all
-		bindUniforms(cmdb, 0, 0, ctx.m_lightShadingUniformsToken);
+		const ClustererGpuObjects& binning = ctx.m_clusterShading;
+		bindUniforms(cmdb, 0, 0, binning.m_clusteredShadingUniformsToken);
 
-		bindUniforms(cmdb, 0, 1, rsrc.m_pointLightsToken);
-		bindUniforms(cmdb, 0, 2, rsrc.m_spotLightsToken);
+		bindUniforms(cmdb, 0, 1, binning.m_pointLightsToken);
+		bindUniforms(cmdb, 0, 2, binning.m_spotLightsToken);
 		rgraphCtx.bindColorTexture(0, 3, m_r->getShadowMapping().getShadowmapRt());
 
-		bindUniforms(cmdb, 0, 4, rsrc.m_reflectionProbesToken);
+		bindUniforms(cmdb, 0, 4, binning.m_reflectionProbesToken);
 		rgraphCtx.bindColorTexture(0, 5, m_r->getProbeReflections().getReflectionRt());
 		cmdb->bindTexture(0, 6, m_r->getProbeReflections().getIntegrationLut(), TextureUsageBit::SAMPLED_FRAGMENT);
 
 		m_r->getGlobalIllumination().bindVolumeTextures(ctx, rgraphCtx, 0, 7);
-		bindUniforms(cmdb, 0, 8, rsrc.m_globalIlluminationProbesToken);
+		bindUniforms(cmdb, 0, 8, binning.m_globalIlluminationProbesToken);
 
-		bindStorage(cmdb, 0, 9, rsrc.m_clustersToken);
-		bindStorage(cmdb, 0, 10, rsrc.m_indicesToken);
+		bindStorage(cmdb, 0, 9, binning.m_clustersToken);
 
-		cmdb->bindSampler(0, 11, m_r->getSamplers().m_nearestNearestClamp);
-		cmdb->bindSampler(0, 12, m_r->getSamplers().m_trilinearClamp);
-		rgraphCtx.bindColorTexture(0, 13, m_r->getGBuffer().getColorRt(0));
-		rgraphCtx.bindColorTexture(0, 14, m_r->getGBuffer().getColorRt(1));
-		rgraphCtx.bindColorTexture(0, 15, m_r->getGBuffer().getColorRt(2));
-		rgraphCtx.bindTexture(0, 16, m_r->getGBuffer().getDepthRt(),
+		cmdb->bindSampler(0, 10, m_r->getSamplers().m_nearestNearestClamp);
+		cmdb->bindSampler(0, 11, m_r->getSamplers().m_trilinearClamp);
+		rgraphCtx.bindColorTexture(0, 12, m_r->getGBuffer().getColorRt(0));
+		rgraphCtx.bindColorTexture(0, 13, m_r->getGBuffer().getColorRt(1));
+		rgraphCtx.bindColorTexture(0, 14, m_r->getGBuffer().getColorRt(2));
+		rgraphCtx.bindTexture(0, 15, m_r->getGBuffer().getDepthRt(),
 							  TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-		rgraphCtx.bindColorTexture(0, 17, m_r->getSsr().getRt());
-		rgraphCtx.bindColorTexture(0, 18, m_r->getSsao().getRt());
-		rgraphCtx.bindColorTexture(0, 19, m_r->getSsgi().getRt());
+		rgraphCtx.bindColorTexture(0, 16, m_r->getSsr().getRt());
+		rgraphCtx.bindColorTexture(0, 17, m_r->getSsao().getRt());
+		rgraphCtx.bindColorTexture(0, 18, m_r->getSsgi().getRt());
 
 		if(m_r->getRtShadowsEnabled())
 		{
-			rgraphCtx.bindColorTexture(0, 20, m_r->getRtShadows().getRt());
+			rgraphCtx.bindColorTexture(0, 19, m_r->getRtShadows().getRt());
 		}
 		else
 		{
-			rgraphCtx.bindColorTexture(0, 21, m_r->getShadowmapsResolve().getRt());
+			rgraphCtx.bindColorTexture(0, 20, m_r->getShadowmapsResolve().getRt());
 		}
 
 		// Draw
@@ -191,7 +189,7 @@ void LightShading::run(RenderPassWorkContext& rgraphCtx)
 	}
 
 	// Forward shading last
-	m_r->getForwardShading().run(ctx, rgraphCtx);
+	// TODO m_r->getForwardShading().run(ctx, rgraphCtx);
 }
 
 void LightShading::populateRenderGraph(RenderingContext& ctx)

+ 3 - 0
AnKi/Renderer/Renderer.cpp

@@ -359,6 +359,9 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 
 	m_finalComposite->populateRenderGraph(ctx);
 
+	// Populate the uniforms
+	m_clusterBinning->writeClusterBuffersAsync();
+
 	// Bin lights and update uniforms
 	m_stats.m_lightBinTime = (m_statsEnabled) ? HighRezTimer::getCurrentTime() : -1.0;
 	ClusterBinIn cin;

+ 1 - 0
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -55,6 +55,7 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #extension GL_EXT_shader_atomic_int64 : enable
+#extension GL_EXT_shader_subgroup_extended_types_int64 : enable
 
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_scalar_block_layout : enable

+ 35 - 31
AnKi/Shaders/ClusteredShadingCommon2.glsl

@@ -20,55 +20,56 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_UNIFORMS_BINDING
 //
 // Light uniforms (3)
 //
-#if defined(CLUSTER_SHADING_LIGHTS_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_LIGHTS_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+#if defined(CLUSTERED_SHADING_LIGHTS_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_LIGHTS_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	PointLight2 u_pointLights2[MAX_VISIBLE_POINT_LIGHTS];
 };
 
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_LIGHTS_BINDING + 1, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_LIGHTS_BINDING + 1,
+	   scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	SpotLight2 u_spotLights2[MAX_VISIBLE_SPOT_LIGHTS];
 };
 
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_LIGHTS_BINDING + 2) uniform texture2D u_shadowAtlasTex;
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_LIGHTS_BINDING + 2) uniform texture2D u_shadowAtlasTex;
 #endif
 
 //
-// Indirect uniforms (3)
+// Reflection probes (3)
 //
-#if defined(CLUSTER_SHADING_REFLECTIONS_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_REFLECTIONS_BINDING,
+#if defined(CLUSTERED_SHADING_REFLECTIONS_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_REFLECTIONS_BINDING,
 	   scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	ReflectionProbe2 u_reflectionProbes2[MAX_VISIBLE_REFLECTION_PROBES];
 };
 
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTER_SHADING_REFLECTIONS_BINDING + 1) uniform textureCubeArray u_reflectionsTex2;
+	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform textureCubeArray u_reflectionsTex2;
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTER_SHADING_REFLECTIONS_BINDING + 2) uniform texture2D u_integrationLut2;
+	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 2) uniform texture2D u_integrationLut2;
 #endif
 
 //
 // Decal uniforms (3)
 //
-#if defined(CLUSTER_SHADING_DECALS_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_DECALS_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+#if defined(CLUSTERED_SHADING_DECALS_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_DECALS_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	Decal2 u_decals2[MAX_VISIBLE_DECALS];
 };
 
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_DECALS_BINDING + 1) uniform texture2D u_diffuseDecalTex;
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_DECALS_BINDING + 1) uniform texture2D u_diffuseDecalTex;
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTER_SHADING_DECALS_BINDING + 2) uniform texture2D u_specularRoughnessDecalTex;
+	   binding = CLUSTERED_SHADING_DECALS_BINDING + 2) uniform texture2D u_specularRoughnessDecalTex;
 #endif
 
 //
 // Fog density uniforms (1)
 //
-#if defined(CLUSTER_SHADING_FOG_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_FOG_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+#if defined(CLUSTERED_SHADING_FOG_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_FOG_BINDING, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	FogDensityVolume u_fogDensityVolumes[MAX_VISIBLE_FOG_DENSITY_VOLUMES];
 };
@@ -77,21 +78,21 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_FOG_BINDING, scala
 //
 // GI (2)
 //
-#if defined(CLUSTER_SHADING_GI_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_GI_BINDING) uniform texture3D
+#if defined(CLUSTERED_SHADING_GI_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING) uniform texture3D
 	u_globalIlluminationTextures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
 
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_GI_BINDING + 1, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING + 1, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
-	GlobalIlluminationProbe u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
+	GlobalIlluminationProbe2 u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
 };
 #endif
 
 //
 // Cluster uniforms
 //
-#if defined(CLUSTER_SHADING_CLUSTERS_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_CLUSTERS_BINDING,
+#if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_CLUSTERS_BINDING,
 	   scalar) readonly buffer ANKI_RANDOM_BLOCK_NAME
 {
 	Cluster u_clusters2[];
@@ -102,24 +103,24 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_CLUSTERS_BINDING,
 Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask)
 {
 	U32 maxObjects = 0u;
-	U32 count = 0u;
+	I32 count = 0;
 
 	if((objectTypeMask & (1u << CLUSTER_OBJECT_TYPE_POINT_LIGHT)) != 0)
 	{
 		maxObjects += MAX_VISIBLE_POINT_LIGHTS;
-		count += bitCount(cluster.m_pointLightsMask);
+		count += I32(bitCount(cluster.m_pointLightsMask));
 	}
 
 	if((objectTypeMask & (1u << CLUSTER_OBJECT_TYPE_SPOT_LIGHT)) != 0)
 	{
 		maxObjects += MAX_VISIBLE_SPOT_LIGHTS;
-		count += bitCount(cluster.m_spotLightsMask);
+		count += I32(bitCount(cluster.m_spotLightsMask));
 	}
 
 	if((objectTypeMask & (1u << CLUSTER_OBJECT_TYPE_DECAL)) != 0)
 	{
 		maxObjects += MAX_VISIBLE_DECALS;
-		count += bitCount(cluster.m_decalsMask);
+		count += I32(bitCount(cluster.m_decalsMask));
 	}
 
 	if((objectTypeMask & (1u << CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME)) != 0)
@@ -144,6 +145,7 @@ Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask)
 	return heatmap(factor);
 }
 
+#if defined(ANKI_FRAGMENT_SHADER)
 /// Returns the index of the zSplit or linearizeDepth(n, f, depth)*zSplitCount
 /// Simplifying this equation is 1/(a+b/depth) where a=(n-f)/(n*zSplitCount) and b=f/(n*zSplitCount)
 U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
@@ -153,16 +155,17 @@ U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
 }
 
 /// Return the tile index.
-U32 computeTileClusterIndex(Vec2 uv, U32 tileCountX, U32 tileCountY)
+U32 computeTileClusterIndex(U32 tileSize, U32 tileCountX)
 {
-	return U32(uv.y * F32(tileCountY * tileCountX) + uv.x * F32(tileCountX));
+	const UVec2 tileXY = UVec2(gl_FragCoord.xy / F32(tileSize));
+	return tileXY.y * tileCountX + tileXY.x;
 }
 
-#if defined(CLUSTER_SHADING_CLUSTERS_BINDING)
+#	if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
 /// Get the final cluster after ORing and ANDing the masks.
-Cluster getCluster(F32 uv, F32 depth, U32 tileCountX, U32 tileCountY, U32 zSplitCount, F32 a, F32 b)
+Cluster getCluster(F32 depth, U32 tileSize, U32 tileCountX, U32 tileCountY, U32 zSplitCount, F32 a, F32 b)
 {
-	const Cluster tileCluster = u_clusters2[computeTileClusterIndex(uv, tileCountX, tileCountY)];
+	const Cluster tileCluster = u_clusters2[computeTileClusterIndex(tileSize, tileCountX)];
 	const Cluster zCluster = u_clusters2[computeZSplitClusterIndex(depth, zSplitCount, a, b) + tileCountX * tileCountY];
 
 	Cluster outCluster;
@@ -177,4 +180,5 @@ Cluster getCluster(F32 uv, F32 depth, U32 tileCountX, U32 tileCountY, U32 zSplit
 
 	return outCluster;
 }
-#endif
+#	endif
+#endif // defined(ANKI_FRAGMENT_SHADER)

+ 10 - 1
AnKi/Shaders/Functions.glsl

@@ -543,4 +543,13 @@ F32 computeMipLevel(Vec2 normalizedUvs)
 	const F32 deltaMax2 = max(dot(dx, dx), dot(dy, dy));
 	return max(0.0, 0.5 * log2(deltaMax2));
 }
-#endif
+#endif
+
+/// Returns the index of the least significant set bit of a 64bit value. The regular findLSB in glslang invokes a
+/// builtin that is only supposed to be used with 32bit input, so each 32bit half is tested. Input must not be zero.
+I32 findLSB64(U64 v)
+{
+	const I32 lsb1 = findLSB(U32(v)); // Low 32 bits
+	const I32 lsb2 = findLSB(U32(v >> 32ul)); // High 32 bits
+	return (lsb1 >= 0) ? lsb1 : lsb2 + 32; // High half (+32) if the low-half result is negative
+}

+ 33 - 0
AnKi/Shaders/LightFunctions.glsl

@@ -347,6 +347,7 @@ Vec3 sampleAmbientDice(Vec3 posx, Vec3 negx, Vec3 posy, Vec3 negy, Vec3 posz, Ve
 }
 
 // Sample the irradiance term from the clipmap
+// TODO remove
 Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
 							  texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES], sampler linearAnyClampSampler)
 {
@@ -377,3 +378,35 @@ Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glob
 
 	return irradiance;
 }
+
+// Sample the irradiance term from the clipmap
+Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe2 probe,
+							  texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES], sampler linearAnyClampSampler)
+{
+	// Find the UVW
+	Vec3 uvw = (worldPos - probe.m_aabbMin) / (probe.m_aabbMax - probe.m_aabbMin);
+
+	// The U contains the 6 directions so divide
+	uvw.x /= 6.0;
+
+	// Clamp it to avoid direction leaking
+	uvw.x = clamp(uvw.x, probe.m_halfTexelSizeU, (1.0 / 6.0) - probe.m_halfTexelSizeU);
+
+	// Read the irradiance
+	Vec3 irradiancePerDir[6u];
+	ANKI_UNROLL for(U32 dir = 0u; dir < 6u; ++dir)
+	{
+		// Point to the correct UV
+		Vec3 shiftedUVw = uvw;
+		shiftedUVw.x += (1.0 / 6.0) * F32(dir);
+
+		irradiancePerDir[dir] =
+			textureLod(textures[nonuniformEXT(probe.m_textureIndex)], linearAnyClampSampler, shiftedUVw, 0.0).rgb;
+	}
+
+	// Sample the irradiance
+	const Vec3 irradiance = sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2],
+											  irradiancePerDir[3], irradiancePerDir[4], irradiancePerDir[5], normal);
+
+	return irradiance;
+}

+ 72 - 105
AnKi/Shaders/LightShading.ankiprog

@@ -5,31 +5,13 @@
 
 #pragma anki mutator USE_SHADOW_LAYERS 0 1
 
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT_X, 0, 1u);
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT_Y, 1, 1u);
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT_Z, 2, 1u);
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT, 3, 1u);
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(TILE_COUNT, 0, UVec2(1u));
+ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 2, 1u);
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 3, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(IR_MIPMAP_COUNT, 4, 1u);
 
 #pragma anki start vert
-#include <AnKi/Shaders/Common.glsl>
-
-layout(location = 0) out Vec2 out_uv;
-layout(location = 1) out Vec2 out_clusterIJ;
-
-out gl_PerVertex
-{
-	Vec4 gl_Position;
-};
-
-void main()
-{
-	out_uv = Vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0;
-	Vec2 pos = out_uv * 2.0 - 1.0;
-	gl_Position = Vec4(pos, 0.0, 1.0);
-
-	out_clusterIJ = Vec2(CLUSTER_COUNT_X, CLUSTER_COUNT_Y) * out_uv;
-}
+#include <AnKi/Shaders/QuadVert.glsl>
 #pragma anki end
 
 #pragma anki start frag
@@ -38,32 +20,31 @@ void main()
 #include <AnKi/Shaders/RtShadows.glsl>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 
-#define LIGHT_SET 0
-#define LIGHT_COMMON_UNIS_BINDING 0
-#define LIGHT_LIGHTS_BINDING 1
-#define LIGHT_INDIRECT_SPECULAR_BINDING 4
-#define LIGHT_GLOBAL_ILLUMINATION_BINDING 7
-#define LIGHT_CLUSTERS_BINDING 9
-#include <AnKi/Shaders/ClusteredShadingCommon.glsl>
-
-layout(set = 0, binding = 11) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 12) uniform sampler u_trilinearClampSampler;
-
-layout(set = 0, binding = 13) uniform texture2D u_msRt0;
-layout(set = 0, binding = 14) uniform texture2D u_msRt1;
-layout(set = 0, binding = 15) uniform texture2D u_msRt2;
-layout(set = 0, binding = 16) uniform texture2D u_msDepthRt;
-layout(set = 0, binding = 17) uniform texture2D u_ssrRt;
-layout(set = 0, binding = 18) uniform texture2D u_ssaoRt;
-layout(set = 0, binding = 19) uniform texture2D u_ssgiRt;
+#define CLUSTERED_SHADING_SET 0
+#define CLUSTERED_SHADING_UNIFORMS_BINDING 0
+#define CLUSTERED_SHADING_LIGHTS_BINDING 1
+#define CLUSTERED_SHADING_REFLECTIONS_BINDING 4
+#define CLUSTERED_SHADING_GI_BINDING 7
+#define CLUSTERED_SHADING_CLUSTERS_BINDING 9
+#include <AnKi/Shaders/ClusteredShadingCommon2.glsl>
+
+layout(set = 0, binding = 10) uniform sampler u_nearestAnyClampSampler;
+layout(set = 0, binding = 11) uniform sampler u_trilinearClampSampler;
+
+layout(set = 0, binding = 12) uniform texture2D u_msRt0;
+layout(set = 0, binding = 13) uniform texture2D u_msRt1;
+layout(set = 0, binding = 14) uniform texture2D u_msRt2;
+layout(set = 0, binding = 15) uniform texture2D u_msDepthRt;
+layout(set = 0, binding = 16) uniform texture2D u_ssrRt;
+layout(set = 0, binding = 17) uniform texture2D u_ssaoRt;
+layout(set = 0, binding = 18) uniform texture2D u_ssgiRt;
 #if USE_SHADOW_LAYERS
-layout(set = 0, binding = 20) uniform utexture2D u_shadowLayersTex;
+layout(set = 0, binding = 19) uniform utexture2D u_shadowLayersTex;
 #else
-layout(set = 0, binding = 21) uniform texture2D u_resolvedSm;
+layout(set = 0, binding = 20) uniform texture2D u_resolvedSm;
 #endif
 
 layout(location = 0) in Vec2 in_uv;
-layout(location = 1) in Vec2 in_clusterIJ;
 
 layout(location = 0) out Vec3 out_color;
 
@@ -88,20 +69,14 @@ void main()
 	}
 
 	// Get world position
-	const Vec4 worldPos4 = u_invViewProjMat * Vec4(ndc, depth, 1.0);
+	const Vec4 worldPos4 = u_clusterShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depth, 1.0);
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
-	// Get first light index
-	U32 idxOffset;
-	{
-		U32 k = computeClusterK(u_clustererMagic, worldPos);
-		U32 clusterIdx =
-			k * (CLUSTER_COUNT_X * CLUSTER_COUNT_Y) + U32(in_clusterIJ.y) * CLUSTER_COUNT_X + U32(in_clusterIJ.x);
-
-		idxOffset = u_clusters[clusterIdx];
+	// Get the cluster
+	Cluster cluster = getCluster(depth, TILE_SIZE, TILE_COUNT.x, TILE_COUNT.y, Z_SPLIT_COUNT,
+								 u_clusterShading.m_zSplitMagic.x, u_clusterShading.m_zSplitMagic.y);
 
-		// out_color = lightHeatmap(idxOffset, 5, 1u << 3); return;
-	}
+	// out_color = clusterHeatmap(cluster, 1u << CLUSTER_OBJECT_TYPE_POINT_LIGHT); return;
 
 	// Decode GBuffer
 	GbufferInfo gbuffer;
@@ -125,14 +100,15 @@ void main()
 	out_color = gbuffer.m_diffuse * gbuffer.m_emission;
 
 	// Dir light
-	Vec3 viewDir = normalize(u_cameraPos - worldPos);
-	if(u_dirLight.m_active != 0u)
+	const Vec3 viewDir = normalize(u_clusterShading.m_cameraPosition - worldPos);
+	const DirectionalLight2 dirLight = u_clusterShading.m_directionalLight;
+	if(dirLight.m_active != 0u)
 	{
 		F32 shadowFactor;
-		if(u_dirLight.m_cascadeCount > 0)
+		if(dirLight.m_cascadeCount > 0)
 		{
 #if USE_SHADOW_LAYERS
-			shadowFactor = resolvedSm[u_dirLight.m_shadowLayer];
+			shadowFactor = resolvedSm[dirLight.m_shadowLayer];
 #else
 			shadowFactor = resolvedSm[0];
 			++resolvedSmIdx;
@@ -143,21 +119,22 @@ void main()
 			shadowFactor = 1.0;
 		}
 
-		const Vec3 l = -u_dirLight.m_dir;
+		const Vec3 l = -dirLight.m_direction;
 
 		const F32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
 
 		const Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
 		const Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l);
 
-		out_color += (diffC + specC) * u_dirLight.m_diffuseColor * (shadowFactor * lambert);
+		out_color += (diffC + specC) * dirLight.m_diffuseColor * (shadowFactor * lambert);
 	}
 
 	// Point lights
-	U32 idx;
-	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+	ANKI_LOOP while(cluster.m_pointLightsMask != 0u)
 	{
-		PointLight light = u_pointLights[idx];
+		const I32 idx = findLSB64(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const PointLight2 light = u_pointLights2[idx];
 
 		LIGHTING_COMMON_BRDF();
 
@@ -175,16 +152,17 @@ void main()
 	}
 
 	// Spot lights
-	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+	ANKI_LOOP while(cluster.m_spotLightsMask != 0u)
 	{
-		SpotLight light = u_spotLights[idx];
+		const I32 idx = findLSB64(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const SpotLight2 light = u_spotLights2[idx];
 
 		LIGHTING_COMMON_BRDF();
 
-		const F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_dir);
+		const F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction);
 
-		const F32 shadowmapLayerIdx = light.m_shadowmapId;
-		ANKI_BRANCH if(shadowmapLayerIdx >= 0.0)
+		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		{
 #if USE_SHADOW_LAYERS
 			const F32 shadow = resolvedSm[light.m_shadowLayer];
@@ -205,17 +183,16 @@ void main()
 		const Vec3 reflDir = reflect(-viewDir, gbuffer.m_normal);
 		const F32 reflLod = F32(IR_MIPMAP_COUNT - 1u) * gbuffer.m_roughness;
 
-		if(subgroupAll(u_lightIndices[idxOffset] != MAX_U32 && u_lightIndices[idxOffset + 1u] == MAX_U32))
+		if(bitCount(cluster.m_reflectionProbesMask) == 1)
 		{
 			// Only one probe, do a fast path without blend weight
 
-			const ReflectionProbe probe = u_reflectionProbes[u_lightIndices[idxOffset]];
-			idxOffset += 2u;
+			const ReflectionProbe2 probe = u_reflectionProbes2[findLSB(cluster.m_reflectionProbesMask)];
 
 			// Sample
 			const Vec3 cubeUv = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
 			const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
-			specIndirect = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
+			specIndirect = textureLod(u_reflectionsTex2, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
 		}
 		else
 		{
@@ -224,9 +201,11 @@ void main()
 			F32 totalBlendWeight = EPSILON;
 
 			// Loop probes
-			ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+			ANKI_LOOP while(cluster.m_reflectionProbesMask != 0u)
 			{
-				const ReflectionProbe probe = u_reflectionProbes[idx];
+				const U32 idx = U32(findLSB(cluster.m_reflectionProbesMask));
+				cluster.m_reflectionProbesMask &= ~(1u << idx);
+				const ReflectionProbe2 probe = u_reflectionProbes2[idx];
 
 				// Compute blend weight
 				const F32 blendWeight = computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, 0.2);
@@ -236,7 +215,7 @@ void main()
 				const Vec3 cubeUv =
 					intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
 				const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
-				Vec3 c = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
+				const Vec3 c = textureLod(u_reflectionsTex2, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
 				specIndirect += c * blendWeight;
 			}
 
@@ -253,7 +232,7 @@ void main()
 		// Compute env BRDF
 		const F32 NoV = max(EPSILON, dot(gbuffer.m_normal, viewDir));
 		const Vec3 env =
-			envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut, u_trilinearClampSampler, NoV);
+			envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut2, u_trilinearClampSampler, NoV);
 
 		out_color += finalSpecIndirect * env;
 	}
@@ -262,13 +241,11 @@ void main()
 	{
 		Vec3 diffIndirect;
 
-		const U32 crntProbeIdx = u_lightIndices[idxOffset];
-		if(subgroupAllEqual(crntProbeIdx)
-		   && subgroupAll(crntProbeIdx != MAX_U32 && u_lightIndices[idxOffset + 1u] == MAX_U32))
+		if(cluster.m_giProbesMask != 0u)
 		{
 			// All subgroups point to the same probe and there is only one probe, do a fast path without blend weight
 
-			GlobalIlluminationProbe probe = u_giProbes[subgroupBroadcastFirst(crntProbeIdx)]; // It should be uniform
+			const GlobalIlluminationProbe2 probe = u_giProbes[findLSB(cluster.m_giProbesMask)];
 
 			// Sample
 			diffIndirect = sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures,
@@ -281,32 +258,22 @@ void main()
 			F32 totalBlendWeight = EPSILON;
 			diffIndirect = Vec3(0.0);
 
-			Bool laneActive = true;
-			ANKI_LOOP while(laneActive)
+			// Loop probes
+			ANKI_LOOP while(cluster.m_giProbesMask != 0u)
 			{
-				U32 uniformIdxOffset = subgroupBroadcastFirst(idxOffset); // Should be uniform
-				const Bool laneMatch = uniformIdxOffset == idxOffset;
-
-				if(laneMatch)
-				{
-					laneActive = false;
-
-					// Loop probes
-					ANKI_LOOP while((idx = u_lightIndices[uniformIdxOffset++]) != MAX_U32)
-					{
-						GlobalIlluminationProbe probe = u_giProbes[idx];
-
-						// Compute blend weight
-						const F32 blendWeight =
-							computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, probe.m_fadeDistance);
-						totalBlendWeight += blendWeight;
-
-						// Sample
-						const Vec3 c = sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe,
-																u_globalIlluminationTextures, u_trilinearClampSampler);
-						diffIndirect += c * blendWeight;
-					}
-				}
+				const U32 idx = U32(findLSB(cluster.m_giProbesMask));
+				cluster.m_giProbesMask &= ~(1u << idx);
+				const GlobalIlluminationProbe2 probe = u_giProbes[idx];
+
+				// Compute blend weight
+				const F32 blendWeight =
+					computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, probe.m_fadeDistance);
+				totalBlendWeight += blendWeight;
+
+				// Sample
+				const Vec3 c = sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures,
+														u_trilinearClampSampler);
+				diffIndirect += c * blendWeight;
 			}
 
 			// Normalize