Browse Source

Complete the cluster binning

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
d46abb16f1

+ 0 - 5
AnKi/Core/StagingGpuMemoryManager.h

@@ -41,11 +41,6 @@ public:
 
 	~StagingGpuMemoryToken() = default;
 
-	explicit operator Bool() const
-	{
-		return m_range != 0;
-	}
-
 	Bool operator==(const StagingGpuMemoryToken& b) const
 	{
 		return m_buffer == b.m_buffer && m_offset == b.m_offset && m_range == b.m_range && m_type == b.m_type;

+ 0 - 1
AnKi/Gr/Utils/ClassGpuAllocator.cpp

@@ -77,7 +77,6 @@ void ClassGpuAllocator::init(GenericMemoryPoolAllocator<U8> alloc, ClassGpuAlloc
 		m_iface->getClassInfo(i, slotSize, chunkSize);
 		ANKI_ASSERT(slotSize > 0 && chunkSize > 0);
 
-		ANKI_ASSERT(isPowerOfTwo(slotSize));
 		ANKI_ASSERT((chunkSize % slotSize) == 0);
 		ANKI_ASSERT((chunkSize / slotSize) <= MAX_SLOTS_PER_CHUNK);
 

+ 3 - 3
AnKi/Math/Functions.h

@@ -125,19 +125,19 @@ inline constexpr T log2(const T x)
 	return T(std::log2(x));
 }
 
-template<typename T, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
+/// Floating point overload. Returns true when absolute<T>(f) is strictly less than the tolerance e, which defaults to
+/// EPSILON. ANKI_ENABLE presumably expands to the enable_if constraint it replaces (TODO confirm).
+template<typename T, ANKI_ENABLE(std::is_floating_point<T>::value)>
 inline constexpr Bool isZero(const T f, const T e = EPSILON)
 {
 	return absolute<T>(f) < e;
 }
 
-template<typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
+/// Integral overload. Returns true only when f compares equal to zero; no tolerance is involved.
+template<typename T, ANKI_ENABLE(std::is_integral<T>::value)>
 inline constexpr Bool isZero(const T f)
 {
 	return f == 0;
 }
 
-template<typename T>
+template<typename T, ANKI_ENABLE(std::is_floating_point<T>::value)>
 inline constexpr T toRad(const T degrees)
 {
 	return degrees * (PI / T(180));

+ 383 - 13
AnKi/Renderer/ClusterBinning.cpp

@@ -6,7 +6,13 @@
 #include <AnKi/Renderer/ClusterBinning.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/RenderQueue.h>
+#include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Core/ConfigSet.h>
+#include <AnKi/Util/Tracer.h>
+#include <AnKi/Util/ThreadHive.h>
+#include <AnKi/Util/HighRezTimer.h>
+#include <AnKi/Collision/Plane.h>
+#include <AnKi/Collision/Functions.h>
 
 namespace anki
 {
@@ -37,7 +43,7 @@ Error ClusterBinning::init(const ConfigSet& config)
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_grProg = variant->getProgram();
 
-	m_clusterCount = m_r->getTileCounts().x() * m_r->getTileCounts().y() * m_r->getZSplitCount();
+	m_clusterCount = m_r->getTileCounts().x() * m_r->getTileCounts().y() + m_r->getZSplitCount();
 
 	return Error::NONE;
 }
@@ -61,30 +67,394 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 			this, 0);
 	}
 
-	// Allocate clusters. Store to a 8byte aligned ptr. Maybe that will trick the compiler to memset faster
-	U64* clusters = static_cast<U64*>(m_r->getStagingGpuMemoryManager().allocateFrame(
-		sizeof(Cluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, ctx.m_clustererGpuObjects.m_clusterersToken));
-
-	// Zero the memory because atomics will happen
-	memset(clusters, 0, sizeof(Cluster) * m_clusterCount);
+	writeClustererBuffers(ctx);
 }
 
 void ClusterBinning::run(RenderPassWorkContext& rgraphCtx)
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
-	const ClustererGpuObjects& tokens = m_runCtx.m_ctx->m_clustererGpuObjects;
+	const ClustererGpuObjects& tokens = m_runCtx.m_ctx->m_clusterShading;
 
 	cmdb->bindShaderProgram(m_grProg);
-	bindUniforms(cmdb, 0, 0, tokens.m_lightingUniformsToken);
-	bindStorage(cmdb, 0, 1, tokens.m_clusterersToken);
-	bindStorage(cmdb, 0, 2, tokens.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 0, tokens.m_clusteredShadingUniformsToken);
+	bindStorage(cmdb, 0, 1, tokens.m_clustersToken);
+	bindUniforms(cmdb, 0, 2, tokens.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 3, tokens.m_spotLightsToken);
+	bindUniforms(cmdb, 0, 4, tokens.m_reflectionProbesToken);
+	bindUniforms(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
+	bindUniforms(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
+	bindUniforms(cmdb, 0, 7, tokens.m_decalsToken);
 
 	const U32 sampleCount = 8;
 	const U32 sizex = m_clusterCount * sampleCount;
 	const RenderQueue& rqueue = *m_runCtx.m_ctx->m_renderQueue;
-	U32 clusterObjectCounts = min(MAX_VISIBLE_POINT_LIGHTS, rqueue.m_pointLights.getSize());
-	cmdb->dispatchCompute((sizex - 64 - 1) / 64, 1, clusterObjectCounts);
+	U32 clusterObjectCounts = rqueue.m_pointLights.getSize();
+	clusterObjectCounts += rqueue.m_spotLights.getSize();
+	clusterObjectCounts += rqueue.m_reflectionProbes.getSize();
+	clusterObjectCounts += rqueue.m_giProbes.getSize();
+	clusterObjectCounts += rqueue.m_fogDensityVolumes.getSize();
+	clusterObjectCounts += rqueue.m_decals.getSize();
+	cmdb->dispatchCompute((sizex - 64 - 1) / 64, clusterObjectCounts, 1);
+}
+
+/// Prepare the per-frame GPU buffers used by clustered shading.
+/// It (1) clamps every visible object array of the render queue to its MAX_VISIBLE_* limit and logs a warning when
+/// something was dropped, (2) allocates per-frame staging memory for the uniforms, for every non-empty object array
+/// and for the clusters, storing both the token and the CPU address in ctx.m_clusterShading and (3) submits a
+/// ThreadHive task that fills the memory by calling writeClustererBuffersTask().
+/// @param ctx The rendering context whose render queue is clamped and whose m_clusterShading is populated.
+void ClusterBinning::writeClustererBuffers(RenderingContext& ctx)
+{
+	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTER_SHADING_OBJECTS);
+
+	// Check limits
+	// Note that the clamping mutates the render queue itself, so every later reader sees the truncated arrays
+	RenderQueue& rqueue = *ctx.m_renderQueue;
+	if(ANKI_UNLIKELY(rqueue.m_pointLights.getSize() > MAX_VISIBLE_POINT_LIGHTS))
+	{
+		ANKI_R_LOGW("Visible point lights exceed the max value by %u",
+					rqueue.m_pointLights.getSize() - MAX_VISIBLE_POINT_LIGHTS);
+		rqueue.m_pointLights.setArray(rqueue.m_pointLights.getBegin(), MAX_VISIBLE_POINT_LIGHTS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_spotLights.getSize() > MAX_VISIBLE_SPOT_LIGHTS))
+	{
+		ANKI_R_LOGW("Visible spot lights exceed the max value by %u",
+					rqueue.m_spotLights.getSize() - MAX_VISIBLE_SPOT_LIGHTS);
+		rqueue.m_spotLights.setArray(rqueue.m_spotLights.getBegin(), MAX_VISIBLE_SPOT_LIGHTS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_decals.getSize() > MAX_VISIBLE_DECALS))
+	{
+		ANKI_R_LOGW("Visible decals exceed the max value by %u", rqueue.m_decals.getSize() - MAX_VISIBLE_DECALS);
+		rqueue.m_decals.setArray(rqueue.m_decals.getBegin(), MAX_VISIBLE_DECALS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_fogDensityVolumes.getSize() > MAX_VISIBLE_FOG_DENSITY_VOLUMES))
+	{
+		ANKI_R_LOGW("Visible fog volumes exceed the max value by %u",
+					rqueue.m_fogDensityVolumes.getSize() - MAX_VISIBLE_FOG_DENSITY_VOLUMES);
+		rqueue.m_fogDensityVolumes.setArray(rqueue.m_fogDensityVolumes.getBegin(), MAX_VISIBLE_FOG_DENSITY_VOLUMES);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_reflectionProbes.getSize() > MAX_VISIBLE_REFLECTION_PROBES))
+	{
+		ANKI_R_LOGW("Visible reflection probes exceed the max value by %u",
+					rqueue.m_reflectionProbes.getSize() - MAX_VISIBLE_REFLECTION_PROBES);
+		rqueue.m_reflectionProbes.setArray(rqueue.m_reflectionProbes.getBegin(), MAX_VISIBLE_REFLECTION_PROBES);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_giProbes.getSize() > MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2))
+	{
+		ANKI_R_LOGW("Visible GI probes exceed the max value by %u",
+					rqueue.m_giProbes.getSize() - MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
+		rqueue.m_giProbes.setArray(rqueue.m_giProbes.getBegin(), MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
+	}
+
+	// Allocate buffers
+	// Only the allocation happens here. The contents are written later by writeClustererBuffersTask(), which is why
+	// the returned CPU addresses are kept next to the tokens
+	ClustererGpuObjects& cs = ctx.m_clusterShading;
+	StagingGpuMemoryManager& stagingMem = m_r->getStagingGpuMemoryManager();
+
+	cs.m_clusteredShadingUniformsAddress = stagingMem.allocateFrame(
+		sizeof(ClusteredShadingUniforms), StagingGpuMemoryType::UNIFORM, cs.m_clusteredShadingUniformsToken);
+
+	// For every object type: allocate when there is at least one visible object, otherwise mark the token as unused
+	if(rqueue.m_pointLights.getSize())
+	{
+		cs.m_pointLightsAddress = stagingMem.allocateFrame(rqueue.m_pointLights.getSize() * sizeof(PointLight2),
+														   StagingGpuMemoryType::UNIFORM, cs.m_pointLightsToken);
+	}
+	else
+	{
+		cs.m_pointLightsToken.markUnused();
+	}
+
+	if(rqueue.m_spotLights.getSize())
+	{
+		cs.m_spotLightsAddress = stagingMem.allocateFrame(rqueue.m_spotLights.getSize() * sizeof(SpotLight2),
+														  StagingGpuMemoryType::UNIFORM, cs.m_spotLightsToken);
+	}
+	else
+	{
+		cs.m_spotLightsToken.markUnused();
+	}
+
+	if(rqueue.m_reflectionProbes.getSize())
+	{
+		cs.m_reflectionProbesAddress =
+			stagingMem.allocateFrame(rqueue.m_reflectionProbes.getSize() * sizeof(ReflectionProbe2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_reflectionProbesToken);
+	}
+	else
+	{
+		cs.m_reflectionProbesToken.markUnused();
+	}
+
+	if(rqueue.m_decals.getSize())
+	{
+		cs.m_decalsAddress = stagingMem.allocateFrame(rqueue.m_decals.getSize() * sizeof(Decal2),
+													  StagingGpuMemoryType::UNIFORM, cs.m_decalsToken);
+	}
+	else
+	{
+		cs.m_decalsToken.markUnused();
+	}
+
+	if(rqueue.m_fogDensityVolumes.getSize())
+	{
+		cs.m_fogDensityVolumesAddress =
+			stagingMem.allocateFrame(rqueue.m_fogDensityVolumes.getSize() * sizeof(FogDensityVolume2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_fogDensityVolumesToken);
+	}
+	else
+	{
+		cs.m_fogDensityVolumesToken.markUnused();
+	}
+
+	if(rqueue.m_giProbes.getSize())
+	{
+		cs.m_globalIlluminationProbesAddress =
+			stagingMem.allocateFrame(rqueue.m_giProbes.getSize() * sizeof(GlobalIlluminationProbe2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_globalIlluminationProbesToken);
+	}
+	else
+	{
+		cs.m_globalIlluminationProbesToken.markUnused();
+	}
+
+	// The clusters are the only allocation of STORAGE type
+	cs.m_clustersAddress =
+		stagingMem.allocateFrame(sizeof(Cluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, cs.m_clustersToken);
+
+	// Fire the async job
+	// NOTE(review): the task reads the context through m_runCtx.m_ctx and not through the ctx parameter. Presumably
+	// the caller has already pointed m_runCtx.m_ctx to ctx before calling this method, confirm. No semaphore is passed
+	// to submitTask() so whoever consumes the buffers has to synchronize with the hive some other way, confirm as well
+	m_r->getThreadHive().submitTask(
+		[](void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore) {
+			static_cast<ClusterBinning*>(userData)->writeClustererBuffersTask();
+		},
+		this);
+}
+
+/// Fill the staging memory that writeClustererBuffers() allocated. Converts every visible render queue element
+/// (point lights, spot lights, reflection probes, decals, fog density volumes, GI probes) to its GPU structure,
+/// writes the ClusteredShadingUniforms (including the directional light) and finally zeroes the clusters.
+/// It reads the rendering context from m_runCtx.m_ctx and writes through the addresses stored in m_clusterShading.
+void ClusterBinning::writeClustererBuffersTask()
+{
+	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTER_SHADING_OBJECTS);
+
+	RenderingContext& ctx = *m_runCtx.m_ctx;
+	ClustererGpuObjects& cs = ctx.m_clusterShading;
+	const RenderQueue& rqueue = *ctx.m_renderQueue;
+
+	// Point lights
+	if(rqueue.m_pointLights.getSize())
+	{
+		PointLight2* lights = static_cast<PointLight2*>(cs.m_pointLightsAddress);
+
+		for(U32 i = 0; i < rqueue.m_pointLights.getSize(); ++i)
+		{
+			PointLight2& out = lights[i];
+			const PointLightQueueElement& in = rqueue.m_pointLights[i];
+
+			out.m_position = in.m_worldPosition;
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_radius = in.m_radius;
+			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
+			out.m_shadowLayer = in.m_shadowLayer;
+
+			// A null first shadow render queue is treated as "no shadow". INVALID_TEXTURE_INDEX is presumably the
+			// sentinel the shaders test for (TODO confirm)
+			if(in.m_shadowRenderQueues[0] == nullptr)
+			{
+				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
+			}
+			else
+			{
+				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
+				static_assert(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets), "See file");
+				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],
+					   sizeof(in.m_shadowAtlasTileOffsets));
+			}
+		}
+	}
+
+	// Spot lights
+	if(rqueue.m_spotLights.getSize())
+	{
+		SpotLight2* lights = static_cast<SpotLight2*>(cs.m_spotLightsAddress);
+
+		for(U32 i = 0; i < rqueue.m_spotLights.getSize(); ++i)
+		{
+			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
+			SpotLight2& out = lights[i];
+
+			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
+			// NOTE(review): this copies sizeof(out.m_edgePoints) bytes out of in.m_edgePoints (an Array<Vec3, 4>).
+			// Unlike the point light tile offsets above there is no static_assert that the two sizes match, confirm
+			// that the GPU side array is not larger than the source
+			memcpy(&out.m_edgePoints[0][0], &in.m_edgePoints[0][0], sizeof(out.m_edgePoints));
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_radius = in.m_distance;
+			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);
+			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
+			out.m_direction = -in.m_worldTransform.getRotationPart().getZAxis();
+			// The cosines are computed from half of each angle
+			out.m_outerCos = cos(in.m_outerAngle / 2.0f);
+			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
+
+			if(in.hasShadow())
+			{
+				// bias * proj_l * view_l
+				out.m_textureMatrix = in.m_textureMatrix;
+			}
+			else
+			{
+				out.m_textureMatrix = Mat4::getIdentity();
+			}
+		}
+	}
+
+	// Reflection probes
+	if(rqueue.m_reflectionProbes.getSize())
+	{
+		ReflectionProbe2* probes = static_cast<ReflectionProbe2*>(cs.m_reflectionProbesAddress);
+
+		for(U32 i = 0; i < rqueue.m_reflectionProbes.getSize(); ++i)
+		{
+			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
+			ReflectionProbe2& out = probes[i];
+
+			out.m_position = in.m_worldPosition;
+			out.m_cubemapIndex = F32(in.m_textureArrayIndex);
+			out.m_aabbMin = in.m_aabbMin;
+			out.m_aabbMax = in.m_aabbMax;
+		}
+	}
+
+	// Decals
+	if(rqueue.m_decals.getSize())
+	{
+		Decal2* decals = static_cast<Decal2*>(cs.m_decalsAddress);
+
+		TextureView* diffuseAtlas = nullptr;
+		TextureView* specularRoughnessAtlas = nullptr;
+		for(U32 i = 0; i < rqueue.m_decals.getSize(); ++i)
+		{
+			const DecalQueueElement& in = rqueue.m_decals[i];
+			Decal2& out = decals[i];
+
+			// Compare against the atlases of the previous decal. Only a single pair of atlas views is stored after
+			// the loop so a mismatch is reported through ANKI_R_LOGF
+			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
+			   || (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
+			{
+				ANKI_R_LOGF("All decals should have the same tex atlas");
+			}
+
+			diffuseAtlas = in.m_diffuseAtlas;
+			specularRoughnessAtlas = in.m_specularRoughnessAtlas;
+
+			// Diff
+			// Keeps x,y and replaces z,w with (z - x, w - y). The input appears to be (min, max) and the output
+			// (min, size), TODO confirm
+			Vec4 uv = in.m_diffuseAtlasUv;
+			out.m_diffuseUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
+			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;
+
+			// Other
+			uv = in.m_specularRoughnessAtlasUv;
+			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
+			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;
+
+			// bias * proj_l * view
+			out.m_textureMatrix = in.m_textureMatrix;
+		}
+
+		ANKI_ASSERT(diffuseAtlas || specularRoughnessAtlas);
+		ctx.m_clusterShading.m_diffuseDecalTextureView.reset(diffuseAtlas);
+		ctx.m_clusterShading.m_specularRoughnessDecalTextureView.reset(specularRoughnessAtlas);
+	}
+
+	// Fog volumes
+	if(rqueue.m_fogDensityVolumes.getSize())
+	{
+		FogDensityVolume2* volumes = static_cast<FogDensityVolume2*>(cs.m_fogDensityVolumesAddress);
+
+		for(U32 i = 0; i < rqueue.m_fogDensityVolumes.getSize(); ++i)
+		{
+			const FogDensityQueueElement& in = rqueue.m_fogDensityVolumes[i];
+			FogDensityVolume2& out = volumes[i];
+
+			out.m_density = in.m_density;
+			// The same two output members carry either the AABB (min, max) or the sphere (center, radius squared
+			// splatted to a Vec3). m_isBox tells which one
+			if(in.m_isBox)
+			{
+				out.m_isBox = 1;
+				out.m_aabbMinOrSphereCenter = in.m_aabbMin;
+				out.m_aabbMaxOrSphereRadiusSquared = in.m_aabbMax;
+			}
+			else
+			{
+				out.m_isBox = 0;
+				out.m_aabbMinOrSphereCenter = in.m_sphereCenter;
+				out.m_aabbMaxOrSphereRadiusSquared = Vec3(in.m_sphereRadius * in.m_sphereRadius);
+			}
+		}
+	}
+
+	// GI
+	if(rqueue.m_giProbes.getSize())
+	{
+		GlobalIlluminationProbe2* probes = static_cast<GlobalIlluminationProbe2*>(cs.m_globalIlluminationProbesAddress);
+
+		for(U32 i = 0; i < rqueue.m_giProbes.getSize(); ++i)
+		{
+			const GlobalIlluminationProbeQueueElement& in = rqueue.m_giProbes[i];
+			GlobalIlluminationProbe2& out = probes[i];
+
+			out.m_aabbMin = in.m_aabbMin;
+			out.m_aabbMax = in.m_aabbMax;
+			// Pointer difference from the first element, which is the index of the probe in the array
+			out.m_textureIndex = U32(&in - &rqueue.m_giProbes.getFront());
+			out.m_halfTexelSizeU = 1.0f / F32(F32(in.m_cellCounts.x()) * 6.0f) / 2.0f;
+			out.m_fadeDistance = in.m_fadeDistance;
+		}
+	}
+
+	// General uniforms
+	{
+		ClusteredShadingUniforms& unis = *static_cast<ClusteredShadingUniforms*>(cs.m_clusteredShadingUniformsAddress);
+
+		unis.m_renderingSize = Vec2(F32(m_r->getWidth()), F32(m_r->getHeight()));
+
+		unis.m_time = F32(HighRezTimer::getCurrentTime());
+		// Keep only the low 32 bits of the frame counter
+		unis.m_frame = m_r->getFrameCount() & MAX_U32;
+
+		Plane nearPlane;
+		extractClipPlane(rqueue.m_viewProjectionMatrix, FrustumPlaneType::NEAR, nearPlane);
+		unis.m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
+		unis.m_near = rqueue.m_cameraNear;
+		unis.m_far = rqueue.m_cameraFar;
+		unis.m_zSplitCountOverFrustumLength = F32(m_r->getZSplitCount()) / (rqueue.m_cameraFar - rqueue.m_cameraNear);
+		unis.m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
+
+		unis.m_tileCounts = m_r->getTileCounts();
+		unis.m_zSplitCount = m_r->getZSplitCount();
+		unis.m_lightVolumeLastCluster = m_r->getVolumetricLightingAccumulation().getFinalClusterInZ();
+
+		unis.m_matrices = ctx.m_matrices;
+		unis.m_previousMatrices = ctx.m_prevMatrices;
+
+		// The counts are taken from the render queue arrays that writeClustererBuffers() may have clamped
+		unis.m_pointLightCount = rqueue.m_pointLights.getSize();
+		unis.m_spotLightCount = rqueue.m_spotLights.getSize();
+		unis.m_decalCount = rqueue.m_decals.getSize();
+		unis.m_fogDensityVolumeCount = rqueue.m_fogDensityVolumes.getSize();
+		unis.m_reflectionProbeCount = rqueue.m_reflectionProbes.getSize();
+		unis.m_giProbeCount = rqueue.m_giProbes.getSize();
+
+		// Directional light
+		// A zero UUID means that there is no directional light, in which case only m_active is written
+		if(rqueue.m_directionalLight.m_uuid != 0)
+		{
+			DirectionalLight2& out = unis.m_directionalLight;
+			const DirectionalLightQueueElement& in = rqueue.m_directionalLight;
+
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_cascadeCount = in.m_shadowCascadeCount;
+			out.m_direction = in.m_direction;
+			out.m_active = 1;
+			out.m_effectiveShadowDistance = in.m_effectiveShadowDistance;
+			out.m_shadowCascadesDistancePower = in.m_shadowCascadesDistancePower;
+			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
+
+			for(U cascade = 0; cascade < in.m_shadowCascadeCount; ++cascade)
+			{
+				out.m_textureMatrices[cascade] = in.m_textureMatrices[cascade];
+			}
+		}
+		else
+		{
+			unis.m_directionalLight.m_active = 0;
+		}
+	}
+
+	// Zero the memory because atomics will happen
+	memset(cs.m_clustersAddress, 0, sizeof(Cluster) * m_clusterCount);
 }
 
 } // end namespace anki

+ 3 - 1
AnKi/Renderer/ClusterBinning.h

@@ -35,10 +35,12 @@ private:
 	class
 	{
 	public:
-		const RenderingContext* m_ctx = nullptr;
+		/// The rendering context used by run() and writeClustererBuffersTask(). It is non-const because
+		/// writeClustererBuffersTask() writes to its m_clusterShading (it resets the decal texture views).
+		RenderingContext* m_ctx = nullptr;
 	} m_runCtx;
 
 	void run(RenderPassWorkContext& rgraphCtx);
+	void writeClustererBuffers(RenderingContext& ctx);
+	void writeClustererBuffersTask();
 };
 /// @}
 

+ 11 - 3
AnKi/Renderer/Common.h

@@ -141,13 +141,21 @@ class ClustererGpuObjects
 {
 public:
 	StagingGpuMemoryToken m_pointLightsToken;
+	void* m_pointLightsAddress = nullptr;
 	StagingGpuMemoryToken m_spotLightsToken;
+	void* m_spotLightsAddress = nullptr;
 	StagingGpuMemoryToken m_reflectionProbesToken;
+	void* m_reflectionProbesAddress = nullptr;
 	StagingGpuMemoryToken m_decalsToken;
+	void* m_decalsAddress = nullptr;
 	StagingGpuMemoryToken m_fogDensityVolumesToken;
+	void* m_fogDensityVolumesAddress = nullptr;
 	StagingGpuMemoryToken m_globalIlluminationProbesToken;
-	StagingGpuMemoryToken m_lightingUniformsToken;
-	StagingGpuMemoryToken m_clusterersToken;
+	void* m_globalIlluminationProbesAddress = nullptr;
+	StagingGpuMemoryToken m_clusteredShadingUniformsToken;
+	void* m_clusteredShadingUniformsAddress = nullptr;
+	StagingGpuMemoryToken m_clustersToken;
+	void* m_clustersAddress = nullptr;
 
 	TextureViewPtr m_diffuseDecalTextureView;
 	TextureViewPtr m_specularRoughnessDecalTextureView;
@@ -175,7 +183,7 @@ public:
 
 	StagingGpuMemoryToken m_lightShadingUniformsToken;
 
-	ClustererGpuObjects m_clustererGpuObjects;
+	ClustererGpuObjects m_clusterShading;
 
 	RenderingContext(const StackAllocator<U8>& alloc)
 		: m_tempAllocator(alloc)

+ 1 - 0
AnKi/Renderer/RenderQueue.h

@@ -141,6 +141,7 @@ public:
 	F32 m_outerAngle;
 	F32 m_innerAngle;
 	Vec3 m_diffuseColor;
+	Array<Vec3, 4> m_edgePoints;
 	RenderQueue* m_shadowRenderQueue;
 	RenderQueueDrawCallback m_debugDrawCallback;
 	const void* m_debugDrawCallbackUserData;

+ 0 - 322
AnKi/Renderer/Renderer.cpp

@@ -697,326 +697,4 @@ void Renderer::setCurrentDebugRenderTarget(CString rtName)
 	}
 }
 
-void Renderer::writeClustererBuffers(RenderingContext& ctx)
-{
-	class Ctx
-	{
-	public:
-		Renderer* m_r;
-		RenderingContext* m_ctx;
-	};
-
-	Ctx c;
-	c.m_r = this;
-	c.m_ctx = &ctx;
-
-	m_threadHive->submitTask(
-		[](void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore) {
-			static_cast<Ctx*>(userData)->m_r->writeClustererBuffersTask(*static_cast<Ctx*>(userData)->m_ctx);
-		},
-		&c);
-}
-
-void Renderer::writeClustererBuffersTask(RenderingContext& ctx)
-{
-	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTERER_OBJECTS);
-
-	const RenderQueue& rqueue = *ctx.m_renderQueue;
-
-	// Clusterer uniforms
-	{
-		ClusteredShadingUniforms* unis = static_cast<ClusteredShadingUniforms*>(
-			m_stagingMem->allocateFrame(sizeof(ClusteredShadingUniforms), StagingGpuMemoryType::UNIFORM,
-										ctx.m_clustererGpuObjects.m_lightingUniformsToken));
-
-		unis->m_renderingSize = Vec2(F32(m_width), F32(m_height));
-
-		unis->m_time = F32(HighRezTimer::getCurrentTime());
-		unis->m_frame = m_frameCount & MAX_U32;
-
-		Plane nearPlane;
-		extractClipPlane(rqueue.m_viewProjectionMatrix, FrustumPlaneType::NEAR, nearPlane);
-		unis->m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
-		unis->m_near = rqueue.m_cameraNear;
-		unis->m_far = rqueue.m_cameraFar;
-		unis->m_oneOverFrustumLength = 1.0f / (rqueue.m_cameraFar - rqueue.m_cameraNear);
-		unis->m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
-
-		unis->m_tileCounts = m_tileCounts;
-		unis->m_zSplitCount = m_zSplitCount;
-		unis->m_lightVolumeLastCluster = m_volLighting->getFinalClusterInZ();
-
-		unis->m_matrices = ctx.m_matrices;
-		unis->m_previousMatrices = ctx.m_prevMatrices;
-
-		// Directional light
-		if(rqueue.m_directionalLight.m_uuid != 0)
-		{
-			DirectionalLight2& out = unis->m_directionalLight;
-			const DirectionalLightQueueElement& in = rqueue.m_directionalLight;
-
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_cascadeCount = in.m_shadowCascadeCount;
-			out.m_direction = in.m_direction;
-			out.m_active = 1;
-			out.m_effectiveShadowDistance = in.m_effectiveShadowDistance;
-			out.m_shadowCascadesDistancePower = in.m_shadowCascadesDistancePower;
-			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
-
-			for(U cascade = 0; cascade < in.m_shadowCascadeCount; ++cascade)
-			{
-				out.m_textureMatrices[cascade] = in.m_textureMatrices[cascade];
-			}
-		}
-		else
-		{
-			unis->m_directionalLight.m_active = 0;
-		}
-	}
-
-	// Point lights
-	U32 visiblePointLightCount = rqueue.m_pointLights.getSize();
-	if(ANKI_UNLIKELY(visiblePointLightCount > MAX_VISIBLE_POINT_LIGHTS))
-	{
-		ANKI_R_LOGW("Visible point lights exceed the max value by %u",
-					visiblePointLightCount - MAX_VISIBLE_POINT_LIGHTS);
-		visiblePointLightCount = MAX_VISIBLE_POINT_LIGHTS;
-	}
-
-	if(visiblePointLightCount)
-	{
-		PointLight2* lights = static_cast<PointLight2*>(
-			m_stagingMem->allocateFrame(visiblePointLightCount * sizeof(PointLight2), StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_pointLightsToken));
-
-		for(U32 i = 0; i < visiblePointLightCount; ++i)
-		{
-			PointLight2& out = lights[i];
-			const PointLightQueueElement& in = rqueue.m_pointLights[i];
-
-			out.m_position = in.m_worldPosition;
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_radius = in.m_radius;
-			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
-			out.m_shadowLayer = in.m_shadowLayer;
-
-			if(in.m_shadowRenderQueues[0] == nullptr)
-			{
-				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
-			}
-			else
-			{
-				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
-				static_assert(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets), "See file");
-				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],
-					   sizeof(in.m_shadowAtlasTileOffsets));
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_pointLightsToken.markUnused();
-	}
-
-	// Spot lights
-	U32 visibleSpotLightCount = rqueue.m_spotLights.getSize();
-	if(visibleSpotLightCount > MAX_VISIBLE_SPOT_LIGHTS)
-	{
-		ANKI_R_LOGW("Visible spot lights exceed the max value by %u", visibleSpotLightCount - MAX_VISIBLE_SPOT_LIGHTS);
-		visibleSpotLightCount = MAX_VISIBLE_SPOT_LIGHTS;
-	}
-
-	if(visibleSpotLightCount)
-	{
-		SpotLight2* lights = static_cast<SpotLight2*>(
-			m_stagingMem->allocateFrame(visibleSpotLightCount * sizeof(SpotLight2), StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_spotLightsToken));
-
-		for(U32 i = 0; i < visibleSpotLightCount; ++i)
-		{
-			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
-			SpotLight2& out = lights[i];
-
-			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_radius = in.m_distance;
-			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);
-			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
-			out.m_direction = -in.m_worldTransform.getRotationPart().getZAxis();
-			out.m_outerCos = cos(in.m_outerAngle / 2.0f);
-			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
-
-			if(in.hasShadow())
-			{
-				// bias * proj_l * view_l
-				out.m_textureMatrix = in.m_textureMatrix;
-			}
-			else
-			{
-				out.m_textureMatrix = Mat4::getIdentity();
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_spotLightsToken.markUnused();
-	}
-
-	// Reflection probes
-	U32 visibleReflectionPRobeCount = rqueue.m_reflectionProbes.getSize();
-	if(visibleReflectionPRobeCount > MAX_VISIBLE_REFLECTION_PROBES)
-	{
-		ANKI_R_LOGW("Visible reflection probes exceed the max value by %u",
-					visibleReflectionPRobeCount - MAX_VISIBLE_REFLECTION_PROBES);
-		visibleReflectionPRobeCount = MAX_VISIBLE_REFLECTION_PROBES;
-	}
-
-	if(visibleReflectionPRobeCount)
-	{
-		ReflectionProbe2* probes = static_cast<ReflectionProbe2*>(m_stagingMem->allocateFrame(
-			visibleReflectionPRobeCount * sizeof(ReflectionProbe2), StagingGpuMemoryType::STORAGE,
-			ctx.m_clustererGpuObjects.m_reflectionProbesToken));
-
-		for(U32 i = 0; i < visibleReflectionPRobeCount; ++i)
-		{
-			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
-			ReflectionProbe2& out = probes[i];
-
-			out.m_position = in.m_worldPosition;
-			out.m_cubemapIndex = F32(in.m_textureArrayIndex);
-			out.m_aabbMin = in.m_aabbMin;
-			out.m_aabbMax = in.m_aabbMax;
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_reflectionProbesToken.markUnused();
-	}
-
-	// Decals
-	U32 visibleDecalCount = rqueue.m_decals.getSize();
-	if(visibleDecalCount > MAX_VISIBLE_DECALS)
-	{
-		ANKI_R_LOGW("Visible decals exceed the max value by %u", visibleDecalCount - MAX_VISIBLE_DECALS);
-		visibleDecalCount = MAX_VISIBLE_DECALS;
-	}
-
-	if(visibleDecalCount)
-	{
-		Decal2* decals = static_cast<Decal2*>(m_stagingMem->allocateFrame(sizeof(Decal2) * visibleDecalCount,
-																		  StagingGpuMemoryType::STORAGE,
-																		  ctx.m_clustererGpuObjects.m_decalsToken));
-
-		TextureView* diffuseAtlas = nullptr;
-		TextureView* specularRoughnessAtlas = nullptr;
-		for(U32 i = 0; i < visibleDecalCount; ++i)
-		{
-			const DecalQueueElement& in = rqueue.m_decals[i];
-			Decal2& out = decals[i];
-
-			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
-			   || (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
-			{
-				ANKI_R_LOGF("All decals should have the same tex atlas");
-			}
-
-			diffuseAtlas = in.m_diffuseAtlas;
-			specularRoughnessAtlas = in.m_specularRoughnessAtlas;
-
-			// Diff
-			Vec4 uv = in.m_diffuseAtlasUv;
-			out.m_diffuseUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
-			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;
-
-			// Other
-			uv = in.m_specularRoughnessAtlasUv;
-			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
-			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;
-
-			// bias * proj_l * view
-			out.m_textureMatrix = in.m_textureMatrix;
-		}
-
-		ANKI_ASSERT(diffuseAtlas || specularRoughnessAtlas);
-		ctx.m_clustererGpuObjects.m_diffuseDecalTextureView.reset(diffuseAtlas);
-		ctx.m_clustererGpuObjects.m_specularRoughnessDecalTextureView.reset(specularRoughnessAtlas);
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_decalsToken.markUnused();
-	}
-
-	// Fog volumes
-	U32 visibleFogCount = rqueue.m_fogDensityVolumes.getSize();
-	if(visibleFogCount > MAX_VISIBLE_FOG_DENSITY_VOLUMES)
-	{
-		ANKI_R_LOGW("Visible fog density volumes exceed the max value by %u",
-					visibleFogCount - MAX_VISIBLE_FOG_DENSITY_VOLUMES);
-		visibleFogCount = MAX_VISIBLE_FOG_DENSITY_VOLUMES;
-	}
-
-	if(visibleFogCount)
-	{
-		FogDensityVolume2* volumes = static_cast<FogDensityVolume2*>(
-			m_stagingMem->allocateFrame(sizeof(FogDensityVolume2) * visibleFogCount, StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_fogDensityVolumesToken));
-
-		for(U32 i = 0; i < visibleFogCount; ++i)
-		{
-			const FogDensityQueueElement& in = rqueue.m_fogDensityVolumes[i];
-			FogDensityVolume2& out = volumes[i];
-
-			out.m_density = in.m_density;
-			if(in.m_isBox)
-			{
-				out.m_isBox = 1;
-				out.m_aabbMinOrSphereCenter = in.m_aabbMin;
-				out.m_aabbMaxOrSphereRadiusSquared = in.m_aabbMax;
-			}
-			else
-			{
-				out.m_isBox = 0;
-				out.m_aabbMinOrSphereCenter = in.m_sphereCenter;
-				out.m_aabbMaxOrSphereRadiusSquared = Vec3(in.m_sphereRadius * in.m_sphereRadius);
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_fogDensityVolumesToken.markUnused();
-	}
-
-	// GI
-	U32 visibleGiProbeCount = rqueue.m_giProbes.getSize();
-	if(visibleGiProbeCount > MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2)
-	{
-		ANKI_R_LOGW("Visible GI probes exceed the max value by %u",
-					visibleGiProbeCount - MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
-		visibleGiProbeCount = MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2;
-	}
-
-	if(visibleGiProbeCount)
-	{
-		GlobalIlluminationProbe2* probes = static_cast<GlobalIlluminationProbe2*>(m_stagingMem->allocateFrame(
-			sizeof(GlobalIlluminationProbe2) * visibleGiProbeCount, StagingGpuMemoryType::STORAGE,
-			ctx.m_clustererGpuObjects.m_globalIlluminationProbesToken));
-
-		for(U32 i = 0; i < visibleGiProbeCount; ++i)
-		{
-			const GlobalIlluminationProbeQueueElement& in = rqueue.m_giProbes[i];
-			GlobalIlluminationProbe2& out = probes[i];
-
-			out.m_aabbMin = in.m_aabbMin;
-			out.m_aabbMax = in.m_aabbMax;
-			out.m_textureIndex = U32(&in - &rqueue.m_giProbes.getFront());
-			out.m_halfTexelSizeU = 1.0f / F32(F32(in.m_cellCounts.x()) * 6.0f) / 2.0f;
-			out.m_fadeDistance = in.m_fadeDistance;
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_globalIlluminationProbesToken.markUnused();
-	}
-}
-
 } // end namespace anki

+ 0 - 3
AnKi/Renderer/Renderer.h

@@ -444,9 +444,6 @@ private:
 	void initJitteredMats();
 
 	void updateLightShadingUniforms(RenderingContext& ctx) const;
-
-	void writeClustererBuffers(RenderingContext& ctx);
-	void writeClustererBuffersTask(RenderingContext& ctx);
 };
 /// @}
 

+ 2 - 2
AnKi/Renderer/RendererObject.cpp

@@ -39,7 +39,7 @@ void* RendererObject::allocateFrameStagingMemory(PtrSize size, StagingGpuMemoryT
 void RendererObject::bindUniforms(CommandBufferPtr& cmdb, U32 set, U32 binding,
 								  const StagingGpuMemoryToken& token) const
 {
-	if(token && !token.isUnused())
+	if(!token.isUnused())
 	{
 		cmdb->bindUniformBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 	}
@@ -51,7 +51,7 @@ void RendererObject::bindUniforms(CommandBufferPtr& cmdb, U32 set, U32 binding,
 
 void RendererObject::bindStorage(CommandBufferPtr& cmdb, U32 set, U32 binding, const StagingGpuMemoryToken& token) const
 {
-	if(token && !token.isUnused())
+	if(!token.isUnused())
 	{
 		cmdb->bindStorageBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 	}

+ 8 - 1
AnKi/Scene/Components/LightComponent.cpp

@@ -44,10 +44,17 @@ Error LightComponent::update(SceneNode& node, Second prevTime, Second crntTime,
 	if(updated && m_type == LightComponentType::SPOT)
 	{
 
-		static const Mat4 biasMat4(0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+		const Mat4 biasMat4(0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
 		const Mat4 proj = Mat4::calculatePerspectiveProjectionMatrix(m_spot.m_outerAngle, m_spot.m_outerAngle,
 																	 LIGHT_FRUSTUM_NEAR_PLANE, m_spot.m_distance);
 		m_spot.m_textureMat = biasMat4 * proj * Mat4(m_worldtransform.getInverse());
+
+		Array<Vec4, 4> points;
+		computeEdgesOfFrustum(m_spot.m_distance, m_spot.m_outerAngle, m_spot.m_outerAngle, &points[0]);
+		for(U32 i = 0; i < 4; ++i)
+		{
+			m_spot.m_edgePointsWspace[i] = m_worldtransform.transform(points[i].xyz());
+		}
 	}
 
 	// Update the scene bounds always

+ 14 - 15
AnKi/Scene/Components/LightComponent.h

@@ -163,6 +163,7 @@ public:
 		el.m_outerAngle = m_spot.m_outerAngle;
 		el.m_innerAngle = m_spot.m_innerAngle;
 		el.m_diffuseColor = m_diffColor.xyz();
+		el.m_edgePoints = m_spot.m_edgePointsWspace;
 		el.m_debugDrawCallback = [](RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData) {
 			ANKI_ASSERT(userData.getSize() == 1);
 			static_cast<const LightComponent*>(userData[0])->draw(ctx);
@@ -188,33 +189,31 @@ private:
 	class Point
 	{
 	public:
-		F32 m_radius;
+		F32 m_radius = 1.0f;
 	};
 
 	class Spot
 	{
 	public:
-		Mat4 m_textureMat;
-		F32 m_distance;
-		F32 m_innerAngleCos;
-		F32 m_outerAngleCos;
-		F32 m_outerAngle;
-		F32 m_innerAngle;
+		Mat4 m_textureMat = Mat4::getIdentity();
+		F32 m_distance = 1.0f;
+		F32 m_outerAngle = toRad(30.0f);
+		F32 m_innerAngle = toRad(15.0f);
+		F32 m_outerAngleCos = cos(m_outerAngle / 2.0f);
+		F32 m_innerAngleCos = cos(m_innerAngle / 2.0f);
+		Array<Vec3, 4> m_edgePointsWspace = {};
 	};
 
 	class Dir
 	{
 	public:
-		Vec3 m_sceneMin;
-		Vec3 m_sceneMax;
+		Vec3 m_sceneMin = Vec3(-1.0f);
+		Vec3 m_sceneMax = Vec3(1.0f);
 	};
 
-	union
-	{
-		Point m_point;
-		Spot m_spot;
-		Dir m_dir;
-	};
+	Point m_point;
+	Spot m_spot;
+	Dir m_dir;
 
 	TextureResourcePtr m_pointDebugTex;
 	TextureResourcePtr m_spotDebugTex;

+ 6 - 6
AnKi/Scene/Components/MoveComponent.h

@@ -96,19 +96,19 @@ public:
 
 	/// @name Mess with the local transform
 	/// @{
-	void rotateLocalX(F32 angDegrees)
+	void rotateLocalX(F32 angleRad)
 	{
-		m_ltrf.getRotation().rotateXAxis(angDegrees);
+		m_ltrf.getRotation().rotateXAxis(angleRad);
 		markForUpdate();
 	}
-	void rotateLocalY(F32 angDegrees)
+	void rotateLocalY(F32 angleRad)
 	{
-		m_ltrf.getRotation().rotateYAxis(angDegrees);
+		m_ltrf.getRotation().rotateYAxis(angleRad);
 		markForUpdate();
 	}
-	void rotateLocalZ(F32 angDegrees)
+	void rotateLocalZ(F32 angleRad)
 	{
-		m_ltrf.getRotation().rotateZAxis(angDegrees);
+		m_ltrf.getRotation().rotateZAxis(angleRad);
 		markForUpdate();
 	}
 	void moveLocalX(F32 distance)

+ 1 - 1
AnKi/ShaderCompiler/Common.h

@@ -20,7 +20,7 @@ namespace anki
 #define ANKI_SHADER_COMPILER_LOGW(...) ANKI_LOG("SHCO", WARNING, __VA_ARGS__)
 #define ANKI_SHADER_COMPILER_LOGF(...) ANKI_LOG("SHCO", FATAL, __VA_ARGS__)
 
-constexpr U32 MAX_SHADER_BINARY_NAME_LENGTH = 63;
+constexpr U32 MAX_SHADER_BINARY_NAME_LENGTH = 127;
 
 using MutatorValue = I32; ///< The type of the mutator value
 

+ 26 - 3
AnKi/ShaderCompiler/Glslang.cpp

@@ -20,9 +20,15 @@
 #	pragma GCC diagnostic pop
 #endif
 
+#define ANKI_GLSLANG_DUMP 0
+
 namespace anki
 {
 
+#if ANKI_GLSLANG_DUMP
+static Atomic<U32> g_dumpFileCount;
+#endif
+
 class GlslangCtx
 {
 public:
@@ -245,6 +251,23 @@ Error preprocessGlsl(CString in, StringAuto& out)
 Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolAllocator<U8> tmpAlloc,
 						  DynamicArrayAuto<U8>& spirv)
 {
+#if ANKI_GLSLANG_DUMP
+	// Dump it
+	{
+		const U32 count = g_dumpFileCount.fetchAdd(1) / 2;
+		if(count == 0)
+		{
+			ANKI_SHADER_COMPILER_LOGW("GLSL dumping is enabled");
+		}
+
+		File file;
+		StringAuto fname(tmpAlloc);
+		fname.sprintf("/tmp/%u.glsl", count);
+		ANKI_CHECK(file.open(fname, FileOpenFlag::WRITE));
+		ANKI_CHECK(file.writeText("%s", src.cstr()));
+	}
+#endif
+
 	const EShLanguage stage = ankiToGlslangShaderType(shaderType);
 	const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
 
@@ -280,10 +303,10 @@ Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolA
 	spirv.resize(U32(glslangSpirv.size() * sizeof(unsigned int)));
 	memcpy(&spirv[0], &glslangSpirv[0], spirv.getSizeInBytes());
 
-#if 0
+#if ANKI_GLSLANG_DUMP
 	// Dump it
 	{
-		static U32 count = 0;
+		const U32 count = g_dumpFileCount.fetchAdd(1) / 2;
 		if(count == 0)
 		{
 			ANKI_SHADER_COMPILER_LOGW("SPIR-V dumping is enabled");
@@ -291,7 +314,7 @@ Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolA
 
 		File file;
 		StringAuto fname(tmpAlloc);
-		fname.sprintf("/tmp/%u.spv", count++);
+		fname.sprintf("/tmp/%u.spv", count);
 		ANKI_CHECK(file.open(fname, FileOpenFlag::WRITE | FileOpenFlag::BINARY));
 		ANKI_CHECK(file.write(spirv.getBegin(), spirv.getSizeInBytes()));
 	}

+ 180 - 13
AnKi/Shaders/ClusterBinning.ankiprog

@@ -18,29 +18,52 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(RENDERING_SIZE, 4, UVec2(1u));
 const U32 WORKGROUP_SIZE = 64u;
 layout(local_size_x = WORKGROUP_SIZE) in;
 
-layout(set = 0, binding = 0, scalar) uniform b_unis
+layout(set = 0, binding = 0, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 	ClusteredShadingUniforms u_unis;
 };
 
-layout(set = 0, binding = 1, scalar) writeonly buffer b_tiles
+layout(set = 0, binding = 1, scalar) writeonly buffer ANKI_RANDOM_BLOCK_NAME
 {
 	Cluster u_clusters[];
 };
 
-layout(set = 0, binding = 2, scalar) readonly buffer b_plights
+layout(set = 0, binding = 2, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
-	PointLight2 u_pointLights[];
+	PointLight2 u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
+};
+
+layout(set = 0, binding = 3, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	SpotLightBinning u_spotLights[MAX_VISIBLE_SPOT_LIGHTS];
+};
+
+layout(set = 0, binding = 4, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	ReflectionProbe2 u_reflectionProbes[MAX_VISIBLE_REFLECTION_PROBES];
+};
+
+layout(set = 0, binding = 5, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	GlobalIlluminationProbe2 u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2];
+};
+
+layout(set = 0, binding = 6, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	FogDensityVolume2 u_fogVolumes[MAX_VISIBLE_FOG_DENSITY_VOLUMES];
+};
+
+layout(set = 0, binding = 7, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	Decal2 u_decals[MAX_VISIBLE_DECALS];
 };
 
 const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
 
 // DX Sample locations
 const U32 SAMPLE_COUNT = 4u;
-const I32 SAMPLE_OFFSET = 8;
-#define LOCATION(x, y) UVec2(IVec2(x, y) + SAMPLE_OFFSET)
-const UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
-	UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
+#define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(TILE_SIZE))
+UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
 #undef LOCATION
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
@@ -56,6 +79,32 @@ Bool isPointLight()
 	return clustererObjectIdx < u_unis.m_pointLightCount;
 }
 
+// True when this invocation's clusterer object index lies below the end of the spot light range. Point lights are not
+// excluded by this test, callers check isPointLight() first.
+Bool isSpotLight()
+{
+	const U32 spotLightsEnd = u_unis.m_pointLightCount + u_unis.m_spotLightCount;
+	return gl_GlobalInvocationID.y < spotLightsEnd;
+}
+
+// True when this invocation's clusterer object index lies below the end of the reflection probe range. The lights that
+// come earlier are not excluded by this test, callers check isPointLight() and isSpotLight() first.
+Bool isReflectionProbe()
+{
+	const U32 lightsEnd = u_unis.m_pointLightCount + u_unis.m_spotLightCount;
+	const U32 reflectionProbesEnd = lightsEnd + u_unis.m_reflectionProbeCount;
+	return gl_GlobalInvocationID.y < reflectionProbesEnd;
+}
+
+// True when this invocation's clusterer object index lies below the end of the GI probe range. Objects are laid out as
+// point lights, spot lights, reflection probes and then GI probes, so the exclusive upper bound is the sum of those
+// four counts. Earlier object types are not excluded by this test, callers check their predicates first.
+Bool isGiProbe()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+									+ u_unis.m_giProbeCount;
+}
+
+// True when this invocation's clusterer object index lies below the end of the fog density volume range. Fog volumes
+// follow the point lights, spot lights, reflection probes and GI probes, so the exclusive upper bound is the sum of all
+// five counts. Earlier object types are not excluded by this test, callers check their predicates first.
+Bool isFogVolume()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+									+ u_unis.m_giProbeCount + u_unis.m_fogDensityVolumesCount;
+}
+
 void main()
 {
 	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
@@ -105,6 +154,85 @@ void main()
 		const PointLight2 light = u_pointLights[objectArrayIdx];
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 	}
+	else if(isSpotLight())
+	{
+		objectArrayIdx = clustererObjectIdx - u_unis.m_pointLightCount;
+		const SpotLightBinning light = u_spotLights[objectArrayIdx];
+
+		t0 = 10000.0;
+		t1 = -10000.0;
+
+		// Iterate all triangles
+		const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
+		U32 hits = 0u;
+		U32 idx = 0u;
+		do
+		{
+			const Vec3 v0 = light.m_edgePoints[indices[idx + 0u]];
+			const Vec3 v1 = light.m_edgePoints[indices[idx + 1u]];
+			const Vec3 v2 = light.m_edgePoints[indices[idx + 2u]];
+
+			F32 t, u, v;
+			const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);
+
+			if(localCollides)
+			{
+				t0 = min(t0, t);
+				t1 = max(t1, t);
+				++hits;
+			}
+			idx += 3u;
+		} while(hits < 2u && idx < 6u * 3u);
+
+		if(hits == 1u)
+		{
+			t0 = 0.0;
+		}
+
+		collides = (hits != 0u);
+	}
+	else if(isReflectionProbe())
+	{
+		objectArrayIdx = clustererObjectIdx - (u_unis.m_pointLightCount + u_unis.m_spotLightCount);
+		const ReflectionProbe2 probe = u_reflectionProbes[objectArrayIdx];
+
+		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
+	}
+	else if(isGiProbe())
+	{
+		objectArrayIdx =
+			clustererObjectIdx - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount);
+		const GlobalIlluminationProbe2 probe = u_giProbes[objectArrayIdx];
+
+		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
+	}
+	else if(isFogVolume())
+	{
+		// Fog volumes start right after the GI probes: skip every point light, spot light, reflection probe and GI
+		// probe exactly once to get the index into u_fogVolumes
+		objectArrayIdx = clustererObjectIdx
+						 - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+							+ u_unis.m_giProbeCount);
+		const FogDensityVolume2 vol = u_fogVolumes[objectArrayIdx];
+
+		if(vol.m_isBox != 0u)
+		{
+			collides =
+				testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, t0, t1);
+		}
+		else
+		{
+			// The volume stores the squared radius in .x, the sphere test wants the radius
+			collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter,
+									 sqrt(vol.m_aabbMaxOrSphereRadiusSquared.x), t0, t1);
+		}
+	}
+	else
+	{
+		// Decals are the last object type: skip all the other object types to get the index into u_decals
+		objectArrayIdx = clustererObjectIdx
+						 - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+							+ u_unis.m_giProbeCount + u_unis.m_fogDensityVolumesCount);
+		const Decal2 decal = u_decals[objectArrayIdx];
+
+		// Decal2 names its half-size member m_obbExtend
+		collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
+	}
 
 	// Update the masks
 	if(collides)
@@ -133,12 +261,11 @@ void main()
 			minDistFromNearPlane = distFromNearPlaneB;
 			maxDistFromNearPlane = distFromNearPlaneA;
 		}
-		minDistFromNearPlane = max(0.0, minDistFromNearPlane);
-		maxDistFromNearPlane = max(0.0, maxDistFromNearPlane);
 
-		const U32 startZSplit = U32(minDistFromNearPlane * u_unis.m_oneOverFrustumLength);
-		const U32 endZSplit = min(Z_SPLIT_COUNT - 1u, U32(maxDistFromNearPlane * u_unis.m_oneOverFrustumLength));
-		for(U32 i = startZSplit; i <= endZSplit; ++i)
+		const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
+		const I32 endZSplit =
+			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
+		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 			atomicOr(s_zSplitMasks[i], mask);
 		}
@@ -155,6 +282,26 @@ void main()
 		{
 			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
+		else if(isSpotLight())
+		{
+			atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
+		}
+		else if(isReflectionProbe())
+		{
+			atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else if(isGiProbe())
+		{
+			atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else if(isFogVolume())
+		{
+			atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else
+		{
+			atomicOr(u_clusters[tileIdx].m_decalsMask, U32(s_tileMasks[localTileIdx]));
+		}
 	}
 
 	// All invocations write at least one Z split
@@ -167,6 +314,26 @@ void main()
 			{
 				atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
 			}
+			else if(isSpotLight())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
+			}
+			else if(isReflectionProbe())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
+			}
+			else if(isGiProbe())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
+			}
+			else if(isFogVolume())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
+			}
+			else
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, U32(s_zSplitMasks[i]));
+			}
 		}
 	}
 }

+ 1 - 1
AnKi/Shaders/ClusteredShadingCommon2.glsl

@@ -94,7 +94,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_GI_BINDING + 1, sc
 layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_CLUSTERS_BINDING,
 	   scalar) readonly buffer ANKI_RANDOM_BLOCK_NAME
 {
-	Cluster u_clusters[];
+	Cluster u_clusters2[];
 };
 #endif
 

+ 38 - 5
AnKi/Shaders/CollisionFunctions.glsl

@@ -6,22 +6,22 @@
 #pragma once
 
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
-Bool testRayTriangle(Vec3 orig, Vec3 dir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
+Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
 					 out F32 v)
 {
 	const Vec3 v0v1 = v1 - v0;
 	const Vec3 v0v2 = v2 - v0;
-	const Vec3 pvec = cross(dir, v0v2);
+	const Vec3 pvec = cross(rayDir, v0v2);
 	const F32 det = dot(v0v1, pvec);
 
-	if((backfaceCulling && det <= 0.0) || det == 0.0)
+	if((backfaceCulling && det < EPSILON) || abs(det) < EPSILON)
 	{
 		return false;
 	}
 
 	const F32 invDet = 1.0 / det;
 
-	const Vec3 tvec = orig - v0;
+	const Vec3 tvec = rayOrigin - v0;
 	u = dot(tvec, pvec) * invDet;
 	if(u < 0.0 || u > 1.0)
 	{
@@ -29,13 +29,20 @@ Bool testRayTriangle(Vec3 orig, Vec3 dir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfa
 	}
 
 	const Vec3 qvec = cross(tvec, v0v1);
-	v = dot(dir, qvec) * invDet;
+	v = dot(rayDir, qvec) * invDet;
 	if(v < 0.0 || u + v > 1.0)
 	{
 		return false;
 	}
 
 	t = dot(v0v2, qvec) * invDet;
+
+	if(t <= EPSILON)
+	{
+		// This is an addition to the original code. Can't have rays that don't touch the triangle
+		return false;
+	}
+
 	return true;
 }
 
@@ -57,6 +64,32 @@ F32 testRayAabbInside(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax)
 	return distToIntersect;
 }
 
+/// Ray box intersection by Simon Green. Intersects the ray with the 3 pairs of axis aligned planes of the box. t0 gets
+/// the largest of the per-axis near distances and t1 the smallest of the per-axis far distances.
+/// @return True if t0 < t1 and t1 is greater than EPSILON.
+Bool testRayAabb(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax, out F32 t0, out F32 t1)
+{
+	// Distances along the ray to the min and max planes of every axis
+	const Vec3 invDir = 1.0 / rayDir;
+	const Vec3 distToMin = invDir * (aabbMin - rayOrigin);
+	const Vec3 distToMax = invDir * (aabbMax - rayOrigin);
+
+	// Order them per axis
+	const Vec3 nearPerAxis = min(distToMax, distToMin);
+	const Vec3 farPerAxis = max(distToMax, distToMin);
+
+	t0 = max(nearPerAxis.x, max(nearPerAxis.y, nearPerAxis.z));
+	t1 = min(farPerAxis.x, min(farPerAxis.y, farPerAxis.z));
+
+	if(!(t0 < t1))
+	{
+		return false;
+	}
+
+	return t1 > EPSILON;
+}
+
+/// Ray vs oriented box. The ray is transformed by obbTransformInv and then tested with testRayAabb() against the box
+/// that spans [-obbExtend, obbExtend]. t0 and t1 are whatever testRayAabb() writes for the transformed ray.
+Bool testRayObb(Vec3 rayOrigin, Vec3 rayDir, Vec3 obbExtend, Mat4 obbTransformInv, out F32 t0, out F32 t1)
+{
+	// The origin is a point (w=1), the direction is a vector (w=0)
+	const Vec4 localOrigin = obbTransformInv * Vec4(rayOrigin, 1.0);
+	const Vec4 localDir = obbTransformInv * Vec4(rayDir, 0.0);
+
+	return testRayAabb(localOrigin.xyz, localDir.xyz, -obbExtend, obbExtend, t0, t1);
+}
+
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
 Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
 {

+ 33 - 14
AnKi/Shaders/Include/ClusteredShadingTypes2.h

@@ -47,6 +47,7 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(PointLight2) == _ANKI_SIZEOF_PointLight2);
 struct SpotLight2
 {
 	Vec3 m_position; ///< Position in world space.
+	Vec3 m_edgePoints[4u]; ///< Edge points in world space.
 	Vec3 m_diffuseColor;
 	F32 m_radius; ///< Max distance.
 	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
@@ -54,13 +55,30 @@ struct SpotLight2
 	Vec3 m_direction; ///< Light direction.
 	F32 m_outerCos;
 	F32 m_innerCos;
-	Vec3 m_edgePoints[5u]; ///< Edge points in world space
-	Vec3 m_padding;
+	Vec2 m_padding;
 	Mat4 m_textureMatrix;
 };
-const U32 _ANKI_SIZEOF_SpotLight2 = 32u * ANKI_SIZEOF(U32) + ANKI_SIZEOF(Mat4);
+const U32 _ANKI_SIZEOF_SpotLight2 = 28u * ANKI_SIZEOF(U32) + ANKI_SIZEOF(Mat4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight2) == _ANKI_SIZEOF_SpotLight2);
 
+/// Spot light view designed for binning. Has the same size and alignment as SpotLight2 (see the asserts below).
+struct SpotLightBinning
+{
+	Vec3 m_edgePoints[5u]; ///< Edge points in world space. Lines up with SpotLight2's m_position + m_edgePoints[4u].
+	Vec3 m_diffuseColor;
+	F32 m_radius; ///< Max distance.
+	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
+	Vec3 m_direction; ///< Light direction.
+	F32 m_outerCos;
+	F32 m_innerCos;
+	Vec2 m_padding;
+	Mat4 m_textureMatrix;
+};
+const U32 _ANKI_SIZEOF_SpotLightBinning = _ANKI_SIZEOF_SpotLight2; // Same size as SpotLight2 by definition
+ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLightBinning) == _ANKI_SIZEOF_SpotLightBinning);
+ANKI_SHADER_STATIC_ASSERT(alignof(SpotLightBinning) == alignof(SpotLight2));
+ANKI_SHADER_STATIC_ASSERT(alignof(SpotLightBinning) == alignof(SpotLight2));
+
 /// Directional light (sun).
 struct DirectionalLight2
 {
@@ -93,10 +111,13 @@ struct Decal2
 {
 	Vec4 m_diffuseUv;
 	Vec4 m_normRoughnessUv;
-	Mat4 m_textureMatrix;
 	Vec4 m_blendFactors;
+	Mat4 m_textureMatrix;
+	Mat4 m_invertedTransform;
+	Vec3 m_obbExtend;
+	F32 m_padding;
 };
-const U32 _ANKI_SIZEOF_Decal2 = 3u * ANKI_SIZEOF(Vec4) + ANKI_SIZEOF(Mat4);
+const U32 _ANKI_SIZEOF_Decal2 = 4u * ANKI_SIZEOF(Vec4) + 2u * ANKI_SIZEOF(Mat4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Decal2) == _ANKI_SIZEOF_Decal2);
 
 /// Fog density volume.
@@ -156,7 +177,7 @@ struct ClusteredShadingUniforms
 	Vec4 m_nearPlaneWSpace;
 	F32 m_near;
 	F32 m_far;
-	F32 m_oneOverFrustumLength; ///< 1/(far-near)
+	F32 m_zSplitCountOverFrustumLength; ///< m_zSplitCount/(far-near)
 	Vec3 m_cameraPosition;
 
 	UVec2 m_tileCounts;
@@ -170,15 +191,13 @@ struct ClusteredShadingUniforms
 	U32 m_reflectionProbeCount;
 	U32 m_giProbeCount;
 
-	F32 m_padding[3u];
-
 	CommonMatrices m_matrices;
 	CommonMatrices m_previousMatrices;
 
 	DirectionalLight2 m_directionalLight;
 };
 const U32 _ANKI_SIZEOF_ClusteredShadingUniforms =
-	28u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
+	24u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == _ANKI_SIZEOF_ClusteredShadingUniforms);
 
 /// Information that a tile or a Z-split will contain.
@@ -187,12 +206,12 @@ struct Cluster
 	U64 m_pointLightsMask;
 	U64 m_spotLightsMask;
 	U64 m_decalsMask;
-	U16 m_fogDensityVolumesMask;
-	U16 m_reflectionProbesMask;
-	U16 m_giProbesMask;
-	U16 m_padding; ///< Add some padding to be 100% sure nothing will break.
+	U32 m_fogDensityVolumesMask;
+	U32 m_reflectionProbesMask;
+	U32 m_giProbesMask;
+	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 };
-const U32 _ANKI_SIZEOF_Cluster = 4u * ANKI_SIZEOF(U64);
+const U32 _ANKI_SIZEOF_Cluster = 5u * ANKI_SIZEOF(U64);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
 
 ANKI_END_NAMESPACE

+ 1 - 1
Tools/Shader/ShaderProgramCompilerMain.cpp

@@ -7,7 +7,7 @@
 #include <AnKi/Util.h>
 using namespace anki;
 
-static const char* USAGE = R"(Usage: %s shader_program_file [options]
+static const char* USAGE = R"(Usage: %s input_shader_program_file [options]
 Options:
 -o <name of output>    : The name of the output binary
 -j <thread count>      : Number of threads. Defaults to system's max