Browse Source

Complete the cluster binning

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
d46abb16f1

+ 0 - 5
AnKi/Core/StagingGpuMemoryManager.h

@@ -41,11 +41,6 @@ public:
 
 
 	~StagingGpuMemoryToken() = default;
 	~StagingGpuMemoryToken() = default;
 
 
-	explicit operator Bool() const
-	{
-		return m_range != 0;
-	}
-
 	Bool operator==(const StagingGpuMemoryToken& b) const
 	Bool operator==(const StagingGpuMemoryToken& b) const
 	{
 	{
 		return m_buffer == b.m_buffer && m_offset == b.m_offset && m_range == b.m_range && m_type == b.m_type;
 		return m_buffer == b.m_buffer && m_offset == b.m_offset && m_range == b.m_range && m_type == b.m_type;

+ 0 - 1
AnKi/Gr/Utils/ClassGpuAllocator.cpp

@@ -77,7 +77,6 @@ void ClassGpuAllocator::init(GenericMemoryPoolAllocator<U8> alloc, ClassGpuAlloc
 		m_iface->getClassInfo(i, slotSize, chunkSize);
 		m_iface->getClassInfo(i, slotSize, chunkSize);
 		ANKI_ASSERT(slotSize > 0 && chunkSize > 0);
 		ANKI_ASSERT(slotSize > 0 && chunkSize > 0);
 
 
-		ANKI_ASSERT(isPowerOfTwo(slotSize));
 		ANKI_ASSERT((chunkSize % slotSize) == 0);
 		ANKI_ASSERT((chunkSize % slotSize) == 0);
 		ANKI_ASSERT((chunkSize / slotSize) <= MAX_SLOTS_PER_CHUNK);
 		ANKI_ASSERT((chunkSize / slotSize) <= MAX_SLOTS_PER_CHUNK);
 
 

+ 3 - 3
AnKi/Math/Functions.h

@@ -125,19 +125,19 @@ inline constexpr T log2(const T x)
 	return T(std::log2(x));
 	return T(std::log2(x));
 }
 }
 
 
-template<typename T, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
+template<typename T, ANKI_ENABLE(std::is_floating_point<T>::value)>
 /// Floating-point overload (selected when std::is_floating_point<T> holds).
 /// @param f Value to test.
 /// @param e Tolerance; defaults to EPSILON.
 /// @return True when the absolute value of f is strictly less than e.
 inline constexpr Bool isZero(const T f, const T e = EPSILON)
 inline constexpr Bool isZero(const T f, const T e = EPSILON)
 {
 {
 	return absolute<T>(f) < e;
 	return absolute<T>(f) < e;
 }
 }
 
 
-template<typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
+template<typename T, ANKI_ENABLE(std::is_integral<T>::value)>
 /// Integral overload (selected when std::is_integral<T> holds): exact comparison, no tolerance.
 /// @param f Value to test.
 /// @return True when f equals 0.
 inline constexpr Bool isZero(const T f)
 inline constexpr Bool isZero(const T f)
 {
 {
 	return f == 0;
 	return f == 0;
 }
 }
 
 
-template<typename T>
+template<typename T, ANKI_ENABLE(std::is_floating_point<T>::value)>
 inline constexpr T toRad(const T degrees)
 inline constexpr T toRad(const T degrees)
 {
 {
 	return degrees * (PI / T(180));
 	return degrees * (PI / T(180));

+ 383 - 13
AnKi/Renderer/ClusterBinning.cpp

@@ -6,7 +6,13 @@
 #include <AnKi/Renderer/ClusterBinning.h>
 #include <AnKi/Renderer/ClusterBinning.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Renderer/RenderQueue.h>
+#include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Core/ConfigSet.h>
+#include <AnKi/Util/Tracer.h>
+#include <AnKi/Util/ThreadHive.h>
+#include <AnKi/Util/HighRezTimer.h>
+#include <AnKi/Collision/Plane.h>
+#include <AnKi/Collision/Functions.h>
 
 
 namespace anki
 namespace anki
 {
 {
@@ -37,7 +43,7 @@ Error ClusterBinning::init(const ConfigSet& config)
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_grProg = variant->getProgram();
 	m_grProg = variant->getProgram();
 
 
-	m_clusterCount = m_r->getTileCounts().x() * m_r->getTileCounts().y() * m_r->getZSplitCount();
+	m_clusterCount = m_r->getTileCounts().x() * m_r->getTileCounts().y() + m_r->getZSplitCount();
 
 
 	return Error::NONE;
 	return Error::NONE;
 }
 }
@@ -61,30 +67,394 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 			this, 0);
 			this, 0);
 	}
 	}
 
 
-	// Allocate clusters. Store to a 8byte aligned ptr. Maybe that will trick the compiler to memset faster
-	U64* clusters = static_cast<U64*>(m_r->getStagingGpuMemoryManager().allocateFrame(
-		sizeof(Cluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, ctx.m_clustererGpuObjects.m_clusterersToken));
-
-	// Zero the memory because atomics will happen
-	memset(clusters, 0, sizeof(Cluster) * m_clusterCount);
+	writeClustererBuffers(ctx);
 }
 }
 
 
 void ClusterBinning::run(RenderPassWorkContext& rgraphCtx)
 void ClusterBinning::run(RenderPassWorkContext& rgraphCtx)
 {
 {
 	// Records the cluster binning compute work into the pass' command buffer: binds the binning program, the
 	// clustered shading uniforms (binding 0), the cluster storage buffer (binding 1) and one uniform buffer per
 	// visible object type (bindings 2-7), then dispatches one Y-slice per visible object.
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
-	const ClustererGpuObjects& tokens = m_runCtx.m_ctx->m_clustererGpuObjects;
+	const ClustererGpuObjects& tokens = m_runCtx.m_ctx->m_clusterShading;
 
 
 	cmdb->bindShaderProgram(m_grProg);
 	cmdb->bindShaderProgram(m_grProg);
-	bindUniforms(cmdb, 0, 0, tokens.m_lightingUniformsToken);
-	bindStorage(cmdb, 0, 1, tokens.m_clusterersToken);
-	bindStorage(cmdb, 0, 2, tokens.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 0, tokens.m_clusteredShadingUniformsToken);
+	bindStorage(cmdb, 0, 1, tokens.m_clustersToken);
+	bindUniforms(cmdb, 0, 2, tokens.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 3, tokens.m_spotLightsToken);
+	bindUniforms(cmdb, 0, 4, tokens.m_reflectionProbesToken);
+	bindUniforms(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
+	bindUniforms(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
+	bindUniforms(cmdb, 0, 7, tokens.m_decalsToken);
 
 
 	const U32 sampleCount = 8;
 	const U32 sampleCount = 8;
 	const U32 sizex = m_clusterCount * sampleCount;
 	const U32 sizex = m_clusterCount * sampleCount;
 	const RenderQueue& rqueue = *m_runCtx.m_ctx->m_renderQueue;
 	const RenderQueue& rqueue = *m_runCtx.m_ctx->m_renderQueue;
-	U32 clusterObjectCounts = min(MAX_VISIBLE_POINT_LIGHTS, rqueue.m_pointLights.getSize());
-	cmdb->dispatchCompute((sizex - 64 - 1) / 64, 1, clusterObjectCounts);
+	U32 clusterObjectCounts = rqueue.m_pointLights.getSize();
+	clusterObjectCounts += rqueue.m_spotLights.getSize();
+	clusterObjectCounts += rqueue.m_reflectionProbes.getSize();
+	clusterObjectCounts += rqueue.m_giProbes.getSize();
+	clusterObjectCounts += rqueue.m_fogDensityVolumes.getSize();
+	clusterObjectCounts += rqueue.m_decals.getSize();
 	// Ceiling division so that the last, partially filled group of 64 invocations is still dispatched. The previous
 	// "sizex - 64 - 1" form dropped groups and wrapped around (U32) for sizex < 65.
 	// NOTE(review): 64 is presumably the shader's workgroup size in X - confirm against the binning shader.
+	cmdb->dispatchCompute((sizex + 64 - 1) / 64, clusterObjectCounts, 1);
+}
+
+void ClusterBinning::writeClustererBuffers(RenderingContext& ctx)
+{
 	// Prepares the per-frame clustered shading buffers in three steps:
 	//  1. Caps every visible-object list of ctx.m_renderQueue at its MAX_VISIBLE_* limit, logging a warning with the
 	//     overflow count. The list is re-set to its own begin pointer with the capped count, which presumably drops
 	//     the excess entries for the rest of the frame (the render queue is modified in place).
 	//  2. Allocates frame staging memory for the uniforms, for each non-empty object list and for the clusters,
 	//     storing the token and the CPU address in ctx.m_clusterShading.
 	//  3. Submits writeClustererBuffersTask() to the thread hive to fill that memory.
+	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTER_SHADING_OBJECTS);
+
+	// Check limits
+	RenderQueue& rqueue = *ctx.m_renderQueue;
+	if(ANKI_UNLIKELY(rqueue.m_pointLights.getSize() > MAX_VISIBLE_POINT_LIGHTS))
+	{
+		ANKI_R_LOGW("Visible point lights exceed the max value by %u",
+					rqueue.m_pointLights.getSize() - MAX_VISIBLE_POINT_LIGHTS);
+		rqueue.m_pointLights.setArray(rqueue.m_pointLights.getBegin(), MAX_VISIBLE_POINT_LIGHTS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_spotLights.getSize() > MAX_VISIBLE_SPOT_LIGHTS))
+	{
+		ANKI_R_LOGW("Visible spot lights exceed the max value by %u",
+					rqueue.m_spotLights.getSize() - MAX_VISIBLE_SPOT_LIGHTS);
+		rqueue.m_spotLights.setArray(rqueue.m_spotLights.getBegin(), MAX_VISIBLE_SPOT_LIGHTS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_decals.getSize() > MAX_VISIBLE_DECALS))
+	{
+		ANKI_R_LOGW("Visible decals exceed the max value by %u", rqueue.m_decals.getSize() - MAX_VISIBLE_DECALS);
+		rqueue.m_decals.setArray(rqueue.m_decals.getBegin(), MAX_VISIBLE_DECALS);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_fogDensityVolumes.getSize() > MAX_VISIBLE_FOG_DENSITY_VOLUMES))
+	{
+		ANKI_R_LOGW("Visible fog volumes exceed the max value by %u",
+					rqueue.m_fogDensityVolumes.getSize() - MAX_VISIBLE_FOG_DENSITY_VOLUMES);
+		rqueue.m_fogDensityVolumes.setArray(rqueue.m_fogDensityVolumes.getBegin(), MAX_VISIBLE_FOG_DENSITY_VOLUMES);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_reflectionProbes.getSize() > MAX_VISIBLE_REFLECTION_PROBES))
+	{
+		ANKI_R_LOGW("Visible reflection probes exceed the max value by %u",
+					rqueue.m_reflectionProbes.getSize() - MAX_VISIBLE_REFLECTION_PROBES);
+		rqueue.m_reflectionProbes.setArray(rqueue.m_reflectionProbes.getBegin(), MAX_VISIBLE_REFLECTION_PROBES);
+	}
+
+	if(ANKI_UNLIKELY(rqueue.m_giProbes.getSize() > MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2))
+	{
+		ANKI_R_LOGW("Visible GI probes exceed the max value by %u",
+					rqueue.m_giProbes.getSize() - MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
+		rqueue.m_giProbes.setArray(rqueue.m_giProbes.getBegin(), MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
+	}
+
+	// Allocate buffers
+	ClustererGpuObjects& cs = ctx.m_clusterShading;
+	StagingGpuMemoryManager& stagingMem = m_r->getStagingGpuMemoryManager();
+
 	// The uniforms block is always allocated; the per-type buffers below only when the list is non-empty.
+	cs.m_clusteredShadingUniformsAddress = stagingMem.allocateFrame(
+		sizeof(ClusteredShadingUniforms), StagingGpuMemoryType::UNIFORM, cs.m_clusteredShadingUniformsToken);
+
+	if(rqueue.m_pointLights.getSize())
+	{
+		cs.m_pointLightsAddress = stagingMem.allocateFrame(rqueue.m_pointLights.getSize() * sizeof(PointLight2),
+														   StagingGpuMemoryType::UNIFORM, cs.m_pointLightsToken);
+	}
+	else
+	{
 		// Empty list: no allocation. The token is flagged instead; the bind helpers test isUnused() before binding.
+		cs.m_pointLightsToken.markUnused();
+	}
+
+	if(rqueue.m_spotLights.getSize())
+	{
+		cs.m_spotLightsAddress = stagingMem.allocateFrame(rqueue.m_spotLights.getSize() * sizeof(SpotLight2),
+														  StagingGpuMemoryType::UNIFORM, cs.m_spotLightsToken);
+	}
+	else
+	{
+		cs.m_spotLightsToken.markUnused();
+	}
+
+	if(rqueue.m_reflectionProbes.getSize())
+	{
+		cs.m_reflectionProbesAddress =
+			stagingMem.allocateFrame(rqueue.m_reflectionProbes.getSize() * sizeof(ReflectionProbe2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_reflectionProbesToken);
+	}
+	else
+	{
+		cs.m_reflectionProbesToken.markUnused();
+	}
+
+	if(rqueue.m_decals.getSize())
+	{
+		cs.m_decalsAddress = stagingMem.allocateFrame(rqueue.m_decals.getSize() * sizeof(Decal2),
+													  StagingGpuMemoryType::UNIFORM, cs.m_decalsToken);
+	}
+	else
+	{
+		cs.m_decalsToken.markUnused();
+	}
+
+	if(rqueue.m_fogDensityVolumes.getSize())
+	{
+		cs.m_fogDensityVolumesAddress =
+			stagingMem.allocateFrame(rqueue.m_fogDensityVolumes.getSize() * sizeof(FogDensityVolume2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_fogDensityVolumesToken);
+	}
+	else
+	{
+		cs.m_fogDensityVolumesToken.markUnused();
+	}
+
+	if(rqueue.m_giProbes.getSize())
+	{
+		cs.m_globalIlluminationProbesAddress =
+			stagingMem.allocateFrame(rqueue.m_giProbes.getSize() * sizeof(GlobalIlluminationProbe2),
+									 StagingGpuMemoryType::UNIFORM, cs.m_globalIlluminationProbesToken);
+	}
+	else
+	{
+		cs.m_globalIlluminationProbesToken.markUnused();
+	}
+
 	// The clusters are the only STORAGE allocation; the memory is zeroed later by writeClustererBuffersTask().
+	cs.m_clustersAddress =
+		stagingMem.allocateFrame(sizeof(Cluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, cs.m_clustersToken);
+
+	// Fire the async job
 	// The task only receives `this` and reads the rendering context through m_runCtx.m_ctx.
 	// NOTE(review): m_runCtx.m_ctx must therefore point at `ctx` before this call - the assignment is not visible here.
 	// NOTE(review): no semaphore is passed to submitTask; confirm that something waits for the task before the
 	// buffers are consumed.
+	m_r->getThreadHive().submitTask(
+		[](void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore) {
+			static_cast<ClusterBinning*>(userData)->writeClustererBuffersTask();
+		},
+		this);
+}
+
+void ClusterBinning::writeClustererBuffersTask()
+{
 	// Task body submitted by writeClustererBuffers(). Converts every visible render queue element into its GPU-side
 	// struct and writes it to the staging addresses stored in m_runCtx.m_ctx->m_clusterShading, then fills the
 	// ClusteredShadingUniforms block and finally zeroes the cluster storage. Each per-type section is skipped when
 	// the corresponding list is empty (no memory was allocated for it in that case).
+	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTER_SHADING_OBJECTS);
+
+	RenderingContext& ctx = *m_runCtx.m_ctx;
+	ClustererGpuObjects& cs = ctx.m_clusterShading;
+	const RenderQueue& rqueue = *ctx.m_renderQueue;
+
+	// Point lights
+	if(rqueue.m_pointLights.getSize())
+	{
+		PointLight2* lights = static_cast<PointLight2*>(cs.m_pointLightsAddress);
+
+		for(U32 i = 0; i < rqueue.m_pointLights.getSize(); ++i)
+		{
+			PointLight2& out = lights[i];
+			const PointLightQueueElement& in = rqueue.m_pointLights[i];
+
+			out.m_position = in.m_worldPosition;
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_radius = in.m_radius;
+			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius); // 1 / radius^2; assumes radius != 0
+			out.m_shadowLayer = in.m_shadowLayer;
+
 			// A null first shadow render queue is treated as "light casts no shadow".
+			if(in.m_shadowRenderQueues[0] == nullptr)
+			{
+				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
+			}
+			else
+			{
+				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
+				static_assert(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets), "See file");
+				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],
+					   sizeof(in.m_shadowAtlasTileOffsets));
+			}
+		}
+	}
+
+	// Spot lights
+	if(rqueue.m_spotLights.getSize())
+	{
+		SpotLight2* lights = static_cast<SpotLight2*>(cs.m_spotLightsAddress);
+
+		for(U32 i = 0; i < rqueue.m_spotLights.getSize(); ++i)
+		{
+			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
+			SpotLight2& out = lights[i];
+
+			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
 			// NOTE(review): copies sizeof(out.m_edgePoints) bytes out of in.m_edgePoints and, unlike the point light
 			// memcpy above, has no static_assert that both arrays have the same size and layout - confirm they match.
+			memcpy(&out.m_edgePoints[0][0], &in.m_edgePoints[0][0], sizeof(out.m_edgePoints));
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_radius = in.m_distance;
+			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);
+			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32; // MAX_U32 marks "no shadow"
+			out.m_direction = -in.m_worldTransform.getRotationPart().getZAxis();
+			out.m_outerCos = cos(in.m_outerAngle / 2.0f); // cosine of the half angles
+			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
+
+			if(in.hasShadow())
+			{
+				// bias * proj_l * view_l
+				out.m_textureMatrix = in.m_textureMatrix;
+			}
+			else
+			{
+				out.m_textureMatrix = Mat4::getIdentity();
+			}
+		}
+	}
+
+	// Reflection probes
+	if(rqueue.m_reflectionProbes.getSize())
+	{
+		ReflectionProbe2* probes = static_cast<ReflectionProbe2*>(cs.m_reflectionProbesAddress);
+
+		for(U32 i = 0; i < rqueue.m_reflectionProbes.getSize(); ++i)
+		{
+			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
+			ReflectionProbe2& out = probes[i];
+
+			out.m_position = in.m_worldPosition;
+			out.m_cubemapIndex = F32(in.m_textureArrayIndex); // index is stored as a float in the GPU struct
+			out.m_aabbMin = in.m_aabbMin;
+			out.m_aabbMax = in.m_aabbMax;
+		}
+	}
+
+	// Decals
+	if(rqueue.m_decals.getSize())
+	{
+		Decal2* decals = static_cast<Decal2*>(cs.m_decalsAddress);
+
 		// Track the atlases of the previous decal so that a mismatch between decals can be reported.
+		TextureView* diffuseAtlas = nullptr;
+		TextureView* specularRoughnessAtlas = nullptr;
+		for(U32 i = 0; i < rqueue.m_decals.getSize(); ++i)
+		{
+			const DecalQueueElement& in = rqueue.m_decals[i];
+			Decal2& out = decals[i];
+
+			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
+			   || (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
+			{
+				ANKI_R_LOGF("All decals should have the same tex atlas");
+			}
+
+			diffuseAtlas = in.m_diffuseAtlas;
+			specularRoughnessAtlas = in.m_specularRoughnessAtlas;
+
+			// Diff
 			// Stored as (x, y, z - x, w - y): the first UV pair plus the extent up to the second pair.
+			Vec4 uv = in.m_diffuseAtlasUv;
+			out.m_diffuseUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
+			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;
+
+			// Other
+			uv = in.m_specularRoughnessAtlasUv;
+			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
+			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;
+
+			// bias * proj_l * view
+			out.m_textureMatrix = in.m_textureMatrix;
+		}
+
 		// Publish the atlases of the last decal as the texture views stored in the context.
+		ANKI_ASSERT(diffuseAtlas || specularRoughnessAtlas);
+		ctx.m_clusterShading.m_diffuseDecalTextureView.reset(diffuseAtlas);
+		ctx.m_clusterShading.m_specularRoughnessDecalTextureView.reset(specularRoughnessAtlas);
+	}
+
+	// Fog volumes
+	if(rqueue.m_fogDensityVolumes.getSize())
+	{
+		FogDensityVolume2* volumes = static_cast<FogDensityVolume2*>(cs.m_fogDensityVolumesAddress);
+
+		for(U32 i = 0; i < rqueue.m_fogDensityVolumes.getSize(); ++i)
+		{
+			const FogDensityQueueElement& in = rqueue.m_fogDensityVolumes[i];
+			FogDensityVolume2& out = volumes[i];
+
+			out.m_density = in.m_density;
 			// The two vector members are shared: AABB min/max for boxes, center/radius^2 for spheres.
+			if(in.m_isBox)
+			{
+				out.m_isBox = 1;
+				out.m_aabbMinOrSphereCenter = in.m_aabbMin;
+				out.m_aabbMaxOrSphereRadiusSquared = in.m_aabbMax;
+			}
+			else
+			{
+				out.m_isBox = 0;
+				out.m_aabbMinOrSphereCenter = in.m_sphereCenter;
+				out.m_aabbMaxOrSphereRadiusSquared = Vec3(in.m_sphereRadius * in.m_sphereRadius);
+			}
+		}
+	}
+
+	// GI
+	if(rqueue.m_giProbes.getSize())
+	{
+		GlobalIlluminationProbe2* probes = static_cast<GlobalIlluminationProbe2*>(cs.m_globalIlluminationProbesAddress);
+
+		for(U32 i = 0; i < rqueue.m_giProbes.getSize(); ++i)
+		{
+			const GlobalIlluminationProbeQueueElement& in = rqueue.m_giProbes[i];
+			GlobalIlluminationProbe2& out = probes[i];
+
+			out.m_aabbMin = in.m_aabbMin;
+			out.m_aabbMax = in.m_aabbMax;
+			out.m_textureIndex = U32(&in - &rqueue.m_giProbes.getFront()); // element index via pointer difference
+			out.m_halfTexelSizeU = 1.0f / F32(F32(in.m_cellCounts.x()) * 6.0f) / 2.0f; // 0.5 / (cellCounts.x * 6)
+			out.m_fadeDistance = in.m_fadeDistance;
+		}
+	}
+
+	// General uniforms
+	{
+		ClusteredShadingUniforms& unis = *static_cast<ClusteredShadingUniforms*>(cs.m_clusteredShadingUniformsAddress);
+
+		unis.m_renderingSize = Vec2(F32(m_r->getWidth()), F32(m_r->getHeight()));
+
+		unis.m_time = F32(HighRezTimer::getCurrentTime());
+		unis.m_frame = m_r->getFrameCount() & MAX_U32; // keep the low 32 bits of the frame counter
+
+		Plane nearPlane;
+		extractClipPlane(rqueue.m_viewProjectionMatrix, FrustumPlaneType::NEAR, nearPlane);
+		unis.m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
+		unis.m_near = rqueue.m_cameraNear;
+		unis.m_far = rqueue.m_cameraFar;
+		unis.m_zSplitCountOverFrustumLength = F32(m_r->getZSplitCount()) / (rqueue.m_cameraFar - rqueue.m_cameraNear);
+		unis.m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
+
+		unis.m_tileCounts = m_r->getTileCounts();
+		unis.m_zSplitCount = m_r->getZSplitCount();
+		unis.m_lightVolumeLastCluster = m_r->getVolumetricLightingAccumulation().getFinalClusterInZ();
+
+		unis.m_matrices = ctx.m_matrices;
+		unis.m_previousMatrices = ctx.m_prevMatrices;
+
 		// Per-type object counts, taken from the (already capped) render queue lists.
+		unis.m_pointLightCount = rqueue.m_pointLights.getSize();
+		unis.m_spotLightCount = rqueue.m_spotLights.getSize();
+		unis.m_decalCount = rqueue.m_decals.getSize();
+		unis.m_fogDensityVolumeCount = rqueue.m_fogDensityVolumes.getSize();
+		unis.m_reflectionProbeCount = rqueue.m_reflectionProbes.getSize();
+		unis.m_giProbeCount = rqueue.m_giProbes.getSize();
+
+		// Directional light
 		// A zero uuid is treated as "no directional light"; only m_active is written in that case.
+		if(rqueue.m_directionalLight.m_uuid != 0)
+		{
+			DirectionalLight2& out = unis.m_directionalLight;
+			const DirectionalLightQueueElement& in = rqueue.m_directionalLight;
+
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_cascadeCount = in.m_shadowCascadeCount;
+			out.m_direction = in.m_direction;
+			out.m_active = 1;
+			out.m_effectiveShadowDistance = in.m_effectiveShadowDistance;
+			out.m_shadowCascadesDistancePower = in.m_shadowCascadesDistancePower;
+			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
+
+			for(U cascade = 0; cascade < in.m_shadowCascadeCount; ++cascade)
+			{
+				out.m_textureMatrices[cascade] = in.m_textureMatrices[cascade];
+			}
+		}
+		else
+		{
+			unis.m_directionalLight.m_active = 0;
+		}
+	}
+
+	// Zero the memory because atomics will happen
+	memset(cs.m_clustersAddress, 0, sizeof(Cluster) * m_clusterCount);
 }
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 3 - 1
AnKi/Renderer/ClusterBinning.h

@@ -35,10 +35,12 @@ private:
 	class
 	class
 	{
 	{
 	public:
 	public:
-		const RenderingContext* m_ctx = nullptr;
+		RenderingContext* m_ctx = nullptr;
 	} m_runCtx;
 	} m_runCtx;
 
 
 	void run(RenderPassWorkContext& rgraphCtx);
 	void run(RenderPassWorkContext& rgraphCtx);
+	void writeClustererBuffers(RenderingContext& ctx);
+	void writeClustererBuffersTask();
 };
 };
 /// @}
 /// @}
 
 

+ 11 - 3
AnKi/Renderer/Common.h

@@ -141,13 +141,21 @@ class ClustererGpuObjects
 {
 {
 public:
 public:
 	StagingGpuMemoryToken m_pointLightsToken;
 	StagingGpuMemoryToken m_pointLightsToken;
+	void* m_pointLightsAddress = nullptr;
 	StagingGpuMemoryToken m_spotLightsToken;
 	StagingGpuMemoryToken m_spotLightsToken;
+	void* m_spotLightsAddress = nullptr;
 	StagingGpuMemoryToken m_reflectionProbesToken;
 	StagingGpuMemoryToken m_reflectionProbesToken;
+	void* m_reflectionProbesAddress = nullptr;
 	StagingGpuMemoryToken m_decalsToken;
 	StagingGpuMemoryToken m_decalsToken;
+	void* m_decalsAddress = nullptr;
 	StagingGpuMemoryToken m_fogDensityVolumesToken;
 	StagingGpuMemoryToken m_fogDensityVolumesToken;
+	void* m_fogDensityVolumesAddress = nullptr;
 	StagingGpuMemoryToken m_globalIlluminationProbesToken;
 	StagingGpuMemoryToken m_globalIlluminationProbesToken;
-	StagingGpuMemoryToken m_lightingUniformsToken;
-	StagingGpuMemoryToken m_clusterersToken;
+	void* m_globalIlluminationProbesAddress = nullptr;
+	StagingGpuMemoryToken m_clusteredShadingUniformsToken;
+	void* m_clusteredShadingUniformsAddress = nullptr;
+	StagingGpuMemoryToken m_clustersToken;
+	void* m_clustersAddress = nullptr;
 
 
 	TextureViewPtr m_diffuseDecalTextureView;
 	TextureViewPtr m_diffuseDecalTextureView;
 	TextureViewPtr m_specularRoughnessDecalTextureView;
 	TextureViewPtr m_specularRoughnessDecalTextureView;
@@ -175,7 +183,7 @@ public:
 
 
 	StagingGpuMemoryToken m_lightShadingUniformsToken;
 	StagingGpuMemoryToken m_lightShadingUniformsToken;
 
 
-	ClustererGpuObjects m_clustererGpuObjects;
+	ClustererGpuObjects m_clusterShading;
 
 
 	RenderingContext(const StackAllocator<U8>& alloc)
 	RenderingContext(const StackAllocator<U8>& alloc)
 		: m_tempAllocator(alloc)
 		: m_tempAllocator(alloc)

+ 1 - 0
AnKi/Renderer/RenderQueue.h

@@ -141,6 +141,7 @@ public:
 	F32 m_outerAngle;
 	F32 m_outerAngle;
 	F32 m_innerAngle;
 	F32 m_innerAngle;
 	Vec3 m_diffuseColor;
 	Vec3 m_diffuseColor;
+	Array<Vec3, 4> m_edgePoints;
 	RenderQueue* m_shadowRenderQueue;
 	RenderQueue* m_shadowRenderQueue;
 	RenderQueueDrawCallback m_debugDrawCallback;
 	RenderQueueDrawCallback m_debugDrawCallback;
 	const void* m_debugDrawCallbackUserData;
 	const void* m_debugDrawCallbackUserData;

+ 0 - 322
AnKi/Renderer/Renderer.cpp

@@ -697,326 +697,4 @@ void Renderer::setCurrentDebugRenderTarget(CString rtName)
 	}
 	}
 }
 }
 
 
-void Renderer::writeClustererBuffers(RenderingContext& ctx)
-{
-	class Ctx
-	{
-	public:
-		Renderer* m_r;
-		RenderingContext* m_ctx;
-	};
-
-	Ctx c;
-	c.m_r = this;
-	c.m_ctx = &ctx;
-
-	m_threadHive->submitTask(
-		[](void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore) {
-			static_cast<Ctx*>(userData)->m_r->writeClustererBuffersTask(*static_cast<Ctx*>(userData)->m_ctx);
-		},
-		&c);
-}
-
-void Renderer::writeClustererBuffersTask(RenderingContext& ctx)
-{
-	ANKI_TRACE_SCOPED_EVENT(R_WRITE_CLUSTERER_OBJECTS);
-
-	const RenderQueue& rqueue = *ctx.m_renderQueue;
-
-	// Clusterer uniforms
-	{
-		ClusteredShadingUniforms* unis = static_cast<ClusteredShadingUniforms*>(
-			m_stagingMem->allocateFrame(sizeof(ClusteredShadingUniforms), StagingGpuMemoryType::UNIFORM,
-										ctx.m_clustererGpuObjects.m_lightingUniformsToken));
-
-		unis->m_renderingSize = Vec2(F32(m_width), F32(m_height));
-
-		unis->m_time = F32(HighRezTimer::getCurrentTime());
-		unis->m_frame = m_frameCount & MAX_U32;
-
-		Plane nearPlane;
-		extractClipPlane(rqueue.m_viewProjectionMatrix, FrustumPlaneType::NEAR, nearPlane);
-		unis->m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
-		unis->m_near = rqueue.m_cameraNear;
-		unis->m_far = rqueue.m_cameraFar;
-		unis->m_oneOverFrustumLength = 1.0f / (rqueue.m_cameraFar - rqueue.m_cameraNear);
-		unis->m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
-
-		unis->m_tileCounts = m_tileCounts;
-		unis->m_zSplitCount = m_zSplitCount;
-		unis->m_lightVolumeLastCluster = m_volLighting->getFinalClusterInZ();
-
-		unis->m_matrices = ctx.m_matrices;
-		unis->m_previousMatrices = ctx.m_prevMatrices;
-
-		// Directional light
-		if(rqueue.m_directionalLight.m_uuid != 0)
-		{
-			DirectionalLight2& out = unis->m_directionalLight;
-			const DirectionalLightQueueElement& in = rqueue.m_directionalLight;
-
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_cascadeCount = in.m_shadowCascadeCount;
-			out.m_direction = in.m_direction;
-			out.m_active = 1;
-			out.m_effectiveShadowDistance = in.m_effectiveShadowDistance;
-			out.m_shadowCascadesDistancePower = in.m_shadowCascadesDistancePower;
-			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
-
-			for(U cascade = 0; cascade < in.m_shadowCascadeCount; ++cascade)
-			{
-				out.m_textureMatrices[cascade] = in.m_textureMatrices[cascade];
-			}
-		}
-		else
-		{
-			unis->m_directionalLight.m_active = 0;
-		}
-	}
-
-	// Point lights
-	U32 visiblePointLightCount = rqueue.m_pointLights.getSize();
-	if(ANKI_UNLIKELY(visiblePointLightCount > MAX_VISIBLE_POINT_LIGHTS))
-	{
-		ANKI_R_LOGW("Visible point lights exceed the max value by %u",
-					visiblePointLightCount - MAX_VISIBLE_POINT_LIGHTS);
-		visiblePointLightCount = MAX_VISIBLE_POINT_LIGHTS;
-	}
-
-	if(visiblePointLightCount)
-	{
-		PointLight2* lights = static_cast<PointLight2*>(
-			m_stagingMem->allocateFrame(visiblePointLightCount * sizeof(PointLight2), StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_pointLightsToken));
-
-		for(U32 i = 0; i < visiblePointLightCount; ++i)
-		{
-			PointLight2& out = lights[i];
-			const PointLightQueueElement& in = rqueue.m_pointLights[i];
-
-			out.m_position = in.m_worldPosition;
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_radius = in.m_radius;
-			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
-			out.m_shadowLayer = in.m_shadowLayer;
-
-			if(in.m_shadowRenderQueues[0] == nullptr)
-			{
-				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
-			}
-			else
-			{
-				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
-				static_assert(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets), "See file");
-				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],
-					   sizeof(in.m_shadowAtlasTileOffsets));
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_pointLightsToken.markUnused();
-	}
-
-	// Spot lights
-	U32 visibleSpotLightCount = rqueue.m_spotLights.getSize();
-	if(visibleSpotLightCount > MAX_VISIBLE_SPOT_LIGHTS)
-	{
-		ANKI_R_LOGW("Visible spot lights exceed the max value by %u", visibleSpotLightCount - MAX_VISIBLE_SPOT_LIGHTS);
-		visibleSpotLightCount = MAX_VISIBLE_SPOT_LIGHTS;
-	}
-
-	if(visibleSpotLightCount)
-	{
-		SpotLight2* lights = static_cast<SpotLight2*>(
-			m_stagingMem->allocateFrame(visibleSpotLightCount * sizeof(SpotLight2), StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_spotLightsToken));
-
-		for(U32 i = 0; i < visibleSpotLightCount; ++i)
-		{
-			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
-			SpotLight2& out = lights[i];
-
-			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
-			out.m_diffuseColor = in.m_diffuseColor;
-			out.m_radius = in.m_distance;
-			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);
-			out.m_shadowLayer = (in.hasShadow()) ? in.m_shadowLayer : MAX_U32;
-			out.m_direction = -in.m_worldTransform.getRotationPart().getZAxis();
-			out.m_outerCos = cos(in.m_outerAngle / 2.0f);
-			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
-
-			if(in.hasShadow())
-			{
-				// bias * proj_l * view_l
-				out.m_textureMatrix = in.m_textureMatrix;
-			}
-			else
-			{
-				out.m_textureMatrix = Mat4::getIdentity();
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_spotLightsToken.markUnused();
-	}
-
-	// Reflection probes
-	U32 visibleReflectionPRobeCount = rqueue.m_reflectionProbes.getSize();
-	if(visibleReflectionPRobeCount > MAX_VISIBLE_REFLECTION_PROBES)
-	{
-		ANKI_R_LOGW("Visible reflection probes exceed the max value by %u",
-					visibleReflectionPRobeCount - MAX_VISIBLE_REFLECTION_PROBES);
-		visibleReflectionPRobeCount = MAX_VISIBLE_REFLECTION_PROBES;
-	}
-
-	if(visibleReflectionPRobeCount)
-	{
-		ReflectionProbe2* probes = static_cast<ReflectionProbe2*>(m_stagingMem->allocateFrame(
-			visibleReflectionPRobeCount * sizeof(ReflectionProbe2), StagingGpuMemoryType::STORAGE,
-			ctx.m_clustererGpuObjects.m_reflectionProbesToken));
-
-		for(U32 i = 0; i < visibleReflectionPRobeCount; ++i)
-		{
-			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
-			ReflectionProbe2& out = probes[i];
-
-			out.m_position = in.m_worldPosition;
-			out.m_cubemapIndex = F32(in.m_textureArrayIndex);
-			out.m_aabbMin = in.m_aabbMin;
-			out.m_aabbMax = in.m_aabbMax;
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_reflectionProbesToken.markUnused();
-	}
-
-	// Decals
-	U32 visibleDecalCount = rqueue.m_decals.getSize();
-	if(visibleDecalCount > MAX_VISIBLE_DECALS)
-	{
-		ANKI_R_LOGW("Visible decals exceed the max value by %u", visibleDecalCount - MAX_VISIBLE_DECALS);
-		visibleDecalCount = MAX_VISIBLE_DECALS;
-	}
-
-	if(visibleDecalCount)
-	{
-		Decal2* decals = static_cast<Decal2*>(m_stagingMem->allocateFrame(sizeof(Decal2) * visibleDecalCount,
-																		  StagingGpuMemoryType::STORAGE,
-																		  ctx.m_clustererGpuObjects.m_decalsToken));
-
-		TextureView* diffuseAtlas = nullptr;
-		TextureView* specularRoughnessAtlas = nullptr;
-		for(U32 i = 0; i < visibleDecalCount; ++i)
-		{
-			const DecalQueueElement& in = rqueue.m_decals[i];
-			Decal2& out = decals[i];
-
-			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
-			   || (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
-			{
-				ANKI_R_LOGF("All decals should have the same tex atlas");
-			}
-
-			diffuseAtlas = in.m_diffuseAtlas;
-			specularRoughnessAtlas = in.m_specularRoughnessAtlas;
-
-			// Diff
-			Vec4 uv = in.m_diffuseAtlasUv;
-			out.m_diffuseUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
-			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;
-
-			// Other
-			uv = in.m_specularRoughnessAtlasUv;
-			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
-			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;
-
-			// bias * proj_l * view
-			out.m_textureMatrix = in.m_textureMatrix;
-		}
-
-		ANKI_ASSERT(diffuseAtlas || specularRoughnessAtlas);
-		ctx.m_clustererGpuObjects.m_diffuseDecalTextureView.reset(diffuseAtlas);
-		ctx.m_clustererGpuObjects.m_specularRoughnessDecalTextureView.reset(specularRoughnessAtlas);
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_decalsToken.markUnused();
-	}
-
-	// Fog volumes
-	U32 visibleFogCount = rqueue.m_fogDensityVolumes.getSize();
-	if(visibleFogCount > MAX_VISIBLE_FOG_DENSITY_VOLUMES)
-	{
-		ANKI_R_LOGW("Visible fog density volumes exceed the max value by %u",
-					visibleFogCount - MAX_VISIBLE_FOG_DENSITY_VOLUMES);
-		visibleFogCount = MAX_VISIBLE_FOG_DENSITY_VOLUMES;
-	}
-
-	if(visibleFogCount)
-	{
-		FogDensityVolume2* volumes = static_cast<FogDensityVolume2*>(
-			m_stagingMem->allocateFrame(sizeof(FogDensityVolume2) * visibleFogCount, StagingGpuMemoryType::STORAGE,
-										ctx.m_clustererGpuObjects.m_fogDensityVolumesToken));
-
-		for(U32 i = 0; i < visibleFogCount; ++i)
-		{
-			const FogDensityQueueElement& in = rqueue.m_fogDensityVolumes[i];
-			FogDensityVolume2& out = volumes[i];
-
-			out.m_density = in.m_density;
-			if(in.m_isBox)
-			{
-				out.m_isBox = 1;
-				out.m_aabbMinOrSphereCenter = in.m_aabbMin;
-				out.m_aabbMaxOrSphereRadiusSquared = in.m_aabbMax;
-			}
-			else
-			{
-				out.m_isBox = 0;
-				out.m_aabbMinOrSphereCenter = in.m_sphereCenter;
-				out.m_aabbMaxOrSphereRadiusSquared = Vec3(in.m_sphereRadius * in.m_sphereRadius);
-			}
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_fogDensityVolumesToken.markUnused();
-	}
-
-	// GI
-	U32 visibleGiProbeCount = rqueue.m_giProbes.getSize();
-	if(visibleGiProbeCount > MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2)
-	{
-		ANKI_R_LOGW("Visible GI probes exceed the max value by %u",
-					visibleGiProbeCount - MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2);
-		visibleGiProbeCount = MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2;
-	}
-
-	if(visibleGiProbeCount)
-	{
-		GlobalIlluminationProbe2* probes = static_cast<GlobalIlluminationProbe2*>(m_stagingMem->allocateFrame(
-			sizeof(GlobalIlluminationProbe2) * visibleGiProbeCount, StagingGpuMemoryType::STORAGE,
-			ctx.m_clustererGpuObjects.m_globalIlluminationProbesToken));
-
-		for(U32 i = 0; i < visibleGiProbeCount; ++i)
-		{
-			const GlobalIlluminationProbeQueueElement& in = rqueue.m_giProbes[i];
-			GlobalIlluminationProbe2& out = probes[i];
-
-			out.m_aabbMin = in.m_aabbMin;
-			out.m_aabbMax = in.m_aabbMax;
-			out.m_textureIndex = U32(&in - &rqueue.m_giProbes.getFront());
-			out.m_halfTexelSizeU = 1.0f / F32(F32(in.m_cellCounts.x()) * 6.0f) / 2.0f;
-			out.m_fadeDistance = in.m_fadeDistance;
-		}
-	}
-	else
-	{
-		ctx.m_clustererGpuObjects.m_globalIlluminationProbesToken.markUnused();
-	}
-}
-
 } // end namespace anki
 } // end namespace anki

+ 0 - 3
AnKi/Renderer/Renderer.h

@@ -444,9 +444,6 @@ private:
 	void initJitteredMats();
 	void initJitteredMats();
 
 
 	void updateLightShadingUniforms(RenderingContext& ctx) const;
 	void updateLightShadingUniforms(RenderingContext& ctx) const;
-
-	void writeClustererBuffers(RenderingContext& ctx);
-	void writeClustererBuffersTask(RenderingContext& ctx);
 };
 };
 /// @}
 /// @}
 
 

+ 2 - 2
AnKi/Renderer/RendererObject.cpp

@@ -39,7 +39,7 @@ void* RendererObject::allocateFrameStagingMemory(PtrSize size, StagingGpuMemoryT
 void RendererObject::bindUniforms(CommandBufferPtr& cmdb, U32 set, U32 binding,
 void RendererObject::bindUniforms(CommandBufferPtr& cmdb, U32 set, U32 binding,
 								  const StagingGpuMemoryToken& token) const
 								  const StagingGpuMemoryToken& token) const
 {
 {
-	if(token && !token.isUnused())
+	if(!token.isUnused())
 	{
 	{
 		cmdb->bindUniformBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 		cmdb->bindUniformBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 	}
 	}
@@ -51,7 +51,7 @@ void RendererObject::bindUniforms(CommandBufferPtr& cmdb, U32 set, U32 binding,
 
 
 void RendererObject::bindStorage(CommandBufferPtr& cmdb, U32 set, U32 binding, const StagingGpuMemoryToken& token) const
 void RendererObject::bindStorage(CommandBufferPtr& cmdb, U32 set, U32 binding, const StagingGpuMemoryToken& token) const
 {
 {
-	if(token && !token.isUnused())
+	if(!token.isUnused())
 	{
 	{
 		cmdb->bindStorageBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 		cmdb->bindStorageBuffer(set, binding, token.m_buffer, token.m_offset, token.m_range);
 	}
 	}

+ 8 - 1
AnKi/Scene/Components/LightComponent.cpp

@@ -44,10 +44,17 @@ Error LightComponent::update(SceneNode& node, Second prevTime, Second crntTime,
 	if(updated && m_type == LightComponentType::SPOT)
 	if(updated && m_type == LightComponentType::SPOT)
 	{
 	{
 
 
-		static const Mat4 biasMat4(0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+		const Mat4 biasMat4(0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
 		const Mat4 proj = Mat4::calculatePerspectiveProjectionMatrix(m_spot.m_outerAngle, m_spot.m_outerAngle,
 		const Mat4 proj = Mat4::calculatePerspectiveProjectionMatrix(m_spot.m_outerAngle, m_spot.m_outerAngle,
 																	 LIGHT_FRUSTUM_NEAR_PLANE, m_spot.m_distance);
 																	 LIGHT_FRUSTUM_NEAR_PLANE, m_spot.m_distance);
 		m_spot.m_textureMat = biasMat4 * proj * Mat4(m_worldtransform.getInverse());
 		m_spot.m_textureMat = biasMat4 * proj * Mat4(m_worldtransform.getInverse());
+
+		Array<Vec4, 4> points;
+		computeEdgesOfFrustum(m_spot.m_distance, m_spot.m_outerAngle, m_spot.m_outerAngle, &points[0]);
+		for(U32 i = 0; i < 4; ++i)
+		{
+			m_spot.m_edgePointsWspace[i] = m_worldtransform.transform(points[i].xyz());
+		}
 	}
 	}
 
 
 	// Update the scene bounds always
 	// Update the scene bounds always

+ 14 - 15
AnKi/Scene/Components/LightComponent.h

@@ -163,6 +163,7 @@ public:
 		el.m_outerAngle = m_spot.m_outerAngle;
 		el.m_outerAngle = m_spot.m_outerAngle;
 		el.m_innerAngle = m_spot.m_innerAngle;
 		el.m_innerAngle = m_spot.m_innerAngle;
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_diffuseColor = m_diffColor.xyz();
+		el.m_edgePoints = m_spot.m_edgePointsWspace;
 		el.m_debugDrawCallback = [](RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData) {
 		el.m_debugDrawCallback = [](RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData) {
 			ANKI_ASSERT(userData.getSize() == 1);
 			ANKI_ASSERT(userData.getSize() == 1);
 			static_cast<const LightComponent*>(userData[0])->draw(ctx);
 			static_cast<const LightComponent*>(userData[0])->draw(ctx);
@@ -188,33 +189,31 @@ private:
 	class Point
 	class Point
 	{
 	{
 	public:
 	public:
-		F32 m_radius;
+		F32 m_radius = 1.0f;
 	};
 	};
 
 
 	class Spot
 	class Spot
 	{
 	{
 	public:
 	public:
-		Mat4 m_textureMat;
-		F32 m_distance;
-		F32 m_innerAngleCos;
-		F32 m_outerAngleCos;
-		F32 m_outerAngle;
-		F32 m_innerAngle;
+		Mat4 m_textureMat = Mat4::getIdentity();
+		F32 m_distance = 1.0f;
+		F32 m_outerAngle = toRad(30.0f);
+		F32 m_innerAngle = toRad(15.0f);
+		F32 m_outerAngleCos = cos(m_outerAngle / 2.0f);
+		F32 m_innerAngleCos = cos(m_innerAngle / 2.0f);
+		Array<Vec3, 4> m_edgePointsWspace = {};
 	};
 	};
 
 
 	class Dir
 	class Dir
 	{
 	{
 	public:
 	public:
-		Vec3 m_sceneMin;
-		Vec3 m_sceneMax;
+		Vec3 m_sceneMin = Vec3(-1.0f);
+		Vec3 m_sceneMax = Vec3(1.0f);
 	};
 	};
 
 
-	union
-	{
-		Point m_point;
-		Spot m_spot;
-		Dir m_dir;
-	};
+	Point m_point;
+	Spot m_spot;
+	Dir m_dir;
 
 
 	TextureResourcePtr m_pointDebugTex;
 	TextureResourcePtr m_pointDebugTex;
 	TextureResourcePtr m_spotDebugTex;
 	TextureResourcePtr m_spotDebugTex;

+ 6 - 6
AnKi/Scene/Components/MoveComponent.h

@@ -96,19 +96,19 @@ public:
 
 
 	/// @name Mess with the local transform
 	/// @name Mess with the local transform
 	/// @{
 	/// @{
-	void rotateLocalX(F32 angDegrees)
+	void rotateLocalX(F32 angleRad)
 	{
 	{
-		m_ltrf.getRotation().rotateXAxis(angDegrees);
+		m_ltrf.getRotation().rotateXAxis(angleRad);
 		markForUpdate();
 		markForUpdate();
 	}
 	}
-	void rotateLocalY(F32 angDegrees)
+	void rotateLocalY(F32 angleRad)
 	{
 	{
-		m_ltrf.getRotation().rotateYAxis(angDegrees);
+		m_ltrf.getRotation().rotateYAxis(angleRad);
 		markForUpdate();
 		markForUpdate();
 	}
 	}
-	void rotateLocalZ(F32 angDegrees)
+	void rotateLocalZ(F32 angleRad)
 	{
 	{
-		m_ltrf.getRotation().rotateZAxis(angDegrees);
+		m_ltrf.getRotation().rotateZAxis(angleRad);
 		markForUpdate();
 		markForUpdate();
 	}
 	}
 	void moveLocalX(F32 distance)
 	void moveLocalX(F32 distance)

+ 1 - 1
AnKi/ShaderCompiler/Common.h

@@ -20,7 +20,7 @@ namespace anki
 #define ANKI_SHADER_COMPILER_LOGW(...) ANKI_LOG("SHCO", WARNING, __VA_ARGS__)
 #define ANKI_SHADER_COMPILER_LOGW(...) ANKI_LOG("SHCO", WARNING, __VA_ARGS__)
 #define ANKI_SHADER_COMPILER_LOGF(...) ANKI_LOG("SHCO", FATAL, __VA_ARGS__)
 #define ANKI_SHADER_COMPILER_LOGF(...) ANKI_LOG("SHCO", FATAL, __VA_ARGS__)
 
 
-constexpr U32 MAX_SHADER_BINARY_NAME_LENGTH = 63;
+constexpr U32 MAX_SHADER_BINARY_NAME_LENGTH = 127;
 
 
 using MutatorValue = I32; ///< The type of the mutator value
 using MutatorValue = I32; ///< The type of the mutator value
 
 

+ 26 - 3
AnKi/ShaderCompiler/Glslang.cpp

@@ -20,9 +20,15 @@
 #	pragma GCC diagnostic pop
 #	pragma GCC diagnostic pop
 #endif
 #endif
 
 
+#define ANKI_GLSLANG_DUMP 0
+
 namespace anki
 namespace anki
 {
 {
 
 
+#if ANKI_GLSLANG_DUMP
+static Atomic<U32> g_dumpFileCount;
+#endif
+
 class GlslangCtx
 class GlslangCtx
 {
 {
 public:
 public:
@@ -245,6 +251,23 @@ Error preprocessGlsl(CString in, StringAuto& out)
 Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolAllocator<U8> tmpAlloc,
 Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolAllocator<U8> tmpAlloc,
 						  DynamicArrayAuto<U8>& spirv)
 						  DynamicArrayAuto<U8>& spirv)
 {
 {
+#if ANKI_GLSLANG_DUMP
+	// Dump it
+	{
+		const U32 count = g_dumpFileCount.fetchAdd(1) / 2;
+		if(count == 0)
+		{
+			ANKI_SHADER_COMPILER_LOGW("GLSL dumping is enabled");
+		}
+
+		File file;
+		StringAuto fname(tmpAlloc);
+		fname.sprintf("/tmp/%u.glsl", count);
+		ANKI_CHECK(file.open(fname, FileOpenFlag::WRITE));
+		ANKI_CHECK(file.writeText("%s", src.cstr()));
+	}
+#endif
+
 	const EShLanguage stage = ankiToGlslangShaderType(shaderType);
 	const EShLanguage stage = ankiToGlslangShaderType(shaderType);
 	const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
 	const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
 
 
@@ -280,10 +303,10 @@ Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolA
 	spirv.resize(U32(glslangSpirv.size() * sizeof(unsigned int)));
 	spirv.resize(U32(glslangSpirv.size() * sizeof(unsigned int)));
 	memcpy(&spirv[0], &glslangSpirv[0], spirv.getSizeInBytes());
 	memcpy(&spirv[0], &glslangSpirv[0], spirv.getSizeInBytes());
 
 
-#if 0
+#if ANKI_GLSLANG_DUMP
 	// Dump it
 	// Dump it
 	{
 	{
-		static U32 count = 0;
+		const U32 count = g_dumpFileCount.fetchAdd(1) / 2;
 		if(count == 0)
 		if(count == 0)
 		{
 		{
 			ANKI_SHADER_COMPILER_LOGW("SPIR-V dumping is enabled");
 			ANKI_SHADER_COMPILER_LOGW("SPIR-V dumping is enabled");
@@ -291,7 +314,7 @@ Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolA
 
 
 		File file;
 		File file;
 		StringAuto fname(tmpAlloc);
 		StringAuto fname(tmpAlloc);
-		fname.sprintf("/tmp/%u.spv", count++);
+		fname.sprintf("/tmp/%u.spv", count);
 		ANKI_CHECK(file.open(fname, FileOpenFlag::WRITE | FileOpenFlag::BINARY));
 		ANKI_CHECK(file.open(fname, FileOpenFlag::WRITE | FileOpenFlag::BINARY));
 		ANKI_CHECK(file.write(spirv.getBegin(), spirv.getSizeInBytes()));
 		ANKI_CHECK(file.write(spirv.getBegin(), spirv.getSizeInBytes()));
 	}
 	}

+ 180 - 13
AnKi/Shaders/ClusterBinning.ankiprog

@@ -18,29 +18,52 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(RENDERING_SIZE, 4, UVec2(1u));
 const U32 WORKGROUP_SIZE = 64u;
 const U32 WORKGROUP_SIZE = 64u;
 layout(local_size_x = WORKGROUP_SIZE) in;
 layout(local_size_x = WORKGROUP_SIZE) in;
 
 
-layout(set = 0, binding = 0, scalar) uniform b_unis
+layout(set = 0, binding = 0, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 {
 	ClusteredShadingUniforms u_unis;
 	ClusteredShadingUniforms u_unis;
 };
 };
 
 
-layout(set = 0, binding = 1, scalar) writeonly buffer b_tiles
+layout(set = 0, binding = 1, scalar) writeonly buffer ANKI_RANDOM_BLOCK_NAME
 {
 {
 	Cluster u_clusters[];
 	Cluster u_clusters[];
 };
 };
 
 
-layout(set = 0, binding = 2, scalar) readonly buffer b_plights
+layout(set = 0, binding = 2, scalar) uniform ANKI_RANDOM_BLOCK_NAME
 {
 {
-	PointLight2 u_pointLights[];
+	PointLight2 u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
+};
+
+layout(set = 0, binding = 3, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	SpotLightBinning u_spotLights[MAX_VISIBLE_SPOT_LIGHTS];
+};
+
+layout(set = 0, binding = 4, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	ReflectionProbe2 u_reflectionProbes[MAX_VISIBLE_REFLECTION_PROBES];
+};
+
+layout(set = 0, binding = 5, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	GlobalIlluminationProbe2 u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES2];
+};
+
+layout(set = 0, binding = 6, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	FogDensityVolume2 u_fogVolumes[MAX_VISIBLE_FOG_DENSITY_VOLUMES];
+};
+
+layout(set = 0, binding = 7, scalar) uniform ANKI_RANDOM_BLOCK_NAME
+{
+	Decal2 u_decals[MAX_VISIBLE_DECALS];
 };
 };
 
 
 const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
 const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
 
 
 // DX Sample locations
 // DX Sample locations
 const U32 SAMPLE_COUNT = 4u;
 const U32 SAMPLE_COUNT = 4u;
-const I32 SAMPLE_OFFSET = 8;
-#define LOCATION(x, y) UVec2(IVec2(x, y) + SAMPLE_OFFSET)
-const UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
-	UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
+#define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(TILE_SIZE))
+UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
 #undef LOCATION
 #undef LOCATION
 
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
@@ -56,6 +79,32 @@ Bool isPointLight()
 	return clustererObjectIdx < u_unis.m_pointLightCount;
 	return clustererObjectIdx < u_unis.m_pointLightCount;
 }
 }
 
 
+/// Returns true if the clusterer object of this invocation lies before the end of the spot light range. Like the
+/// other is*() predicates below it only checks the upper bound of its range, so it is only meaningful inside an
+/// if/else-if chain that tests the object types in order (point lights first).
+Bool isSpotLight()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount;
+}
+
+/// Upper bound check for the reflection probe range. See isSpotLight() for the ordering requirement.
+Bool isReflectionProbe()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount;
+}
+
+/// Upper bound check for the GI probe range. See isSpotLight() for the ordering requirement.
+Bool isGiProbe()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	// The GI range ends after the GI probes, not after a second copy of the reflection probes
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+									+ u_unis.m_giProbeCount;
+}
+
+/// Upper bound check for the fog density volume range. See isSpotLight() for the ordering requirement. Objects
+/// past this range are handled as decals by the callers.
+Bool isFogVolume()
+{
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	return clustererObjectIdx < u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+									+ u_unis.m_giProbeCount + u_unis.m_fogDensityVolumesCount;
+}
+
 void main()
 void main()
 {
 {
 	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
 	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
@@ -105,6 +154,85 @@ void main()
 		const PointLight2 light = u_pointLights[objectArrayIdx];
 		const PointLight2 light = u_pointLights[objectArrayIdx];
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 	}
 	}
+	else if(isSpotLight())
+	{
+		objectArrayIdx = clustererObjectIdx - u_unis.m_pointLightCount;
+		const SpotLightBinning light = u_spotLights[objectArrayIdx];
+
+		// Start with an inverted interval so that the first hit initializes both ends
+		t0 = 10000.0;
+		t1 = -10000.0;
+
+		// Iterate all triangles. 6 triangles over the 5 edge points; presumably point 0 is the light position and
+		// points 1-4 are the far corners of the frustum
+		const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
+		U32 hits = 0u;
+		U32 idx = 0u;
+		do
+		{
+			const Vec3 v0 = light.m_edgePoints[indices[idx + 0u]];
+			const Vec3 v1 = light.m_edgePoints[indices[idx + 1u]];
+			const Vec3 v2 = light.m_edgePoints[indices[idx + 2u]];
+
+			F32 t, u, v;
+			const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);
+
+			if(localCollides)
+			{
+				t0 = min(t0, t);
+				t1 = max(t1, t);
+				++hits;
+			}
+			idx += 3u;
+		} while(hits < 2u && idx < 6u * 3u); // Stop after 2 hits (an entry and an exit) or when out of triangles
+
+		if(hits == 1u)
+		{
+			// A single hit: presumably the ray starts inside the volume, so the interval begins at the origin
+			t0 = 0.0;
+		}
+
+		collides = (hits != 0u);
+	}
+	else if(isReflectionProbe())
+	{
+		objectArrayIdx = clustererObjectIdx - (u_unis.m_pointLightCount + u_unis.m_spotLightCount);
+		const ReflectionProbe2 probe = u_reflectionProbes[objectArrayIdx];
+
+		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
+	}
+	else if(isGiProbe())
+	{
+		objectArrayIdx =
+			clustererObjectIdx - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount);
+		const GlobalIlluminationProbe2 probe = u_giProbes[objectArrayIdx];
+
+		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
+	}
+	else if(isFogVolume())
+	{
+		// The offset is the count of all the object types that precede the fog volumes, each counted once
+		objectArrayIdx = clustererObjectIdx
+						 - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+							+ u_unis.m_giProbeCount);
+		const FogDensityVolume2 vol = u_fogVolumes[objectArrayIdx];
+
+		if(vol.m_isBox != 0u)
+		{
+			collides =
+				testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, t0, t1);
+		}
+		else
+		{
+			// The volume stores the squared radius, testRaySphere() is given the radius
+			collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter,
+									 sqrt(vol.m_aabbMaxOrSphereRadiusSquared.x), t0, t1);
+		}
+	}
+	else
+	{
+		// Decals come last: skip every other object type, each counted once
+		objectArrayIdx = clustererObjectIdx
+						 - (u_unis.m_pointLightCount + u_unis.m_spotLightCount + u_unis.m_reflectionProbeCount
+							+ u_unis.m_giProbeCount + u_unis.m_fogDensityVolumesCount);
+		const Decal2 decal = u_decals[objectArrayIdx];
+
+		// Decal2 names its half-size member m_obbExtend
+		collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
+	}
 
 
 	// Update the masks
 	// Update the masks
 	if(collides)
 	if(collides)
@@ -133,12 +261,11 @@ void main()
 			minDistFromNearPlane = distFromNearPlaneB;
 			minDistFromNearPlane = distFromNearPlaneB;
 			maxDistFromNearPlane = distFromNearPlaneA;
 			maxDistFromNearPlane = distFromNearPlaneA;
 		}
 		}
-		minDistFromNearPlane = max(0.0, minDistFromNearPlane);
-		maxDistFromNearPlane = max(0.0, maxDistFromNearPlane);
 
 
-		const U32 startZSplit = U32(minDistFromNearPlane * u_unis.m_oneOverFrustumLength);
-		const U32 endZSplit = min(Z_SPLIT_COUNT - 1u, U32(maxDistFromNearPlane * u_unis.m_oneOverFrustumLength));
-		for(U32 i = startZSplit; i <= endZSplit; ++i)
+		const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
+		const I32 endZSplit =
+			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
+		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 		{
 			atomicOr(s_zSplitMasks[i], mask);
 			atomicOr(s_zSplitMasks[i], mask);
 		}
 		}
@@ -155,6 +282,26 @@ void main()
 		{
 		{
 			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
+		else if(isSpotLight())
+		{
+			atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
+		}
+		else if(isReflectionProbe())
+		{
+			atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else if(isGiProbe())
+		{
+			atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else if(isFogVolume())
+		{
+			atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
+		}
+		else
+		{
+			atomicOr(u_clusters[tileIdx].m_decalsMask, U32(s_tileMasks[localTileIdx]));
+		}
 	}
 	}
 
 
 	// All invocations write at least one Z split
 	// All invocations write at least one Z split
@@ -167,6 +314,26 @@ void main()
 			{
 			{
 				atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
 				atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
 			}
 			}
+			else if(isSpotLight())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
+			}
+			else if(isReflectionProbe())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
+			}
+			else if(isGiProbe())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
+			}
+			else if(isFogVolume())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
+			}
+			else
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, U32(s_zSplitMasks[i]));
+			}
 		}
 		}
 	}
 	}
 }
 }

+ 1 - 1
AnKi/Shaders/ClusteredShadingCommon2.glsl

@@ -94,7 +94,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_GI_BINDING + 1, sc
 layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_CLUSTERS_BINDING,
 layout(set = CLUSTERED_SHADING_SET, binding = CLUSTER_SHADING_CLUSTERS_BINDING,
 	   scalar) readonly buffer ANKI_RANDOM_BLOCK_NAME
 	   scalar) readonly buffer ANKI_RANDOM_BLOCK_NAME
 {
 {
-	Cluster u_clusters[];
+	Cluster u_clusters2[];
 };
 };
 #endif
 #endif
 
 

+ 38 - 5
AnKi/Shaders/CollisionFunctions.glsl

@@ -6,22 +6,22 @@
 #pragma once
 #pragma once
 
 
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
-Bool testRayTriangle(Vec3 orig, Vec3 dir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
+Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
 					 out F32 v)
 					 out F32 v)
 {
 {
 	const Vec3 v0v1 = v1 - v0;
 	const Vec3 v0v1 = v1 - v0;
 	const Vec3 v0v2 = v2 - v0;
 	const Vec3 v0v2 = v2 - v0;
-	const Vec3 pvec = cross(dir, v0v2);
+	const Vec3 pvec = cross(rayDir, v0v2);
 	const F32 det = dot(v0v1, pvec);
 	const F32 det = dot(v0v1, pvec);
 
 
-	if((backfaceCulling && det <= 0.0) || det == 0.0)
+	if((backfaceCulling && det < EPSILON) || abs(det) < EPSILON)
 	{
 	{
 		return false;
 		return false;
 	}
 	}
 
 
 	const F32 invDet = 1.0 / det;
 	const F32 invDet = 1.0 / det;
 
 
-	const Vec3 tvec = orig - v0;
+	const Vec3 tvec = rayOrigin - v0;
 	u = dot(tvec, pvec) * invDet;
 	u = dot(tvec, pvec) * invDet;
 	if(u < 0.0 || u > 1.0)
 	if(u < 0.0 || u > 1.0)
 	{
 	{
@@ -29,13 +29,20 @@ Bool testRayTriangle(Vec3 orig, Vec3 dir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfa
 	}
 	}
 
 
 	const Vec3 qvec = cross(tvec, v0v1);
 	const Vec3 qvec = cross(tvec, v0v1);
-	v = dot(dir, qvec) * invDet;
+	v = dot(rayDir, qvec) * invDet;
 	if(v < 0.0 || u + v > 1.0)
 	if(v < 0.0 || u + v > 1.0)
 	{
 	{
 		return false;
 		return false;
 	}
 	}
 
 
 	t = dot(v0v2, qvec) * invDet;
 	t = dot(v0v2, qvec) * invDet;
+
+	if(t <= EPSILON)
+	{
+		// This is an addition to the original code. Can't have rays that don't touch the triangle
+		return false;
+	}
+
 	return true;
 	return true;
 }
 }
 
 
@@ -57,6 +64,32 @@ F32 testRayAabbInside(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax)
 	return distToIntersect;
 	return distToIntersect;
 }
 }
 
 
+/// Ray box intersection by Simon Green. Writes the entry/exit ray parameters to t0/t1 and returns true on a hit.
+Bool testRayAabb(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax, out F32 t0, out F32 t1)
+{
+	const Vec3 invR = 1.0 / rayDir; // NOTE(review): divides by zero when rayDir has a zero component, confirm OK
+	const Vec3 tbot = invR * (aabbMin - rayOrigin); // Per-axis ray parameter at the aabbMin planes
+	const Vec3 ttop = invR * (aabbMax - rayOrigin); // Per-axis ray parameter at the aabbMax planes
+
+	const Vec3 tmin = min(ttop, tbot); // Per-axis nearer crossing
+	const Vec3 tmax = max(ttop, tbot); // Per-axis farther crossing
+
+	t0 = max(tmin.x, max(tmin.y, tmin.z)); // Largest of the near crossings
+	t1 = min(tmax.x, min(tmax.y, tmax.z)); // Smallest of the far crossings
+
+	return t0 < t1 && t1 > EPSILON; // Non-empty interval whose far end is past the ray origin. t0 may be negative
+}
+
+Bool testRayObb(Vec3 rayOrigin, Vec3 rayDir, Vec3 obbExtend, Mat4 obbTransformInv, out F32 t0, out F32 t1)
+{
+	// Transform ray to OBB space
+	const Vec3 rayOriginS = (obbTransformInv * Vec4(rayOrigin, 1.0)).xyz; // w = 1.0: transformed as a point
+	const Vec3 rayDirS = (obbTransformInv * Vec4(rayDir, 0.0)).xyz; // w = 0.0: transformed as a direction, not renormalized
+
+	// Test as AABB
+	return testRayAabb(rayOriginS, rayDirS, -obbExtend, obbExtend, t0, t1); // The box spans [-obbExtend, obbExtend] in that space
+}
+
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
 Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
 Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
 {
 {

+ 33 - 14
AnKi/Shaders/Include/ClusteredShadingTypes2.h

@@ -47,6 +47,7 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(PointLight2) == _ANKI_SIZEOF_PointLight2);
 struct SpotLight2
 struct SpotLight2
 {
 {
 	Vec3 m_position; ///< Position in world space.
 	Vec3 m_position; ///< Position in world space.
+	Vec3 m_edgePoints[4u]; ///< Edge points in world space.
 	Vec3 m_diffuseColor;
 	Vec3 m_diffuseColor;
 	F32 m_radius; ///< Max distance.
 	F32 m_radius; ///< Max distance.
 	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
 	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
@@ -54,13 +55,30 @@ struct SpotLight2
 	Vec3 m_direction; ///< Light direction.
 	Vec3 m_direction; ///< Light direction.
 	F32 m_outerCos;
 	F32 m_outerCos;
 	F32 m_innerCos;
 	F32 m_innerCos;
-	Vec3 m_edgePoints[5u]; ///< Edge points in world space
-	Vec3 m_padding;
+	Vec2 m_padding;
 	Mat4 m_textureMatrix;
 	Mat4 m_textureMatrix;
 };
 };
-const U32 _ANKI_SIZEOF_SpotLight2 = 32u * ANKI_SIZEOF(U32) + ANKI_SIZEOF(Mat4);
+const U32 _ANKI_SIZEOF_SpotLight2 = 28u * ANKI_SIZEOF(U32) + ANKI_SIZEOF(Mat4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight2) == _ANKI_SIZEOF_SpotLight2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight2) == _ANKI_SIZEOF_SpotLight2);
 
 
+/// A different view of SpotLight2 that is designed for binning. Its size and alignment are asserted to match SpotLight2.
+struct SpotLightBinning
+{
+	Vec3 m_edgePoints[5u]; ///< Edge points in world space. Presumably SpotLight2's m_position followed by its 4 m_edgePoints.
+	Vec3 m_diffuseColor;
+	F32 m_radius; ///< Max distance.
+	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
+	Vec3 m_direction; ///< Light direction.
+	F32 m_outerCos;
+	F32 m_innerCos;
+	Vec2 m_padding;
+	Mat4 m_textureMatrix;
+};
+const U32 _ANKI_SIZEOF_SpotLightBinning = _ANKI_SIZEOF_SpotLight2; // Same size as SpotLight2 by construction
+ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLightBinning) == _ANKI_SIZEOF_SpotLightBinning);
+ANKI_SHADER_STATIC_ASSERT(alignof(SpotLightBinning) == alignof(SpotLight2));
+
 /// Directional light (sun).
 /// Directional light (sun).
 struct DirectionalLight2
 struct DirectionalLight2
 {
 {
@@ -93,10 +111,13 @@ struct Decal2
 {
 {
 	Vec4 m_diffuseUv;
 	Vec4 m_diffuseUv;
 	Vec4 m_normRoughnessUv;
 	Vec4 m_normRoughnessUv;
-	Mat4 m_textureMatrix;
 	Vec4 m_blendFactors;
 	Vec4 m_blendFactors;
+	Mat4 m_textureMatrix;
+	Mat4 m_invertedTransform;
+	Vec3 m_obbExtend;
+	F32 m_padding;
 };
 };
-const U32 _ANKI_SIZEOF_Decal2 = 3u * ANKI_SIZEOF(Vec4) + ANKI_SIZEOF(Mat4);
+const U32 _ANKI_SIZEOF_Decal2 = 4u * ANKI_SIZEOF(Vec4) + 2u * ANKI_SIZEOF(Mat4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Decal2) == _ANKI_SIZEOF_Decal2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Decal2) == _ANKI_SIZEOF_Decal2);
 
 
 /// Fog density volume.
 /// Fog density volume.
@@ -156,7 +177,7 @@ struct ClusteredShadingUniforms
 	Vec4 m_nearPlaneWSpace;
 	Vec4 m_nearPlaneWSpace;
 	F32 m_near;
 	F32 m_near;
 	F32 m_far;
 	F32 m_far;
-	F32 m_oneOverFrustumLength; ///< 1/(far-near)
+	F32 m_zSplitCountOverFrustumLength; ///< m_zSplitCount/(far-near)
 	Vec3 m_cameraPosition;
 	Vec3 m_cameraPosition;
 
 
 	UVec2 m_tileCounts;
 	UVec2 m_tileCounts;
@@ -170,15 +191,13 @@ struct ClusteredShadingUniforms
 	U32 m_reflectionProbeCount;
 	U32 m_reflectionProbeCount;
 	U32 m_giProbeCount;
 	U32 m_giProbeCount;
 
 
-	F32 m_padding[3u];
-
 	CommonMatrices m_matrices;
 	CommonMatrices m_matrices;
 	CommonMatrices m_previousMatrices;
 	CommonMatrices m_previousMatrices;
 
 
 	DirectionalLight2 m_directionalLight;
 	DirectionalLight2 m_directionalLight;
 };
 };
 const U32 _ANKI_SIZEOF_ClusteredShadingUniforms =
 const U32 _ANKI_SIZEOF_ClusteredShadingUniforms =
-	28u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
+	24u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == _ANKI_SIZEOF_ClusteredShadingUniforms);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == _ANKI_SIZEOF_ClusteredShadingUniforms);
 
 
 /// Information that a tile or a Z-split will contain.
 /// Information that a tile or a Z-split will contain.
@@ -187,12 +206,12 @@ struct Cluster
 	U64 m_pointLightsMask;
 	U64 m_pointLightsMask;
 	U64 m_spotLightsMask;
 	U64 m_spotLightsMask;
 	U64 m_decalsMask;
 	U64 m_decalsMask;
-	U16 m_fogDensityVolumesMask;
-	U16 m_reflectionProbesMask;
-	U16 m_giProbesMask;
-	U16 m_padding; ///< Add some padding to be 100% sure nothing will break.
+	U32 m_fogDensityVolumesMask;
+	U32 m_reflectionProbesMask;
+	U32 m_giProbesMask;
+	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 };
 };
-const U32 _ANKI_SIZEOF_Cluster = 4u * ANKI_SIZEOF(U64);
+const U32 _ANKI_SIZEOF_Cluster = 5u * ANKI_SIZEOF(U64);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
 
 
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 1 - 1
Tools/Shader/ShaderProgramCompilerMain.cpp

@@ -7,7 +7,7 @@
 #include <AnKi/Util.h>
 #include <AnKi/Util.h>
 using namespace anki;
 using namespace anki;
 
 
-static const char* USAGE = R"(Usage: %s shader_program_file [options]
+static const char* USAGE = R"(Usage: %s input_shader_program_file [options]
 Options:
 Options:
 -o <name of output>    : The name of the output binary
 -o <name of output>    : The name of the output binary
 -j <thread count>      : Number of threads. Defaults to system's max
 -j <thread count>      : Number of threads. Defaults to system's max