瀏覽代碼

Add some code for light/probe visibility

Panagiotis Christopoulos Charitos 2 年之前
父節點
當前提交
dc15130af7

+ 1 - 12
AnKi.natvis

@@ -34,17 +34,6 @@
 		</Expand>
 	</Type>
 
-	<Type Name="anki::DynamicArrayRaii&lt;*&gt;">
-		<Intrinsic Name="size" Expression="m_size" />
-		<DisplayString>{{ size={m_size} capacity={m_capacity} }}</DisplayString>
-		<Expand>
-			<ArrayItems>
-				<Size>m_size</Size>
-				<ValuePointer>m_data</ValuePointer>
-			</ArrayItems>
-		</Expand>
-	</Type>
-
 	<Type Name="anki::Array&lt;*,*&gt;">
 		<DisplayString>{{ size={$T2} }}</DisplayString>
 		<Expand>
@@ -62,7 +51,7 @@
 		<DisplayString>{m_data.m_data,s}</DisplayString>
 	</Type>
 
-	<Type Name="anki::BaseStringRaii&lt;*&gt;">
+	<Type Name="anki::BaseString&lt;*&gt;">
 		<DisplayString Condition="(bool)(m_data.m_data==0)">*Empty*</DisplayString>
 		<DisplayString>{m_data.m_data,s}</DisplayString>
 	</Type>

+ 21 - 0
AnKi/Renderer/GpuVisibility.cpp

@@ -211,4 +211,25 @@ void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique t
 	});
 }
 
+Error GpuVisibilityNonRenderables::init()
+{
+	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));
+
+	// Cache one program for every (HZB_TEST, OBJECT_TYPE) combination. STATS follows the build configuration
+	for(U32 hzb = 0; hzb < 2; ++hzb)
+	{
+		for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
+		{
+			ShaderProgramResourceVariantInitInfo variantInit(m_prog);
+			variantInit.addMutation("HZB_TEST", hzb);
+			variantInit.addMutation("STATS", ANKI_STATS_ENABLED);
+			variantInit.addMutation("OBJECT_TYPE", U32(type));
+			const ShaderProgramResourceVariant* variant = nullptr;
+			m_prog->getOrCreateVariant(variantInit, variant);
+			m_grProgs[hzb][type].reset(&variant->getProgram());
+		}
+	}
+	return Error::kNone; // Was missing. Flowing off the end of a function that returns a value is undefined behaviour
+}
+
 } // end namespace anki

+ 25 - 0
AnKi/Renderer/GpuVisibility.h

@@ -52,6 +52,31 @@ private:
 	U64 m_lastFrameIdx = kMaxU64;
 #endif
 };
+
+/// The "out" parameter of GpuVisibilityNonRenderables::populateRenderGraph(). @memberof GpuVisibilityNonRenderables
+class GpuVisibilityNonRenderablesOutput
+{
+public:
+	BufferHandle m_bufferHandle; ///< Some buffer handle to be used for tracking. No need to track all buffers.
+
+	Array<Buffer*, U32(GpuSceneNonRenderableObjectType::kCount)> m_buffers = {}; ///< One slot per object type.
+	Array<PtrSize, U32(GpuSceneNonRenderableObjectType::kCount)> m_bufferOffsets = {}; ///< One slot per object type.
+	Array<PtrSize, U32(GpuSceneNonRenderableObjectType::kCount)> m_bufferRanges = {}; ///< One slot per object type.
+};
+
+/// GPU visibility of lights, probes etc.
+class GpuVisibilityNonRenderables : public RendererObject
+{
+public:
+	Error init(); ///< Loads the GpuVisibilityNonRenderables program and caches its variants in m_grProgs.
+
+	void populateRenderGraph(CString passesName, GpuSceneNonRenderableObjectTypeBit objectTypes, const Mat4& viewProjectionMat,
+							 const RenderTargetHandle* hzbRt, RenderGraphDescription& rgraph, GpuVisibilityNonRenderablesOutput& out);
+
+private:
+	ShaderProgramResourcePtr m_prog;
+	Array2d<ShaderProgramPtr, 2, U32(GpuSceneNonRenderableObjectType::kCount)> m_grProgs; ///< Indexed with [HZB_TEST][object type].
+};
 /// @}
 
 } // end namespace anki

+ 356 - 0
AnKi/Renderer/TileAllocator2.cpp

@@ -0,0 +1,356 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Renderer/TileAllocator2.h>
+
+namespace anki {
+
+class TileAllocator2::Tile
+{
+public:
+	Timestamp m_lastUsedTimestamp = 0; ///< The last timestamp this tile was used. Zero is treated as "never used".
+	U64 m_lightUuid = 0; ///< The light that occupies the tile. Zero if there is none.
+	U64 m_lightAdditionalIdendification = 0; ///< Compared in allocate() to decide if a cached tile needs a refresh.
+	Array<U32, 4> m_viewport = {}; ///< Set by init() to {x, y, size, size}, expressed in hierarchy 0 tiles.
+	Array<U32, 4> m_subTiles = {kMaxU32, kMaxU32, kMaxU32, kMaxU32}; ///< Indices in m_allTiles. kMaxU32 if there are none.
+	U32 m_superTile = kMaxU32; ///< The parent. kMaxU32 if there is none.
+	U8 m_lightHierarchy = 0; ///< The hierarchy that was requested when m_lightUuid got the tile.
+};
+
+// Nothing to do, the members rely on their default initializers. Defined here, and not in the header, because
+// the header only forward declares Tile.
+TileAllocator2::TileAllocator2() = default;
+
+// Nothing to release explicitly. Defined here, and not in the header, because the header only forward
+// declares Tile.
+TileAllocator2::~TileAllocator2() = default;
+
+void TileAllocator2::init(U32 tileCountX, U32 tileCountY, U32 hierarchyCount, Bool enableCaching)
+{
+	// Builds a flat array with the tiles of all the hierarchies (hierarchy 0 first) and links every tile with its
+	// parent and its 4 sub-tiles.
+
+	ANKI_ASSERT(tileCountX > 0);
+	ANKI_ASSERT(tileCountY > 0);
+	ANKI_ASSERT(hierarchyCount > 0);
+
+	// Remember the configuration
+	m_cachingEnabled = enableCaching;
+	m_hierarchyCount = U8(hierarchyCount);
+	m_tileCountX = U16(tileCountX);
+	m_tileCountY = U16(tileCountY);
+	m_firstTileIdxOfHierarchy.resize(hierarchyCount + 1);
+
+	// 1st pass: Find where every hierarchy starts inside the flat tile array and count all the tiles
+	U32 totalTileCount = 0;
+	for(U32 level = 0; level < hierarchyCount; ++level)
+	{
+		const U32 levelWidth = tileCountX >> level;
+		const U32 levelHeight = tileCountY >> level;
+		ANKI_ASSERT((levelWidth << level) == tileCountX && "Every hierarchy should be power of 2 of its parent hierarchy");
+		ANKI_ASSERT((levelHeight << level) == tileCountY && "Every hierarchy should be power of 2 of its parent hierarchy");
+
+		m_firstTileIdxOfHierarchy[level] = totalTileCount;
+		totalTileCount += levelWidth * levelHeight;
+	}
+	ANKI_ASSERT(totalTileCount >= tileCountX * tileCountY);
+	m_allTiles.resize(totalTileCount);
+
+	// The additional last entry is the index of the very last tile. It's inclusive, not one-past-the-end
+	m_firstTileIdxOfHierarchy[hierarchyCount] = totalTileCount - 1;
+
+	// 2nd pass: Set the viewport of every tile and link it with its 4 sub-tiles
+	U32 crntTileIdx = 0;
+	for(U32 level = 0; level < hierarchyCount; ++level)
+	{
+		const U32 levelWidth = tileCountX >> level;
+		const U32 levelHeight = tileCountY >> level;
+
+		for(U32 tileY = 0; tileY < levelHeight; ++tileY)
+		{
+			for(U32 tileX = 0; tileX < levelWidth; ++tileX, ++crntTileIdx)
+			{
+				ANKI_ASSERT(crntTileIdx >= m_firstTileIdxOfHierarchy[level] && crntTileIdx <= m_firstTileIdxOfHierarchy[level + 1]);
+				Tile& crntTile = m_allTiles[crntTileIdx];
+
+				// Position and size are expressed in tiles of hierarchy 0
+				crntTile.m_viewport[0] = tileX << level;
+				crntTile.m_viewport[1] = tileY << level;
+				crntTile.m_viewport[2] = 1 << level;
+				crntTile.m_viewport[3] = 1 << level;
+
+				if(level == 0)
+				{
+					// The smallest tiles don't have sub-tiles
+					continue;
+				}
+
+				// The sub-tiles are the 2x2 block of the hierarchy below. Visit them row by row
+				for(U32 subTile = 0; subTile < 4; ++subTile)
+				{
+					const U32 subTileX = (tileX << 1) + (subTile & 1);
+					const U32 subTileY = (tileY << 1) + (subTile >> 1);
+					const U32 subTileIdx = translateTileIdx(subTileX, subTileY, level - 1);
+
+					m_allTiles[subTileIdx].m_superTile = crntTileIdx;
+					crntTile.m_subTiles[subTile] = subTileIdx;
+				}
+			}
+		}
+	}
+}
+
+void TileAllocator2::updateSubTiles(const Tile& updateFrom, U64 crntLightUuid, ArrayOfLightUuids& kickedOutLights)
+{
+	if(updateFrom.m_subTiles[0] == kMaxU32)
+	{
+		return; // No sub-tiles, nothing to propagate
+	}
+	for(const U32 subTileIdx : updateFrom.m_subTiles)
+	{
+		Tile& subTile = m_allTiles[subTileIdx];
+		if(subTile.m_lightUuid != 0 && subTile.m_lightUuid != crntLightUuid)
+		{
+			kickedOutLights.emplaceBack(subTile.m_lightUuid); // Occupied by some other light, report it
+		}
+
+		// Copy the state of the tile above and keep going down
+		subTile.m_lightHierarchy = updateFrom.m_lightHierarchy;
+		subTile.m_lightAdditionalIdendification = updateFrom.m_lightAdditionalIdendification;
+		subTile.m_lightUuid = updateFrom.m_lightUuid;
+		subTile.m_lastUsedTimestamp = updateFrom.m_lastUsedTimestamp;
+		updateSubTiles(subTile, crntLightUuid, kickedOutLights);
+	}
+}
+
+void TileAllocator2::updateSuperTiles(const Tile& updateFrom, U64 crntLightUuid, ArrayOfLightUuids& kickedOutLights)
+{
+	// Walk up the chain of parents. Every one of them loses its light and gets the timestamp of updateFrom
+	for(U32 superIdx = updateFrom.m_superTile; superIdx != kMaxU32; superIdx = m_allTiles[superIdx].m_superTile)
+	{
+		Tile& superTile = m_allTiles[superIdx];
+		if(superTile.m_lightUuid != 0 && superTile.m_lightUuid != crntLightUuid)
+		{
+			kickedOutLights.emplaceBack(superTile.m_lightUuid);
+		}
+		superTile.m_lightUuid = 0;
+		superTile.m_lastUsedTimestamp = updateFrom.m_lastUsedTimestamp;
+	}
+}
+
+Bool TileAllocator2::searchTileRecursively(U32 crntTileIdx, U32 crntTileHierarchy, U32 allocationHierarchy, Timestamp crntTimestamp,
+										   U32& emptyTileIdx, U32& toKickTileIdx, Timestamp& tileToKickMinTimestamp) const
+{
+	// Depth-first search that starts from crntTileIdx and evaluates the tiles of the allocationHierarchy that sit
+	// below it. The findings are accumulated in emptyTileIdx, toKickTileIdx and tileToKickMinTimestamp. Returns true
+	// as soon as evaluateCandidate() reports that the search can stop.
+
+	// Reached the hierarchy of the allocation, this tile is a candidate
+	if(crntTileHierarchy == allocationHierarchy)
+	{
+		return evaluateCandidate(crntTileIdx, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+	}
+
+	const Array<U32, 4>& subTiles = m_allTiles[crntTileIdx].m_subTiles;
+	if(subTiles[0] == kMaxU32)
+	{
+		// Can't go any lower
+		return false;
+	}
+
+	// Move down the hierarchy
+	ANKI_ASSERT(allocationHierarchy < crntTileHierarchy);
+
+	for(const U32 subTileIdx : subTiles)
+	{
+		const Bool done = searchTileRecursively(subTileIdx, crntTileHierarchy - 1, allocationHierarchy, crntTimestamp, emptyTileIdx,
+												toKickTileIdx, tileToKickMinTimestamp);
+
+		if(done)
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+Bool TileAllocator2::evaluateCandidate(U32 tileIdx, Timestamp crntTimestamp, U32& emptyTileIdx, U32& toKickTileIdx,
+									   Timestamp& tileToKickMinTimestamp) const
+{
+	const Timestamp lastUsed = m_allTiles[tileIdx].m_lastUsedTimestamp;
+	const Bool usedThisTimestamp = lastUsed == crntTimestamp;
+	if(!m_cachingEnabled)
+	{
+		// Without caching the 1st tile that wasn't used in the current timestamp will do
+		if(usedThisTimestamp)
+		{
+			return false;
+		}
+		emptyTileIdx = tileIdx;
+		return true;
+	}
+
+	if(lastUsed == 0)
+	{
+		// Found empty, stop searching
+		emptyTileIdx = tileIdx;
+		return true;
+	}
+	if(!usedThisTimestamp && lastUsed < tileToKickMinTimestamp)
+	{
+		// Lowest timestamp so far, remember the tile but keep searching
+		toKickTileIdx = tileIdx;
+		tileToKickMinTimestamp = lastUsed;
+	}
+
+	return false;
+}
+
+TileAllocatorResult2 TileAllocator2::allocate(Timestamp crntTimestamp, U64 lightUuid, U64 lightAdditionalIdentification, U32 hierarchy,
+											  Array<U32, 4>& tileViewport, ArrayOfLightUuids& kickedOutLightUuids)
+{
+	// Preconditions. Timestamp 0 and UUID 0 are used internally as "never used" and "no light"
+	ANKI_ASSERT(crntTimestamp > 0);
+	ANKI_ASSERT(lightUuid != 0);
+	ANKI_ASSERT(hierarchy < m_hierarchyCount);
+
+	kickedOutLightUuids.destroy();
+
+	// 1) Search if it's already cached
+	if(m_cachingEnabled)
+	{
+		auto it = m_lightUuidToTileIdx.find(lightUuid);
+		if(it != m_lightUuidToTileIdx.getEnd())
+		{
+			Tile& tile = m_allTiles[*it];
+
+			if(tile.m_lightUuid != lightUuid || tile.m_lightHierarchy != hierarchy)
+			{
+				// Cache entry is stale (the tile holds another light or another hierarchy), remove it and allocate anew
+				m_lightUuidToTileIdx.erase(it);
+			}
+			else
+			{
+				// Same light & hierarchy, found the cache entry.
+
+				ANKI_ASSERT(tile.m_lastUsedTimestamp != crntTimestamp && "Trying to allocate the same thing twice in this timestamp?");
+
+				ANKI_ASSERT(tile.m_lightUuid == lightUuid && tile.m_lightHierarchy == hierarchy);
+
+				tileViewport = {tile.m_viewport[0], tile.m_viewport[1], tile.m_viewport[2], tile.m_viewport[3]};
+
+				const Bool needsReRendering = tile.m_lightAdditionalIdendification != lightAdditionalIdentification;
+
+				tile.m_lastUsedTimestamp = crntTimestamp;
+				tile.m_lightAdditionalIdendification = lightAdditionalIdentification;
+
+				updateTileHierarchy(tile, lightUuid, kickedOutLightUuids);
+				ANKI_ASSERT(kickedOutLightUuids.getSize() == 0);
+
+				TileAllocatorResult2 result = TileAllocatorResult2::kAllocationSucceded;
+				if(needsReRendering)
+				{
+					result |= TileAllocatorResult2::kNeedsRefresh;
+				}
+
+				return result;
+			}
+		}
+	}
+
+	// 2) Start searching for a suitable tile. Do a hierarchical search to end up with better locality rather than better utilization of the atlas' space
+	U32 emptyTileIdx = kMaxU32;
+	U32 toKickTileIdx = kMaxU32;
+	Timestamp tileToKickMinTimestamp = kMaxTimestamp;
+	const U32 maxHierarchy = m_hierarchyCount - 1;
+	if(hierarchy == maxHierarchy)
+	{
+		// This search is simple, iterate the tiles of the max hierarchy
+
+		for(U32 tileIdx = m_firstTileIdxOfHierarchy[maxHierarchy]; tileIdx <= m_firstTileIdxOfHierarchy[maxHierarchy + 1]; ++tileIdx)
+		{
+			const Bool done = evaluateCandidate(tileIdx, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+
+			if(done)
+			{
+				break;
+			}
+		}
+	}
+	else
+	{
+		// Need to do a recursive search
+
+		for(U32 tileIdx = m_firstTileIdxOfHierarchy[maxHierarchy]; tileIdx <= m_firstTileIdxOfHierarchy[maxHierarchy + 1]; ++tileIdx)
+		{
+			const Bool done =
+				searchTileRecursively(tileIdx, maxHierarchy, hierarchy, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+
+			if(done)
+			{
+				break;
+			}
+		}
+	}
+
+	TileAllocatorResult2 result = TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh;
+
+	U32 allocatedTileIdx;
+	if(emptyTileIdx != kMaxU32)
+	{
+		allocatedTileIdx = emptyTileIdx;
+	}
+	else if(toKickTileIdx != kMaxU32)
+	{
+		allocatedTileIdx = toKickTileIdx;
+	}
+	else
+	{
+		// Out of tiles
+		return TileAllocatorResult2::kAllocationFailed;
+	}
+
+	// Allocation succeeded, need to do some bookkeeping
+
+	// Mark the allocated tile
+	Tile& allocatedTile = m_allTiles[allocatedTileIdx];
+	allocatedTile.m_lastUsedTimestamp = crntTimestamp;
+	allocatedTile.m_lightUuid = lightUuid;
+	allocatedTile.m_lightAdditionalIdendification = lightAdditionalIdentification;
+	allocatedTile.m_lightHierarchy = U8(hierarchy);
+
+	updateTileHierarchy(allocatedTile, lightUuid, kickedOutLightUuids);
+
+	if(kickedOutLightUuids.getSize())
+	{
+		result |= TileAllocatorResult2::kOtherTileKicked;
+	}
+
+	// Update the cache
+	if(m_cachingEnabled)
+	{
+		m_lightUuidToTileIdx.emplace(lightUuid, allocatedTileIdx);
+	}
+
+	// Return
+	tileViewport = {allocatedTile.m_viewport[0], allocatedTile.m_viewport[1], allocatedTile.m_viewport[2], allocatedTile.m_viewport[3]};
+
+	return result;
+}
+
+void TileAllocator2::invalidateCache(U64 lightUuid)
+{
+	ANKI_ASSERT(m_cachingEnabled);
+	ANKI_ASSERT(lightUuid > 0);
+	// Only the UUID to tile index mapping is dropped, the tile itself is not touched
+	auto cacheEntry = m_lightUuidToTileIdx.find(lightUuid);
+	if(cacheEntry != m_lightUuidToTileIdx.getEnd())
+	{
+		m_lightUuidToTileIdx.erase(cacheEntry);
+	}
+}
+
+} // end namespace anki

+ 91 - 0
AnKi/Renderer/TileAllocator2.h

@@ -0,0 +1,91 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Renderer/Common.h>
+
+namespace anki {
+
+/// @addtogroup renderer
+/// @{
+
+/// The result of a tile allocation. It's a bitmask, a successful allocation may carry additional flags.
+enum class TileAllocatorResult2 : U32
+{
+	kAllocationFailed = 0, ///< Out of tile space.
+	kAllocationSucceded = 1 << 0, ///< Allocation succeeded.
+	kOtherTileKicked = 1 << 1, ///< Another tile was kicked.
+	kNeedsRefresh = 1 << 2, ///< New allocation or additional identification/hierarchy mismatch. Needs to be re-rendered.
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(TileAllocatorResult2)
+
+/// Allocates tiles out of a tilemap suitable for shadow mapping.
+class TileAllocator2
+{
+public:
+	using ArrayOfLightUuids = DynamicArray<U64, MemoryPoolPtrWrapper<StackMemoryPool>>;
+
+	TileAllocator2();
+
+	TileAllocator2(const TileAllocator2&) = delete; // Non-copyable
+
+	~TileAllocator2();
+
+	TileAllocator2& operator=(const TileAllocator2&) = delete; // Non-copyable
+
+	/// Initialize the allocator.
+	/// @param tileCountX The number of tiles in X for hierarchy 0 (the smallest tiles). tileCountY is the same for Y.
+	void init(U32 tileCountX, U32 tileCountY, U32 hierarchyCount, Bool enableCaching);
+
+	/// Allocate some tiles. kickedOutLightUuids gets the UUIDs of the lights that lost their tiles to this allocation.
+	/// @param hierarchy If it's 0 it chooses the smallest tile.
+	[[nodiscard]] TileAllocatorResult2 allocate(Timestamp crntTimestamp, U64 lightUuid, U64 lightAdditionalIdentification, U32 hierarchy,
+												Array<U32, 4>& tileViewport, ArrayOfLightUuids& kickedOutLightUuids);
+
+	/// Remove a light from the cache. Caching needs to be enabled.
+	void invalidateCache(U64 lightUuid);
+
+private:
+	class Tile;
+
+	RendererDynamicArray<Tile> m_allTiles; ///< The tiles of all hierarchies, hierarchy 0 first.
+	RendererDynamicArray<U32> m_firstTileIdxOfHierarchy; ///< hierarchyCount+1 entries. The last is the index of the last tile.
+
+	RendererHashMap<U64, U32> m_lightUuidToTileIdx; ///< The cache. Entries may be stale, allocate() validates them.
+
+	U16 m_tileCountX = 0; ///< Tile count for hierarchy 0
+	U16 m_tileCountY = 0; ///< Tile count for hierarchy 0
+	U8 m_hierarchyCount = 0;
+	Bool m_cachingEnabled = false;
+
+	U32 translateTileIdx(U32 x, U32 y, U32 hierarchy) const
+	{
+		const U32 hierarchyWidth = m_tileCountX >> hierarchy;
+		const U32 idx = y * hierarchyWidth + x + m_firstTileIdxOfHierarchy[hierarchy]; // Row-major inside the hierarchy
+		ANKI_ASSERT(idx < m_allTiles.getSize());
+		return idx;
+	}
+
+	void updateSubTiles(const Tile& updateFrom, U64 crntLightUuid, ArrayOfLightUuids& kickedOutLights);
+
+	void updateSuperTiles(const Tile& updateFrom, U64 crntLightUuid, ArrayOfLightUuids& kickedOutLights);
+
+	/// Given a tile move the hierarchy up and down to update the hierarchy this tile belongs to.
+	void updateTileHierarchy(const Tile& updateFrom, U64 crntLightUuid, ArrayOfLightUuids& kickedOutLights)
+	{
+		updateSubTiles(updateFrom, crntLightUuid, kickedOutLights);
+		updateSuperTiles(updateFrom, crntLightUuid, kickedOutLights);
+	}
+
+	/// Search for a tile recursively.
+	Bool searchTileRecursively(U32 crntTileIdx, U32 crntTileHierarchy, U32 allocationHierarchy, Timestamp crntTimestamp, U32& emptyTileIdx,
+							   U32& toKickTileIdx, Timestamp& tileToKickMinTimestamp) const;
+
+	Bool evaluateCandidate(U32 tileIdx, Timestamp crntTimestamp, U32& emptyTileIdx, U32& toKickTileIdx, Timestamp& tileToKickMinTimestamp) const;
+};
+/// @}
+
+} // end namespace anki

+ 2 - 0
AnKi/Scene/Components/DecalComponent.cpp

@@ -85,6 +85,8 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		gpuDecal.m_obbExtend = m_obb.getExtend().xyz();
 
+		gpuDecal.m_boundingSphere = Vec4(m_obb.getCenter().xyz(), m_obb.getExtend().getLength());
+
 		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndex.getOffsetInGpuScene(), gpuDecal);
 	}
 

+ 2 - 2
AnKi/Scene/Components/FogDensityComponent.cpp

@@ -48,11 +48,11 @@ Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		if(m_isBox)
 		{
 			gpuVolume.m_aabbMinOrSphereCenter = m_aabbMin.xyz();
-			gpuVolume.m_aabbMaxOrSphereRadiusSquared = m_aabbMax.xyz();
+			gpuVolume.m_aabbMaxOrSphereRadius = m_aabbMax.xyz();
 		}
 		else
 		{
-			gpuVolume.m_aabbMaxOrSphereRadiusSquared = Vec3(m_sphereRadius * m_sphereRadius);
+			gpuVolume.m_aabbMaxOrSphereRadius = Vec3(m_sphereRadius);
 			gpuVolume.m_aabbMinOrSphereCenter = m_worldPos.xyz();
 		}
 		gpuVolume.m_isBox = m_isBox;

+ 2 - 2
AnKi/Shaders/ClusterBinning.ankiprog

@@ -169,11 +169,11 @@ Bool isGiProbe(U32 objectIdx)
 
 		if(vol.m_isBox != 0u)
 		{
-			collides = testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, t0, t1);
+			collides = testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadius, t0, t1);
 		}
 		else
 		{
-			collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, sqrt(vol.m_aabbMaxOrSphereRadiusSquared.x), t0, t1);
+			collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadius.x, t0, t1);
 		}
 	}
 	// Reflection probe

+ 12 - 0
AnKi/Shaders/CollisionFunctions.hlsl

@@ -165,3 +165,15 @@ F32 testPlaneSphere(Vec3 planeNormal, F32 planeOffset, Vec3 sphereCenter, F32 sp
 	dist = centerDist + sphereRadius;
 	return (dist < 0.0f) ? dist : 0.0f;
 }
+
+/// Returns true if the result of testPlanePoint() for the sphere center is greater than -sphereRadius for all 6 planes.
+Bool frustumTest(Vec4 frustumPlanes[6], Vec3 sphereCenter, F32 sphereRadius)
+{
+	F32 closest = testPlanePoint(frustumPlanes[0].xyz, frustumPlanes[0].w, sphereCenter);
+	[unroll] for(U32 plane = 1; plane < 6; ++plane)
+	{
+		closest = min(closest, testPlanePoint(frustumPlanes[plane].xyz, frustumPlanes[plane].w, sphereCenter));
+	}
+
+	return closest > -sphereRadius;
+}

+ 1 - 8
AnKi/Shaders/GpuVisibility.ankiprog

@@ -58,14 +58,7 @@
 
 	// Frustum test
 	//
-	F32 minPlaneDistance = testPlanePoint(g_unis.m_clipPlanes[0].xyz, g_unis.m_clipPlanes[0].w, aabb.m_sphereCenter);
-	[unroll] for(U32 i = 1; i < 6; ++i)
-	{
-		const F32 d = testPlanePoint(g_unis.m_clipPlanes[i].xyz, g_unis.m_clipPlanes[i].w, aabb.m_sphereCenter);
-		minPlaneDistance = min(minPlaneDistance, d);
-	}
-
-	if(minPlaneDistance <= aabb.m_negativeSphereRadius)
+	if(!frustumTest(g_unis.m_clipPlanes, aabb.m_sphereCenter, -aabb.m_negativeSphereRadius))
 	{
 		return;
 	}

+ 104 - 0
AnKi/Shaders/GpuVisibilityNonRenderables.ankiprog

@@ -0,0 +1,104 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki mutator HZB_TEST 0 1
+#pragma anki mutator STATS 0 1
+#pragma anki mutator OBJECT_TYPE 0 1 2 3 4 5
+
+#pragma anki start comp
+
+#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
+#include <AnKi/Shaders/CollisionFunctions.hlsl>
+
+#if OBJECT_TYPE == 0
+typedef GpuScenePointLight ObjectType;
+#elif OBJECT_TYPE == 1
+typedef GpuSceneSpotLight ObjectType;
+#elif OBJECT_TYPE == 2
+typedef GpuSceneDecal ObjectType;
+#elif OBJECT_TYPE == 3
+typedef GpuSceneFogDensityVolume ObjectType;
+#elif OBJECT_TYPE == 4
+typedef GpuSceneReflectionProbe ObjectType;
+#else
+typedef GpuSceneGlobalIlluminationProbe ObjectType;
+#endif
+
+[[vk::binding(0)]] StructuredBuffer<ObjectType> g_objects;
+[[vk::binding(1)]] ConstantBuffer<GpuVisibilityUniforms> g_unis;
+[[vk::binding(2)]] RWStructuredBuffer<U32> g_visibleCount;
+[[vk::binding(3)]] RWStructuredBuffer<U32> g_visibleIndices;
+
+Vec4 getSphere(GpuScenePointLight l)
+{
+	return Vec4(l.m_position, l.m_radius); // xyz is the center, w is the radius
+}
+
+Vec4 getSphere(GpuSceneSpotLight l)
+{
+	return Vec4(l.m_position, l.m_radius); // The whole sphere around the light's position, the cone is not considered
+}
+
+Vec4 getSphere(GpuSceneDecal l)
+{
+	return l.m_boundingSphere; // Already stored in the GPU scene struct. xyz is the center, w is the radius
+}
+
+Vec4 getSphere(GpuSceneFogDensityVolume l)
+{
+	if(!l.m_isBox)
+	{
+		// It's a sphere, the members hold the center and the radius
+		return Vec4(l.m_aabbMinOrSphereCenter, l.m_aabbMaxOrSphereRadius.x);
+	}
+
+	// It's a box, use the sphere that passes through the corners of the AABB
+	const Vec3 aabbMax = l.m_aabbMaxOrSphereRadius;
+	const Vec3 center = (l.m_aabbMinOrSphereCenter + aabbMax) / 2.0f;
+	return Vec4(center, length(aabbMax - center));
+}
+
+Vec4 getSphere(GpuSceneReflectionProbe l)
+{
+	// The sphere that passes through the corners of the probe's AABB
+	const Vec3 aabbCenter = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
+	return Vec4(aabbCenter, length(aabbCenter - l.m_aabbMax));
+}
+
+Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
+{
+	// The sphere that passes through the corners of the probe's AABB
+	const Vec3 aabbCenter = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
+	return Vec4(aabbCenter, length(aabbCenter - l.m_aabbMax));
+}
+
+[numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+{
+	U32 objectCount, unused;
+	g_objects.GetDimensions(objectCount, unused);
+
+	if(svDispatchThreadId.x >= objectCount)
+	{
+		return;
+	}
+
+	// Frustum test. One thread per object, the object is represented by its bounding sphere
+	//
+	const Vec4 sphere = getSphere(g_objects[svDispatchThreadId.x]);
+	if(!frustumTest(g_unis.m_clipPlanes, sphere.xyz, sphere.w))
+	{
+		return;
+	}
+
+	// Add the object. Element 0 of g_visibleIndices is the atomic counter so the indices are stored from element 1
+	// onwards. Writing to [idx] would overwrite the counter when idx is 0
+	U32 idx;
+	InterlockedAdd(g_visibleIndices[0], 1, idx);
+	g_visibleIndices[idx + 1] = svDispatchThreadId.x;
+}
+
+#pragma anki end

+ 36 - 5
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -61,8 +61,10 @@ struct GpuScenePointLight
 	RVec3 m_diffuseColor;
 	RF32 m_squareRadiusOverOne; ///< 1/(radius^2).
 
-	Vec3 m_padding;
 	U32 m_shadow;
+	U32 m_uuid;
+	U32 m_padding0;
+	U32 m_padding1;
 };
 constexpr U32 kSizeof_GpuScenePointLight = 3u * sizeof(Vec4);
 static_assert(sizeof(GpuScenePointLight) == kSizeof_GpuScenePointLight);
@@ -84,7 +86,7 @@ struct GpuSceneSpotLight
 	U32 m_shadow;
 	RF32 m_outerCos;
 	RF32 m_innerCos;
-	U32 m_padding1;
+	U32 m_uuid;
 };
 constexpr U32 kSizeof_GpuSceneSpotLight = 8u * sizeof(Vec4);
 static_assert(sizeof(GpuSceneSpotLight) == kSizeof_GpuSceneSpotLight);
@@ -96,7 +98,7 @@ struct GpuSceneReflectionProbe
 	U32 m_cubeTexture; ///< Bindless index of the reflection texture.
 
 	Vec3 m_aabbMin;
-	F32 m_padding0;
+	F32 m_uuid;
 
 	Vec3 m_aabbMax;
 	F32 m_padding1;
@@ -136,8 +138,10 @@ struct GpuSceneDecal
 
 	Vec3 m_obbExtend;
 	F32 m_padding0;
+
+	Vec4 m_boundingSphere;
 };
-constexpr U32 kSizeof_GpuSceneDecal = 2u * sizeof(Vec4) + 2u * sizeof(Mat4);
+constexpr U32 kSizeof_GpuSceneDecal = 3u * sizeof(Vec4) + 2u * sizeof(Mat4);
 static_assert(sizeof(GpuSceneDecal) == kSizeof_GpuSceneDecal);
 
 /// Fog density volume.
@@ -146,10 +150,37 @@ struct GpuSceneFogDensityVolume
 	Vec3 m_aabbMinOrSphereCenter;
 	U32 m_isBox;
 
-	Vec3 m_aabbMaxOrSphereRadiusSquared;
+	Vec3 m_aabbMaxOrSphereRadius;
 	RF32 m_density;
 };
 constexpr U32 kSizeof_GpuSceneFogDensityVolume = 2u * sizeof(Vec4);
 static_assert(sizeof(GpuSceneFogDensityVolume) == kSizeof_GpuSceneFogDensityVolume);
 
+enum class GpuSceneNonRenderableObjectType : U32
+{
+	kPointLight, ///< The values double as the OBJECT_TYPE mutator value of GpuVisibilityNonRenderables.ankiprog.
+	kSpotLight,
+	kDecal,
+	kFogDensityVolume,
+	kReflectionProbe,
+	kGlobalIlluminationProbe,
+
+	kCount, ///< The number of object types. Used to size per-type arrays.
+	kFirst = 0
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneNonRenderableObjectType)
+
+enum class GpuSceneNonRenderableObjectTypeBit : U32
+{
+	kNone = 0,
+
+	kPointLight = 1 << 0, ///< The bit positions follow the order of GpuSceneNonRenderableObjectType.
+	kSpotLight = 1 << 1,
+	kDecal = 1 << 2,
+	kFogDensityVolume = 1 << 3,
+	kReflectionProbe = 1 << 4,
+	kGlobalIlluminationProbe = 1 << 5,
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneNonRenderableObjectTypeBit)
+
 ANKI_END_NAMESPACE

+ 47 - 0
AnKi/Shaders/Include/GpuVisibilityTypes.h

@@ -0,0 +1,47 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Include/Common.h>
+
+ANKI_BEGIN_NAMESPACE
+
+struct GpuVisibilityUniforms
+{
+	Vec4 m_clipPlanes[6u]; ///< The planes that frustumTest() checks the bounding spheres against.
+
+	UVec3 m_padding1;
+	U32 m_aabbCount;
+
+	Vec4 m_maxLodDistances;
+
+	Vec3 m_lodReferencePoint;
+	F32 m_padding2;
+
+	Mat4 m_viewProjectionMat;
+};
+
+struct PointLightRendererCacheEntry
+{
+	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
+	F32 m_shadowAtlasTileScale; ///< UV scale for all tiles.
+	U32 m_uuid;
+	F32 m_padding0;
+
+	Vec4 m_shadowAtlasTileOffsets[6u]; ///< It's an array of Vec2 but because of padding it's rounded up to Vec4.
+};
+
+struct SpotLightRendererCacheEntry
+{
+	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
+	U32 m_uuid;
+	U32 m_padding0; // The 2 paddings complete the 16 byte row that precedes the matrix
+	U32 m_padding1;
+
+	Mat4 m_textureMatrix;
+};
+
+ANKI_END_NAMESPACE

+ 3 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -28,6 +28,7 @@ using U8 = unsigned int;
 using U16 = unsigned int;
 using U32 = unsigned int;
 using F32 = float;
+using Bool = bool;
 
 struct UVec2
 {
@@ -120,6 +121,8 @@ template<typename T>
 struct StructuredBuffer
 {
 	T& operator[](U32 index);
+
+	void GetDimensions(U32& length, U32& stride);
 };
 
 template<typename T>

+ 2 - 2
AnKi/Shaders/VolumetricLightingAccumulation.ankiprog

@@ -225,12 +225,12 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 			F32 factor;
 			[branch] if(vol.m_isBox == 1u)
 			{
-				factor = computeProbeBlendWeight(worldPos, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, 0.2);
+				factor = computeProbeBlendWeight(worldPos, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadius, 0.2);
 			}
 			else
 			{
 				const Vec3 diff = worldPos - vol.m_aabbMinOrSphereCenter;
-				F32 distSq = dot(diff, diff) / vol.m_aabbMaxOrSphereRadiusSquared.x;
+				F32 distSq = dot(diff, diff) / (vol.m_aabbMaxOrSphereRadius.x * vol.m_aabbMaxOrSphereRadius.x);
 				distSq = min(1.0, distSq);
 				factor = 1.0 - distSq;
 			}

+ 101 - 55
Tests/Renderer/TileAllocator.cpp

@@ -4,64 +4,110 @@
 // http://www.anki3d.org/LICENSE
 
 #include <Tests/Framework/Framework.h>
-#include <AnKi/Renderer/TileAllocator.h>
+#include <AnKi/Renderer/TileAllocator2.h>
 
 ANKI_TEST(Renderer, TileAllocator)
 {
-	TileAllocator talloc;
-	talloc.init(8, 8, 3, true);
-
-	Array<U32, 4> viewport;
-	TileAllocatorResult res;
-
-	const U lightUuid = 1;
-	const U dcCount = 666;
-	Timestamp crntTimestamp = 1;
-	Timestamp lightTimestamp = 1;
-
-	// Allocate 1 med
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 0, dcCount, 1, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-
-	// Allocate 3 big
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 2, 0, dcCount, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 3, 0, dcCount, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 4, 0, dcCount, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-
-	// Fail to allocate 1 big
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 5, 0, dcCount, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationFailed);
-
-	// Allocate 3 med
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 1, dcCount, 1, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 2, dcCount, 1, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 3, dcCount, 1, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-
-	// Fail to allocate a small
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 6, 0, dcCount, 0, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationFailed);
-
-	// New frame
-	++crntTimestamp;
-
-	// Allocate 3 big again
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 2, 0, dcCount + 1, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 3, 0, dcCount, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kCached);
-	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 4, 0, dcCount + 1, 2, viewport);
-	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
-
-	// Allocate 16 small
-	for(U i = 0; i < 16; ++i)
+	RendererMemoryPool::allocateSingleton(allocAligned, nullptr);
+
 	{
-		res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 6 + i, 0, dcCount, 0, viewport);
-		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::kAllocationSucceded);
+		StackMemoryPool pool;
+		pool.init(allocAligned, nullptr, 1024);
+
+		TileAllocator2::ArrayOfLightUuids kickedOutUuids(&pool);
+
+		TileAllocator2 talloc;
+		talloc.init(8, 8, 3, true);
+
+		Array<U32, 4> viewport;
+		TileAllocatorResult2 res;
+
+		const U64 lightUuid = 1000;
+		const U64 dcCount = 666; // Passed as the additional identification of the lights
+		Timestamp crntTimestamp = 1;
+
+		constexpr U kSmallTile = 0; // Hierarchy 0
+		constexpr U kMedTile = 1; // Hierarchy 1
+		constexpr U kBigTile = 2; // Hierarchy 2, the max with 3 hierarchies
+
+		// Allocate 1 med
+		res = talloc.allocate(crntTimestamp, lightUuid + 1, dcCount, kMedTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+
+		// Allocate 3 big
+		res = talloc.allocate(crntTimestamp, lightUuid + 2, dcCount, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+		res = talloc.allocate(crntTimestamp, lightUuid + 3, dcCount, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+		res = talloc.allocate(crntTimestamp, lightUuid + 4, dcCount, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+
+		// Fail to allocate 1 big
+		res = talloc.allocate(crntTimestamp, lightUuid + 5, dcCount, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationFailed);
+
+		// Allocate 3 med
+		res = talloc.allocate(crntTimestamp, lightUuid + 6, dcCount, kMedTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+		res = talloc.allocate(crntTimestamp, lightUuid + 7, dcCount, kMedTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+		res = talloc.allocate(crntTimestamp, lightUuid + 8, dcCount, kMedTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+
+		// Fail to allocate a small
+		res = talloc.allocate(crntTimestamp, lightUuid + 9, dcCount, kSmallTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationFailed);
+
+		// New frame
+		++crntTimestamp;
+
+		// Allocate the same 3 big again. The additional identification changes for the 1st and the 3rd only
+		res = talloc.allocate(crntTimestamp, lightUuid + 2, dcCount + 1, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+		res = talloc.allocate(crntTimestamp, lightUuid + 3, dcCount, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded);
+		res = talloc.allocate(crntTimestamp, lightUuid + 4, dcCount + 1, kBigTile, viewport, kickedOutUuids);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh);
+
+		// New frame
+		++crntTimestamp;
+
+		// Allocate 16 small. It's the 4 med tiles of the 1st frame that are expected to be kicked
+		TileAllocator2::ArrayOfLightUuids allKicked(&pool);
+		for(U i = 0; i < 16; ++i)
+		{
+			res = talloc.allocate(crntTimestamp, lightUuid + 10 + i, dcCount, 0, viewport, kickedOutUuids);
+			ANKI_TEST_EXPECT_EQ(!!(res & (TileAllocatorResult2::kAllocationSucceded | TileAllocatorResult2::kNeedsRefresh)), true);
+
+			for(U64 uuid : kickedOutUuids)
+			{
+				allKicked.emplaceBack(uuid);
+			}
+		}
+
+		// Check those that are kicked
+		ANKI_TEST_EXPECT_EQ(allKicked.getSize(), 4);
+
+		for(U64 uuid = lightUuid + 1; uuid <= lightUuid + 9; ++uuid)
+		{
+			auto it = std::find(allKicked.getBegin(), allKicked.getEnd(), uuid);
+
+			if(uuid == lightUuid + 5 || uuid == lightUuid + 9)
+			{
+				// Allocation failures, skip
+			}
+			else if(uuid >= lightUuid + 2 && uuid <= lightUuid + 4)
+			{
+				// It's the big tiles, shouldn't have been kicked
+				ANKI_TEST_EXPECT_EQ(it, allKicked.getEnd());
+			}
+			else
+			{
+				// Tiles from the 1st frame, should have been kicked all
+				ANKI_TEST_EXPECT_NEQ(it, allKicked.getEnd());
+			}
+		}
 	}
+
+	RendererMemoryPool::freeSingleton();
 }