Browse Source

Optimize the clusterer a bit

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
10119bed8c

+ 9 - 15
shaders/ClusteredShadingCommon.glsl

@@ -132,32 +132,26 @@ layout(std430, ANKI_SS_BINDING(LIGHT_SET, LIGHT_SS_BINDING + 1)) readonly buffer
 Vec3 lightHeatmap(U32 firstIndex, U32 maxLights, Bool decals, Bool plights, Bool slights, Bool probes)
 {
 	U32 count = 0;
+	U32 idx;
 
-	U32 decalCount = u_lightIndices[firstIndex];
-	firstIndex += decalCount + 1u;
-	if(decals)
+	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += decalCount;
+		count += (plights) ? 1u : 0u;
 	}
 
-	U32 pointLightCount = u_lightIndices[firstIndex];
-	firstIndex += pointLightCount + 1u;
-	if(plights)
+	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += pointLightCount;
+		count += (slights) ? 1u : 0u;
 	}
 
-	U32 spotLightCount = u_lightIndices[firstIndex];
-	firstIndex += spotLightCount + 1u;
-	if(slights)
+	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += spotLightCount;
+		count += (probes) ? 1u : 0u;
 	}
 
-	U32 probeCount = u_lightIndices[firstIndex];
-	if(probes)
+	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += probeCount;
+		count += (decals) ? 1u : 0u;
 	}
 
 	F32 factor = min(1.0, F32(count) / F32(maxLights));

+ 5 - 12
shaders/ForwardShadingCommonFrag.glsl

@@ -49,16 +49,11 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 
 	U32 idxOffset = u_clusters[clusterIdx];
 
-	// Skip decals
-	U32 count = u_lightIndices[idxOffset];
-	idxOffset += count + 1;
-
 	// Point lights
-	count = u_lightIndices[idxOffset++];
-	U32 idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	U32 idx;
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		PointLight light = u_pointLights[u_lightIndices[idxOffset++]];
+		PointLight light = u_pointLights[idx];
 
 		Vec3 diffC = diffCol * light.m_diffuseColorTileSize.rgb;
 
@@ -80,11 +75,9 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	}
 
 	// Spot lights
-	count = u_lightIndices[idxOffset++];
-	idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		SpotLight light = u_spotLights[u_lightIndices[idxOffset++]];
+		SpotLight light = u_spotLights[idx];
 
 		Vec3 diffC = diffCol * light.m_diffuseColorShadowmapId.rgb;
 

+ 6 - 5
shaders/GBufferPost.glslp

@@ -66,7 +66,7 @@ void main()
 	Vec4 worldPos4 = u_invViewProjMat * Vec4(ndc, depth, 1.0);
 	Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
-	// Get first light index
+	// Get first decal index
 	U32 idxOffset;
 	{
 		U32 k = computeClusterK(u_clustererMagic, worldPos);
@@ -74,18 +74,19 @@ void main()
 			k * (CLUSTER_COUNT_X * CLUSTER_COUNT_Y) + U32(in_clusterIJ.y) * CLUSTER_COUNT_X + U32(in_clusterIJ.x);
 
 		idxOffset = u_clusters[clusterIdx];
+		idxOffset = u_lightIndices[idxOffset - 2u]; // Use the offset metadata
 	}
 
 	// Process decals
-	U32 count = u_lightIndices[idxOffset++];
-	if(count == 0)
+	U32 idx = u_lightIndices[idxOffset];
+	if(idx == MAX_U32)
 	{
 		discard;
 	}
 
-	while(count-- != 0)
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		Decal decal = u_decals[u_lightIndices[idxOffset++]];
+		Decal decal = u_decals[idx];
 
 		// Project pos to decal space
 		Vec4 texCoords4 = decal.m_texProjectionMat * Vec4(worldPos, 1.0);

+ 8 - 16
shaders/LightShading.glslp

@@ -74,11 +74,10 @@ void readReflectionsAndIrradianceFromProbes(U32 idxOffset,
 	F32 totalBlendWeight = EPSILON;
 
 	// Check proxy
-	U32 count = u_lightIndices[idxOffset++];
-	U32 idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	U32 idx;
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		ReflectionProbe probe = u_reflectionProbes[u_lightIndices[idxOffset++]];
+		ReflectionProbe probe = u_reflectionProbes[idx];
 		Vec3 aabbMin = probe.m_aabbMinPad1.xyz;
 		Vec3 aabbMax = probe.m_aabbMaxPad1.xyz;
 		Vec3 probeOrigin = probe.m_positionCubemapIndex.xyz;
@@ -146,17 +145,12 @@ void main()
 	// Ambient and emissive color
 	out_color = gbuffer.m_diffuse * gbuffer.m_emission;
 
-	// Skip decals
-	U32 count = u_lightIndices[idxOffset];
-	idxOffset += count + 1u;
-
 	// Point lights
 	Vec3 viewDir = normalize(u_cameraPos - worldPos);
-	count = u_lightIndices[idxOffset++];
-	U32 idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	U32 idx;
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		PointLight light = u_pointLights[u_lightIndices[idxOffset++]];
+		PointLight light = u_pointLights[idx];
 
 		LIGHTING_COMMON_BRDF();
 
@@ -171,11 +165,9 @@ void main()
 	}
 
 	// Spot lights
-	count = u_lightIndices[idxOffset++];
-	idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		SpotLight light = u_spotLights[u_lightIndices[idxOffset++]];
+		SpotLight light = u_spotLights[idx];
 
 		LIGHTING_COMMON_BRDF();
 

+ 7 - 16
shaders/VolumetricLightingAccumulation.glslp

@@ -98,16 +98,11 @@ Vec3 accumulateLights(U32 clusterIdx, Vec3 worldPos)
 	// Get ID offset
 	U32 idxOffset = u_clusters[clusterIdx];
 
-	// Skip decals
-	U32 count = u_lightIndices[idxOffset];
-	idxOffset += count + 1u;
-
 	// Point lights
-	count = u_lightIndices[idxOffset++];
-	U32 idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	U32 idx;
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		PointLight light = u_pointLights[u_lightIndices[idxOffset++]];
+		PointLight light = u_pointLights[idx];
 
 		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
 		F32 factor = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
@@ -126,11 +121,9 @@ Vec3 accumulateLights(U32 clusterIdx, Vec3 worldPos)
 	}
 
 	// Spot lights
-	count = u_lightIndices[idxOffset++];
-	idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		SpotLight light = u_spotLights[u_lightIndices[idxOffset++]];
+		SpotLight light = u_spotLights[idx];
 
 		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
 		F32 factor = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
@@ -157,11 +150,9 @@ Vec3 accumulateLights(U32 clusterIdx, Vec3 worldPos)
 	// Probes
 	F32 totalBlendWeight = EPSILON;
 	Vec3 diffIndirect = Vec3(0.0);
-	count = u_lightIndices[idxOffset++];
-	idxOffsetEnd = idxOffset + count;
-	ANKI_LOOP while(idxOffset < idxOffsetEnd)
+	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
-		ReflectionProbe probe = u_reflectionProbes[u_lightIndices[idxOffset++]];
+		ReflectionProbe probe = u_reflectionProbes[idx];
 		Vec3 aabbMin = probe.m_aabbMinPad1.xyz;
 		Vec3 aabbMax = probe.m_aabbMaxPad1.xyz;
 		Vec3 probeOrigin = probe.m_positionCubemapIndex.xyz;

+ 4 - 0
shaders/glsl_cpp_common/ClusteredShading.h

@@ -9,6 +9,10 @@
 
 ANKI_BEGIN_NAMESPACE
 
+// Consts
+const U32 TYPED_OBJECT_COUNT = 5u;
+const F32 INVALID_TEXTURE_INDEX = -1.0;
+
 // See the documentation in the ClusterBin class.
 struct ClustererMagicValues
 {

+ 109 - 134
src/anki/renderer/ClusterBin.cpp

@@ -13,9 +13,6 @@
 namespace anki
 {
 
-static const U32 TYPED_OBJECT_COUNT = 4; // Point, spot, decal & probe
-static const F32 INVALID_TEXTURE_INDEX = -1.0;
-
 /// Get a view space point.
 static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& unprojParams)
 {
@@ -69,22 +66,36 @@ public:
 class ClusterBin::TileCtx
 {
 public:
+	struct ClusterMetaInfo
+	{
+		Array<U16, TYPED_OBJECT_COUNT> m_counts;
+		U16 m_offset;
+	};
+
+	DynamicArrayAuto<Vec4> m_clusterEdgesWSpace;
+	DynamicArrayAuto<Aabb> m_clusterBoxes;
+	DynamicArrayAuto<Sphere> m_clusterSpheres;
+
+	DynamicArrayAuto<ClusterMetaInfo> m_clusterInfos;
+	DynamicArrayAuto<U32> m_indices;
+
+	U32 m_clusterCountZ = MAX_U32;
+
 	TileCtx(StackAllocator<U8>& alloc)
 		: m_clusterEdgesWSpace(alloc)
 		, m_clusterBoxes(alloc)
 		, m_clusterSpheres(alloc)
+		, m_clusterInfos(alloc)
 		, m_indices(alloc)
-		, m_pIndices(alloc)
-		, m_pCounts(alloc)
 	{
 	}
 
-	DynamicArrayAuto<Vec4> m_clusterEdgesWSpace;
-	DynamicArrayAuto<Aabb> m_clusterBoxes;
-	DynamicArrayAuto<Sphere> m_clusterSpheres;
-	DynamicArrayAuto<U32> m_indices;
-	DynamicArrayAuto<U32*> m_pIndices;
-	DynamicArrayAuto<U32*> m_pCounts;
+	WeakArray<U32> getClusterIndices(const U clusterZ)
+	{
+		ANKI_ASSERT(clusterZ < m_clusterCountZ);
+		const U perClusterCount = m_indices.getSize() / m_clusterCountZ;
+		return WeakArray<U32>(&m_indices[perClusterCount * clusterZ], perClusterCount);
+	}
 };
 
 ClusterBin::~ClusterBin()
@@ -103,7 +114,13 @@ void ClusterBin::init(
 
 	m_totalClusterCount = clusterCountX * clusterCountY * clusterCountZ;
 
-	m_indexCount = m_totalClusterCount * cfg.getNumber("r.avgObjectsPerCluster");
+	m_avgObjectsPerCluster = cfg.getNumber("r.avgObjectsPerCluster");
+
+	// The actual index count per cluster is the sum of:
+	// - the object indices of the cluster
+	// - TYPED_OBJECT_COUNT-1 offsets, one for every object type except the first
+	// - TYPED_OBJECT_COUNT stopper (dummy) indices, one terminating each object type's list
+	m_indexCount = m_totalClusterCount * (m_avgObjectsPerCluster + TYPED_OBJECT_COUNT - 1 + TYPED_OBJECT_COUNT);
 
 	m_clusterEdges.create(m_alloc, m_clusterCounts[0] * m_clusterCounts[1] * (m_clusterCounts[2] + 1) * 4);
 }
@@ -163,14 +180,13 @@ void ClusterBin::bin(ClusterBinIn& in, ClusterBinOut& out)
 			BinCtx& ctx = *self;
 
 			TileCtx tileCtx(ctx.m_in->m_tempAlloc);
-			const U clusterCountZ = ctx.m_bin->m_clusterCounts[2];
-			const U32 avgIndicesPerCluster = ctx.m_bin->m_indexCount / ctx.m_bin->m_totalClusterCount;
+			const U32 clusterCountZ = ctx.m_bin->m_clusterCounts[2];
 			tileCtx.m_clusterEdgesWSpace.create((clusterCountZ + 1) * 4);
 			tileCtx.m_clusterBoxes.create(clusterCountZ);
 			tileCtx.m_clusterSpheres.create(clusterCountZ);
-			tileCtx.m_indices.create(clusterCountZ * avgIndicesPerCluster);
-			tileCtx.m_pIndices.create(clusterCountZ);
-			tileCtx.m_pCounts.create(clusterCountZ);
+			tileCtx.m_indices.create(clusterCountZ * ctx.m_bin->m_avgObjectsPerCluster);
+			tileCtx.m_clusterInfos.create(clusterCountZ);
+			tileCtx.m_clusterCountZ = clusterCountZ;
 
 			const U tileCount = ctx.m_bin->m_clusterCounts[0] * ctx.m_bin->m_clusterCounts[1];
 			U tileIdx;
@@ -245,7 +261,6 @@ void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
 	ANKI_ASSERT(tileIdx < m_clusterCounts[0] * m_clusterCounts[1]);
 	const U tileX = tileIdx % m_clusterCounts[0];
 	const U tileY = tileIdx / m_clusterCounts[0];
-	const U32 avgIndicesPerCluster = m_indexCount / m_totalClusterCount;
 
 	// Compute the tile's cluster edges in view space
 	WeakArray<Vec4> clusterEdgesVSpace(
@@ -316,68 +331,22 @@ void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
 		clusterSpheres[clusterZ] = Sphere(sphereCenter, (aabbMin - sphereCenter).getLength());
 	}
 
-	// Set temp indices for each cluster
-	DynamicArrayAuto<U32>& indices = tileCtx.m_indices;
-	DynamicArrayAuto<U32*>& pIndices = tileCtx.m_pIndices;
-	DynamicArrayAuto<U32*>& pCounts = tileCtx.m_pCounts;
-
-	for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-	{
-		pIndices[clusterZ] = &indices[clusterZ * avgIndicesPerCluster];
-	}
-
-	// Decals
-	{
-		for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-		{
-			pCounts[clusterZ] = pIndices[clusterZ];
-			*pCounts[clusterZ] = 0;
-			++pIndices[clusterZ];
-		}
+	// Zero the infos
+	memset(&tileCtx.m_clusterInfos[0], 0, tileCtx.m_clusterInfos.getSizeInBytes());
 
-		Obb decalBox;
-		for(U i = 0; i < ctx.m_in->m_renderQueue->m_decals.getSize(); ++i)
-		{
-			const DecalQueueElement& decal = ctx.m_in->m_renderQueue->m_decals[i];
-			decalBox.setCenter(decal.m_obbCenter.xyz0());
-			decalBox.setRotation(Mat3x4(decal.m_obbRotation));
-			decalBox.setExtend(decal.m_obbExtend.xyz0());
-
-			if(!insideClusterFrustum(frustumPlanes, decalBox))
-			{
-				continue;
-			}
-
-			for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-			{
-				if(!testCollisionShapes(decalBox, clusterBoxes[clusterZ]))
-				{
-					continue;
-				}
-
-				const U32 count = pIndices[clusterZ] - &indices[clusterZ * avgIndicesPerCluster];
-				if(ANKI_UNLIKELY(count + 3 >= avgIndicesPerCluster))
-				{
-					ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster");
-					continue;
-				}
-
-				*pIndices[clusterZ] = i;
-				*pCounts[clusterZ] += 1;
-				++pIndices[clusterZ];
-			}
-		}
-	}
+#define ANKI_SET_IDX(typeIdx) \
+	ClusterBin::TileCtx::ClusterMetaInfo& inf = tileCtx.m_clusterInfos[clusterZ]; \
+	if(ANKI_UNLIKELY(inf.m_offset + 1 >= m_avgObjectsPerCluster)) \
+	{ \
+		ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster"); \
+		continue; \
+	} \
+	tileCtx.getClusterIndices(clusterZ)[inf.m_offset++] = i; \
+	++inf.m_counts[typeIdx]; \
+	ANKI_ASSERT(inf.m_counts[typeIdx] <= m_avgObjectsPerCluster)
 
 	// Point lights
 	{
-		for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-		{
-			pCounts[clusterZ] = pIndices[clusterZ];
-			*pCounts[clusterZ] = 0;
-			++pIndices[clusterZ];
-		}
-
 		Sphere lightSphere;
 		for(U i = 0; i < ctx.m_in->m_renderQueue->m_pointLights.getSize(); ++i)
 		{
@@ -397,29 +366,13 @@ void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
 					continue;
 				}
 
-				const U32 count = pIndices[clusterZ] - &indices[clusterZ * avgIndicesPerCluster];
-				if(ANKI_UNLIKELY(count + 2 >= avgIndicesPerCluster))
-				{
-					ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster");
-					continue;
-				}
-
-				*pIndices[clusterZ] = i;
-				*pCounts[clusterZ] += 1;
-				++pIndices[clusterZ];
+				ANKI_SET_IDX(0);
 			}
 		}
 	}
 
 	// Spot lights
 	{
-		for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-		{
-			pCounts[clusterZ] = pIndices[clusterZ];
-			*pCounts[clusterZ] = 0;
-			++pIndices[clusterZ];
-		}
-
 		PerspectiveFrustum slightFrustum;
 		for(U i = 0; i < ctx.m_in->m_renderQueue->m_spotLights.getSize(); ++i)
 		{
@@ -442,29 +395,13 @@ void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
 					continue;
 				}
 
-				const U32 count = pIndices[clusterZ] - &indices[clusterZ * avgIndicesPerCluster];
-				if(ANKI_UNLIKELY(count + 1 >= avgIndicesPerCluster))
-				{
-					ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster");
-					continue;
-				}
-
-				*pIndices[clusterZ] = i;
-				*pCounts[clusterZ] += 1;
-				++pIndices[clusterZ];
+				ANKI_SET_IDX(1);
 			}
 		}
 	}
 
 	// Probes
 	{
-		for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
-		{
-			pCounts[clusterZ] = pIndices[clusterZ];
-			*pCounts[clusterZ] = 0;
-			++pIndices[clusterZ];
-		}
-
 		Aabb probeBox;
 		for(U i = 0; i < ctx.m_in->m_renderQueue->m_reflectionProbes.getSize(); ++i)
 		{
@@ -484,49 +421,87 @@ void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
 					continue;
 				}
 
-				const U32 count = pIndices[clusterZ] - &indices[clusterZ * avgIndicesPerCluster];
-				if(ANKI_UNLIKELY(count >= avgIndicesPerCluster))
+				ANKI_SET_IDX(2);
+			}
+		}
+	}
+
+	// Decals
+	{
+		Obb decalBox;
+		for(U i = 0; i < ctx.m_in->m_renderQueue->m_decals.getSize(); ++i)
+		{
+			const DecalQueueElement& decal = ctx.m_in->m_renderQueue->m_decals[i];
+			decalBox.setCenter(decal.m_obbCenter.xyz0());
+			decalBox.setRotation(Mat3x4(decal.m_obbRotation));
+			decalBox.setExtend(decal.m_obbExtend.xyz0());
+
+			if(!insideClusterFrustum(frustumPlanes, decalBox))
+			{
+				continue;
+			}
+
+			for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
+			{
+				if(!testCollisionShapes(decalBox, clusterBoxes[clusterZ]))
 				{
-					ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster");
 					continue;
 				}
 
-				*pIndices[clusterZ] = i;
-				*pCounts[clusterZ] += 1;
-				++pIndices[clusterZ];
+				ANKI_SET_IDX(3);
 			}
 		}
 	}
 
+	// Fog volumes
+	{
+		// TODO
+	}
+
 	// Upload the indices for all clusters of the tile
 	for(U clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
 	{
-		const U indexCount = pIndices[clusterZ] - &indices[clusterZ * avgIndicesPerCluster];
-		ANKI_ASSERT(indexCount <= avgIndicesPerCluster);
-		ANKI_ASSERT(indexCount >= TYPED_OBJECT_COUNT);
-
-		U firstIndex;
-		if(indexCount > TYPED_OBJECT_COUNT)
+		WeakArray<U32> inIndices = tileCtx.getClusterIndices(clusterZ);
+		const ClusterBin::TileCtx::ClusterMetaInfo& inf = tileCtx.m_clusterInfos[clusterZ];
+
+		const U other = (TYPED_OBJECT_COUNT - 1) + TYPED_OBJECT_COUNT;
+		const U indexCountPlusOther = inf.m_offset + other;
+		ANKI_ASSERT(indexCountPlusOther <= m_avgObjectsPerCluster + other);
+		ANKI_ASSERT(indexCountPlusOther >= other);
+
+		// Allocate the output range that will hold this cluster's offsets, indices and stoppers
+		const U32 firstIndex = ctx.m_allocatedIndexCount.fetchAdd(indexCountPlusOther);
+		ANKI_ASSERT(firstIndex + indexCountPlusOther <= ctx.m_lightIds.getSize());
+		WeakArray<U32> outIndices(&ctx.m_lightIds[firstIndex], indexCountPlusOther);
+
+		// Write the offsets
+		U offset = firstIndex + TYPED_OBJECT_COUNT - 1;
+		for(U i = 1; i < TYPED_OBJECT_COUNT; ++i)
 		{
-			// Have some objects to bin
-
-			firstIndex = ctx.m_allocatedIndexCount.fetchAdd(indexCount);
-			ANKI_ASSERT(firstIndex + indexCount <= ctx.m_lightIds.getSize());
-
-			memcpy(&ctx.m_lightIds[firstIndex],
-				&indices[clusterZ * avgIndicesPerCluster],
-				sizeof(ctx.m_lightIds[firstIndex]) * indexCount);
+			offset += inf.m_counts[i - 1] + 1; // Count plus the stop
+			outIndices[i - 1] = offset;
 		}
-		else
+
+		// Copy the object indices type by type, appending a stopper after each type
+		U outIndicesOffset = TYPED_OBJECT_COUNT - 1;
+		U inIndicesOffset = 0;
+		for(U i = 0; i < TYPED_OBJECT_COUNT; ++i)
 		{
-			// No typed objects, point to the preallocated cluster
-			firstIndex = 0;
+			for(U c = 0; c < inf.m_counts[i]; ++c)
+			{
+				outIndices[outIndicesOffset++] = inIndices[inIndicesOffset++];
+			}
+
+			// Stop
+			outIndices[outIndicesOffset++] = MAX_U32;
 		}
+		ANKI_ASSERT(inIndicesOffset == inf.m_offset);
+		ANKI_ASSERT(outIndicesOffset == indexCountPlusOther);
 
 		// Write the cluster
 		const U clusterIndex =
 			clusterZ * (m_clusterCounts[0] * m_clusterCounts[1]) + tileY * m_clusterCounts[0] + tileX;
-		ctx.m_clusters[clusterIndex] = firstIndex;
+		ctx.m_clusters[clusterIndex] = firstIndex + TYPED_OBJECT_COUNT - 1; // Points to the first object
 	}
 }
 

+ 1 - 0
src/anki/renderer/ClusterBin.h

@@ -68,6 +68,7 @@ private:
 	Array<U32, 3> m_clusterCounts = {};
 	U32 m_totalClusterCount = 0;
 	U32 m_indexCount = 0;
+	U32 m_avgObjectsPerCluster = 0;
 
 	DynamicArray<Vec4> m_clusterEdges; ///< Cache those for opt. [tileCount][K+1][4]
 	Vec4 m_prevUnprojParams = Vec4(0.0f); ///< To check if m_tiles is dirty.