Browse Source

Split the meshlet into 2 structs

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
3fcc9b5aa7

+ 1 - 0
AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h

@@ -87,6 +87,7 @@ public:
 	{
 	{
 		UnifiedGeometryBufferAllocation out;
 		UnifiedGeometryBufferAllocation out;
 		m_pool.allocate(size, alignment, out.m_token);
 		m_pool.allocate(size, alignment, out.m_token);
+		ANKI_ASSERT(isAligned(alignment, out.m_token.m_offset));
 		out.m_realOffset = U32(out.m_token.m_offset);
 		out.m_realOffset = U32(out.m_token.m_offset);
 		out.m_realAllocatedSize = U32(size);
 		out.m_realAllocatedSize = U32(size);
 		return out;
 		return out;

+ 1 - 1
AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.cpp

@@ -204,7 +204,7 @@ void SegregatedListsGpuMemoryPool::deleteChunk(Chunk* chunk)
 void SegregatedListsGpuMemoryPool::allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
 void SegregatedListsGpuMemoryPool::allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
 {
 {
 	ANKI_ASSERT(isInitialized());
 	ANKI_ASSERT(isInitialized());
-	ANKI_ASSERT(size > 0 && alignment > 0 && isPowerOfTwo(alignment));
+	ANKI_ASSERT(size > 0 && alignment > 0);
 	ANKI_ASSERT(token == SegregatedListsGpuMemoryPoolToken());
 	ANKI_ASSERT(token == SegregatedListsGpuMemoryPoolToken());
 
 
 	LockGuard lock(m_lock);
 	LockGuard lock(m_lock);

+ 4 - 1
AnKi/Renderer/Utils/Drawer.cpp

@@ -59,7 +59,10 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 								   &UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, Format::k##fmt);
 								   &UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, Format::k##fmt);
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 
 
-	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kMeshlets), UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
+	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kMeshletBoundingVolumes),
+					   UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
+	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kMeshletGeometryDescriptors),
+					   UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTaskShaderPayloads), args.m_taskShaderPayloadsBuffer);
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTaskShaderPayloads), args.m_taskShaderPayloadsBuffer);
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kRenderables),
 	cmdb.bindUavBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kRenderables),
 					   GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 					   GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());

+ 38 - 18
AnKi/Resource/MeshResource.cpp

@@ -147,8 +147,12 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			const PtrSize meshletIndicesSize = header.m_meshletPrimitiveCounts[l] * sizeof(U8Vec4);
 			const PtrSize meshletIndicesSize = header.m_meshletPrimitiveCounts[l] * sizeof(U8Vec4);
 			lod.m_meshletIndices = UnifiedGeometryBuffer::getSingleton().allocate(meshletIndicesSize, sizeof(U8Vec4));
 			lod.m_meshletIndices = UnifiedGeometryBuffer::getSingleton().allocate(meshletIndicesSize, sizeof(U8Vec4));
 
 
-			const PtrSize meshletsSize = header.m_meshletCounts[l] * sizeof(Meshlet);
-			lod.m_meshlets = UnifiedGeometryBuffer::getSingleton().allocate(meshletsSize, sizeof(Meshlet));
+			const PtrSize meshletBoundingVolumesSize = header.m_meshletCounts[l] * sizeof(MeshletBoundingVolume);
+			lod.m_meshletBoundingVolumes = UnifiedGeometryBuffer::getSingleton().allocate(meshletBoundingVolumesSize, sizeof(MeshletBoundingVolume));
+
+			const PtrSize meshletGeomDescriptorsSize = header.m_meshletCounts[l] * sizeof(MeshletGeometryDescriptor);
+			lod.m_meshletGeometryDescriptors =
+				UnifiedGeometryBuffer::getSingleton().allocate(meshletGeomDescriptorsSize, sizeof(MeshletGeometryDescriptor));
 
 
 			lod.m_meshletCount = header.m_meshletCounts[l];
 			lod.m_meshletCount = header.m_meshletCounts[l];
 		}
 		}
@@ -195,7 +199,8 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			if(lod.m_meshletIndices.isValid())
 			if(lod.m_meshletIndices.isValid())
 			{
 			{
 				cmdb->fillBuffer(lod.m_meshletIndices, 0);
 				cmdb->fillBuffer(lod.m_meshletIndices, 0);
-				cmdb->fillBuffer(lod.m_meshlets, 0);
+				cmdb->fillBuffer(lod.m_meshletBoundingVolumes, 0);
+				cmdb->fillBuffer(lod.m_meshletGeometryDescriptors, 0);
 			}
 			}
 		}
 		}
 
 
@@ -227,7 +232,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 	GrManager& gr = GrManager::getSingleton();
 	GrManager& gr = GrManager::getSingleton();
 	TransferGpuAllocator& transferAlloc = ResourceManager::getSingleton().getTransferGpuAllocator();
 	TransferGpuAllocator& transferAlloc = ResourceManager::getSingleton().getTransferGpuAllocator();
 
 
-	Array<TransferGpuAllocatorHandle, kMaxLodCount*(U32(VertexStreamId::kMeshRelatedCount) + 1 + 2)> handles;
+	Array<TransferGpuAllocatorHandle, kMaxLodCount*(U32(VertexStreamId::kMeshRelatedCount) + 1 + 3)> handles;
 	U32 handleCount = 0;
 	U32 handleCount = 0;
 
 
 	Buffer* unifiedGeometryBuffer = &UnifiedGeometryBuffer::getSingleton().getBuffer();
 	Buffer* unifiedGeometryBuffer = &UnifiedGeometryBuffer::getSingleton().getBuffer();
@@ -285,7 +290,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 									 lod.m_vertexBuffersAllocationToken[stream].getOffset(), handle.getRange());
 									 lod.m_vertexBuffersAllocationToken[stream].getOffset(), handle.getRange());
 		}
 		}
 
 
-		if(lod.m_meshlets.isValid())
+		if(lod.m_meshletBoundingVolumes.isValid())
 		{
 		{
 			// Indices
 			// Indices
 			TransferGpuAllocatorHandle& handle = handles[handleCount++];
 			TransferGpuAllocatorHandle& handle = handles[handleCount++];
@@ -302,15 +307,23 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 			ANKI_CHECK(loader.storeMeshletBuffer(lodIdx, WeakArray(binaryMeshlets)));
 			ANKI_CHECK(loader.storeMeshletBuffer(lodIdx, WeakArray(binaryMeshlets)));
 
 
 			TransferGpuAllocatorHandle& handle2 = handles[handleCount++];
 			TransferGpuAllocatorHandle& handle2 = handles[handleCount++];
-			ANKI_CHECK(transferAlloc.allocate(lod.m_meshlets.getAllocatedSize(), handle2));
-			WeakArray<Meshlet> outMeshlets(static_cast<Meshlet*>(handle2.getMappedMemory()), loader.getHeader().m_meshletCounts[lodIdx]);
+			ANKI_CHECK(transferAlloc.allocate(lod.m_meshletBoundingVolumes.getAllocatedSize(), handle2));
+			WeakArray<MeshletBoundingVolume> outMeshletBoundingVolumes(static_cast<MeshletBoundingVolume*>(handle2.getMappedMemory()),
+																	   loader.getHeader().m_meshletCounts[lodIdx]);
+
+			TransferGpuAllocatorHandle& handle3 = handles[handleCount++];
+			ANKI_CHECK(transferAlloc.allocate(lod.m_meshletGeometryDescriptors.getAllocatedSize(), handle3));
+			WeakArray<MeshletGeometryDescriptor> outMeshletGeomDescriptors(static_cast<MeshletGeometryDescriptor*>(handle3.getMappedMemory()),
+																		   loader.getHeader().m_meshletCounts[lodIdx]);
 
 
 			for(U32 i = 0; i < binaryMeshlets.getSize(); ++i)
 			for(U32 i = 0; i < binaryMeshlets.getSize(); ++i)
 			{
 			{
 				const MeshBinaryMeshlet& inMeshlet = binaryMeshlets[i];
 				const MeshBinaryMeshlet& inMeshlet = binaryMeshlets[i];
-				Meshlet& outMeshlet = outMeshlets[i];
+				MeshletGeometryDescriptor& outMeshletGeom = outMeshletGeomDescriptors[i];
+				MeshletBoundingVolume& outMeshletBoundingVolume = outMeshletBoundingVolumes[i];
 
 
-				outMeshlet = {};
+				outMeshletBoundingVolume = {};
+				outMeshletGeom = {};
 				for(VertexStreamId stream : EnumIterable(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
 				for(VertexStreamId stream : EnumIterable(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
 				{
 				{
 					if(!(m_presentVertStreams & VertexStreamMask(1u << stream)))
 					if(!(m_presentVertStreams & VertexStreamMask(1u << stream)))
@@ -318,24 +331,31 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 						continue;
 						continue;
 					}
 					}
 
 
-					outMeshlet.m_vertexOffsets[U32(stream)] =
+					outMeshletGeom.m_vertexOffsets[U32(stream)] =
 						lod.m_vertexBuffersAllocationToken[stream].getOffset() / getFormatInfo(kMeshRelatedVertexStreamFormats[stream]).m_texelSize
 						lod.m_vertexBuffersAllocationToken[stream].getOffset() / getFormatInfo(kMeshRelatedVertexStreamFormats[stream]).m_texelSize
 						+ inMeshlet.m_firstVertex;
 						+ inMeshlet.m_firstVertex;
 				}
 				}
 
 
-				outMeshlet.m_firstPrimitive =
+				outMeshletGeom.m_firstPrimitive =
 					lod.m_meshletIndices.getOffset() / getFormatInfo(kMeshletPrimitiveFormat).m_texelSize + inMeshlet.m_firstPrimitive;
 					lod.m_meshletIndices.getOffset() / getFormatInfo(kMeshletPrimitiveFormat).m_texelSize + inMeshlet.m_firstPrimitive;
-				outMeshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint = (inMeshlet.m_primitiveCount << 16u) | inMeshlet.m_vertexCount;
-				outMeshlet.m_aabbMin = inMeshlet.m_boundingVolume.m_aabbMin;
-				outMeshlet.m_aabbMax = inMeshlet.m_boundingVolume.m_aabbMax;
-				outMeshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm =
+				outMeshletGeom.m_primitiveCount_R16_Uint_vertexCount_R16_Uint = (inMeshlet.m_primitiveCount << 16u) | inMeshlet.m_vertexCount;
+				outMeshletGeom.m_positionTranslation = m_positionsTranslation;
+				outMeshletGeom.m_positionScale = m_positionsScale;
+
+				outMeshletBoundingVolume.m_aabbMin = inMeshlet.m_boundingVolume.m_aabbMin;
+				outMeshletBoundingVolume.m_aabbMax = inMeshlet.m_boundingVolume.m_aabbMax;
+				outMeshletBoundingVolume.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm =
 					packSnorm4x8(Vec4(inMeshlet.m_coneDirection, cos(inMeshlet.m_coneAngle / 2.0f)));
 					packSnorm4x8(Vec4(inMeshlet.m_coneDirection, cos(inMeshlet.m_coneAngle / 2.0f)));
-				outMeshlet.m_coneApex_R8G8B8A8_Snorm = packSnorm4x8(inMeshlet.m_coneApex.xyz0());
-				outMeshlet.m_sphereRadius = ((outMeshlet.m_aabbMin + outMeshlet.m_aabbMax) / 2.0f - outMeshlet.m_aabbMax).getLength();
+				outMeshletBoundingVolume.m_coneApex_R8G8B8A8_Snorm = packSnorm4x8(inMeshlet.m_coneApex.xyz0());
+				outMeshletBoundingVolume.m_sphereRadius =
+					((outMeshletBoundingVolume.m_aabbMin + outMeshletBoundingVolume.m_aabbMax) / 2.0f - outMeshletBoundingVolume.m_aabbMax)
+						.getLength();
 			}
 			}
 
 
-			cmdb->copyBufferToBuffer(&handle2.getBuffer(), handle2.getOffset(), unifiedGeometryBuffer, lod.m_meshlets.getOffset(),
+			cmdb->copyBufferToBuffer(&handle2.getBuffer(), handle2.getOffset(), unifiedGeometryBuffer, lod.m_meshletBoundingVolumes.getOffset(),
 									 handle2.getRange());
 									 handle2.getRange());
+			cmdb->copyBufferToBuffer(&handle3.getBuffer(), handle3.getOffset(), unifiedGeometryBuffer, lod.m_meshletGeometryDescriptors.getOffset(),
+									 handle3.getRange());
 		}
 		}
 	}
 	}
 
 

+ 5 - 3
AnKi/Resource/MeshResource.h

@@ -70,9 +70,10 @@ public:
 		vertexCount = m_lods[lod].m_vertexCount;
 		vertexCount = m_lods[lod].m_vertexCount;
 	}
 	}
 
 
-	void getMeshletBufferInfo(U32 lod, PtrSize& meshletUgbOffset, U32& meshletCount)
+	void getMeshletBufferInfo(U32 lod, PtrSize& meshletBoundingVolumesUgbOffset, PtrSize& meshletGeometryDescriptorsUgbOffset, U32& meshletCount)
 	{
 	{
-		meshletUgbOffset = m_lods[lod].m_meshlets.getOffset();
+		meshletBoundingVolumesUgbOffset = m_lods[lod].m_meshletBoundingVolumes.getOffset();
+		meshletGeometryDescriptorsUgbOffset = m_lods[lod].m_meshletGeometryDescriptors.getOffset();
 		ANKI_ASSERT(m_lods[lod].m_meshletCount);
 		ANKI_ASSERT(m_lods[lod].m_meshletCount);
 		meshletCount = m_lods[lod].m_meshletCount;
 		meshletCount = m_lods[lod].m_meshletCount;
 	}
 	}
@@ -115,7 +116,8 @@ private:
 		Array<UnifiedGeometryBufferAllocation, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
 		Array<UnifiedGeometryBufferAllocation, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
 
 
 		UnifiedGeometryBufferAllocation m_meshletIndices;
 		UnifiedGeometryBufferAllocation m_meshletIndices;
-		UnifiedGeometryBufferAllocation m_meshlets;
+		UnifiedGeometryBufferAllocation m_meshletBoundingVolumes;
+		UnifiedGeometryBufferAllocation m_meshletGeometryDescriptors;
 
 
 		U32 m_indexCount = 0;
 		U32 m_indexCount = 0;
 		U32 m_vertexCount = 0;
 		U32 m_vertexCount = 0;

+ 8 - 5
AnKi/Resource/ModelResource.cpp

@@ -31,14 +31,16 @@ void ModelPatch::getGeometryInfo(U32 lod, ModelPatchGeometryInfo& inf) const
 
 
 	if(m_lodInfos[lod].m_meshletCount != kMaxU32)
 	if(m_lodInfos[lod].m_meshletCount != kMaxU32)
 	{
 	{
-		ANKI_ASSERT(m_lodInfos[lod].m_meshletsUgbOffset != kMaxPtrSize);
+		ANKI_ASSERT(m_lodInfos[lod].m_meshletBoundingVolumesUgbOffset != kMaxPtrSize);
 		inf.m_meshletCount = m_lodInfos[lod].m_meshletCount;
 		inf.m_meshletCount = m_lodInfos[lod].m_meshletCount;
-		inf.m_meshletsUgbOffset = m_lodInfos[lod].m_meshletsUgbOffset;
+		inf.m_meshletBoundingVolumesUgbOffset = m_lodInfos[lod].m_meshletBoundingVolumesUgbOffset;
+		inf.m_meshletGometryDescriptorsUgbOffset = m_lodInfos[lod].m_meshletGometryDescriptorsUgbOffset;
 	}
 	}
 	else
 	else
 	{
 	{
 		inf.m_meshletCount = 0;
 		inf.m_meshletCount = 0;
-		inf.m_meshletsUgbOffset = kMaxPtrSize;
+		inf.m_meshletBoundingVolumesUgbOffset = kMaxPtrSize;
+		inf.m_meshletGometryDescriptorsUgbOffset = kMaxPtrSize;
 	}
 	}
 }
 }
 
 
@@ -116,9 +118,10 @@ Error ModelPatch::init([[maybe_unused]] ModelResource* model, CString meshFName,
 		if(GrManager::getSingleton().getDeviceCapabilities().m_meshShaders)
 		if(GrManager::getSingleton().getDeviceCapabilities().m_meshShaders)
 		{
 		{
 			U32 dummy;
 			U32 dummy;
-			m_mesh->getMeshletBufferInfo(l, lod.m_meshletsUgbOffset, dummy);
+			m_mesh->getMeshletBufferInfo(l, lod.m_meshletBoundingVolumesUgbOffset, lod.m_meshletGometryDescriptorsUgbOffset, dummy);
 
 
-			lod.m_meshletsUgbOffset += firstMeshlet * sizeof(Meshlet);
+			lod.m_meshletBoundingVolumesUgbOffset += firstMeshlet * sizeof(MeshletBoundingVolume);
+			lod.m_meshletGometryDescriptorsUgbOffset += firstMeshlet * sizeof(MeshletGeometryDescriptor);
 			lod.m_meshletCount = meshletCount;
 			lod.m_meshletCount = meshletCount;
 		}
 		}
 	}
 	}

+ 4 - 2
AnKi/Resource/ModelResource.h

@@ -40,7 +40,8 @@ public:
 	/// Offset to the vertex buffer or kMaxPtrSize if stream is not present.
 	/// Offset to the vertex buffer or kMaxPtrSize if stream is not present.
 	Array<PtrSize, U32(VertexStreamId::kMeshRelatedCount)> m_vertexUgbOffsets;
 	Array<PtrSize, U32(VertexStreamId::kMeshRelatedCount)> m_vertexUgbOffsets;
 
 
-	PtrSize m_meshletsUgbOffset;
+	PtrSize m_meshletBoundingVolumesUgbOffset = kMaxPtrSize;
+	PtrSize m_meshletGometryDescriptorsUgbOffset = kMaxPtrSize;
 	U32 m_meshletCount;
 	U32 m_meshletCount;
 
 
 	AccelerationStructurePtr m_blas;
 	AccelerationStructurePtr m_blas;
@@ -81,7 +82,8 @@ private:
 
 
 		Array<PtrSize, U32(VertexStreamId::kMeshRelatedCount)> m_vertexUgbOffsets = {};
 		Array<PtrSize, U32(VertexStreamId::kMeshRelatedCount)> m_vertexUgbOffsets = {};
 
 
-		PtrSize m_meshletsUgbOffset = kMaxPtrSize;
+		PtrSize m_meshletBoundingVolumesUgbOffset = kMaxPtrSize;
+		PtrSize m_meshletGometryDescriptorsUgbOffset = kMaxPtrSize;
 		U32 m_meshletCount = kMaxU32;
 		U32 m_meshletCount = kMaxU32;
 	};
 	};
 
 

+ 4 - 2
AnKi/Scene/Components/ModelComponent.cpp

@@ -183,8 +183,10 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 
 				if(inf.m_meshletCount)
 				if(inf.m_meshletCount)
 				{
 				{
-					ANKI_ASSERT((inf.m_meshletsUgbOffset % sizeof(Meshlet)) == 0);
-					meshLod.m_firstMeshlet = U32(inf.m_meshletsUgbOffset / sizeof(Meshlet));
+					ANKI_ASSERT((inf.m_meshletBoundingVolumesUgbOffset % sizeof(MeshletBoundingVolume)) == 0);
+					meshLod.m_firstMeshletBoundingVolume = U32(inf.m_meshletBoundingVolumesUgbOffset / sizeof(MeshletBoundingVolume));
+					ANKI_ASSERT((inf.m_meshletGometryDescriptorsUgbOffset % sizeof(MeshletGeometryDescriptor)) == 0);
+					meshLod.m_firstMeshletGeometryDescriptor = U32(inf.m_meshletGometryDescriptorsUgbOffset / sizeof(MeshletGeometryDescriptor));
 					meshLod.m_meshletCount = inf.m_meshletCount;
 					meshLod.m_meshletCount = inf.m_meshletCount;
 				}
 				}
 			}
 			}

+ 11 - 14
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -158,13 +158,11 @@ struct FragOut
 
 
 struct TaskOut
 struct TaskOut
 {
 {
-	U32 m_firstMeshletIndex;
+	U32 m_firstMeshletGeometryDescriptor;
 	U32 m_visibleMeshletsRelativeIndices[kMeshletGroupSize / sizeof(U32)];
 	U32 m_visibleMeshletsRelativeIndices[kMeshletGroupSize / sizeof(U32)];
 	U32 m_worldTransformsOffset;
 	U32 m_worldTransformsOffset;
 	U32 m_constantsOffset;
 	U32 m_constantsOffset;
 	U32 m_boneTransformsOrParticleEmitterOffset;
 	U32 m_boneTransformsOrParticleEmitterOffset;
-	Vec3 m_positionTranslation;
-	F32 m_positionScale;
 };
 };
 
 
 struct Mat3x4_2
 struct Mat3x4_2
@@ -302,18 +300,17 @@ struct FirstPayload
 
 
 	const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 	const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 	const GpuSceneMeshLod meshLod = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod);
 	const GpuSceneMeshLod meshLod = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod);
-	U32 firstMeshlet = meshletGroup * kMeshletGroupSize;
-	const U32 meshletCount = min(kMeshletGroupSize, meshLod.m_meshletCount - firstMeshlet);
-	firstMeshlet += meshLod.m_firstMeshlet;
+	U32 firstMeshletBoundingVolume = meshletGroup * kMeshletGroupSize;
+	const U32 meshletCount = min(kMeshletGroupSize, meshLod.m_meshletCount - firstMeshletBoundingVolume);
+	firstMeshletBoundingVolume += meshLod.m_firstMeshletBoundingVolume;
+	const U32 firstMeshletGeometryDescriptor = meshletGroup * kMeshletGroupSize + meshLod.m_firstMeshletGeometryDescriptor;
 
 
 	if(svGroupIndex == 0u)
 	if(svGroupIndex == 0u)
 	{
 	{
-		s_payload.m_firstMeshletIndex = firstMeshlet;
+		s_payload.m_firstMeshletGeometryDescriptor = firstMeshletGeometryDescriptor;
 		s_payload.m_worldTransformsOffset = renderable.m_worldTransformsOffset;
 		s_payload.m_worldTransformsOffset = renderable.m_worldTransformsOffset;
 		s_payload.m_constantsOffset = renderable.m_constantsOffset;
 		s_payload.m_constantsOffset = renderable.m_constantsOffset;
 		s_payload.m_boneTransformsOrParticleEmitterOffset = renderable.m_boneTransformsOffset;
 		s_payload.m_boneTransformsOrParticleEmitterOffset = renderable.m_boneTransformsOffset;
-		s_payload.m_positionScale = meshLod.m_positionScale;
-		s_payload.m_positionTranslation = meshLod.m_positionTranslation;
 
 
 		s_visibleMeshletCount = 0;
 		s_visibleMeshletCount = 0;
 
 
@@ -329,11 +326,11 @@ struct FirstPayload
 	{
 	{
 		Bool cull = false;
 		Bool cull = false;
 
 
-		const Meshlet meshlet = g_meshlets[firstMeshlet + svGroupIndex];
+		const MeshletBoundingVolume meshletBoundingVol = g_meshletBoundingVolumes[firstMeshletBoundingVolume + svGroupIndex];
 		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 
 
 #	if MESHLET_BACKFACE_CULLING
 #	if MESHLET_BACKFACE_CULLING
-		cull = cullBackfaceMeshlet(meshlet, worldTransform, g_globalConstants.m_cameraTransform.getTranslationPart());
+		cull = cullBackfaceMeshlet(meshletBoundingVol, worldTransform, g_globalConstants.m_cameraTransform.getTranslationPart());
 #	endif
 #	endif
 
 
 		const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
 		const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
@@ -341,7 +338,7 @@ struct FirstPayload
 
 
 		Vec2 minNdc, maxNdc;
 		Vec2 minNdc, maxNdc;
 		F32 aabbMinDepth;
 		F32 aabbMinDepth;
-		projectAabb(meshlet.m_aabbMin, meshlet.m_aabbMax, mvp, minNdc, maxNdc, aabbMinDepth);
+		projectAabb(meshletBoundingVol.m_aabbMin, meshletBoundingVol.m_aabbMax, mvp, minNdc, maxNdc, aabbMinDepth);
 
 
 #	if MESHLET_OUTSIDE_OF_SCREEN_CULLING
 #	if MESHLET_OUTSIDE_OF_SCREEN_CULLING
 		// Outside of the screen
 		// Outside of the screen
@@ -396,7 +393,7 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, in payload Ta
 	const U32 localIdx = svGroupId % 4u;
 	const U32 localIdx = svGroupId % 4u;
 	const U32 relativeMeshletIdx = (payload.m_visibleMeshletsRelativeIndices[groupIdx] >> (localIdx * 8u)) & 0xFFu;
 	const U32 relativeMeshletIdx = (payload.m_visibleMeshletsRelativeIndices[groupIdx] >> (localIdx * 8u)) & 0xFFu;
 
 
-	const Meshlet meshlet = g_meshlets[payload.m_firstMeshletIndex + relativeMeshletIdx];
+	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[payload.m_firstMeshletGeometryDescriptor + relativeMeshletIdx];
 	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
 	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
 	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
 	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
 
 
@@ -412,7 +409,7 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, in payload Ta
 		{
 		{
 			MeshPerVertOut output;
 			MeshPerVertOut output;
 
 
-			UnpackedMeshVertex vert = loadVertex(meshlet, idx, ANKI_BONES, payload.m_positionScale, payload.m_positionTranslation);
+			UnpackedMeshVertex vert = loadVertex(meshlet, idx, ANKI_BONES, meshlet.m_positionScale, meshlet.m_positionTranslation);
 
 
 			const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset);
 			const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset);
 			const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset + sizeof(Mat3x4));
 			const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(payload.m_worldTransformsOffset + sizeof(Mat3x4));

+ 8 - 3
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -62,16 +62,21 @@ struct GpuSceneMeshLod
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_indexCount;
 	U32 m_indexCount;
 	U32 m_firstIndex; ///< In sizeof(indexType)
 	U32 m_firstIndex; ///< In sizeof(indexType)
-	U32 m_firstMeshlet; ///< In sizeof(Meshlet)
+	U32 m_padding1;
+
+	U32 m_firstMeshletBoundingVolume; ///< In sizeof(MeshletBoundingVolume)
+	U32 m_firstMeshletGeometryDescriptor; ///< In sizeof(MeshletGeometryDescriptor)
+	U32 m_meshletCount; ///< Can be zero if the mesh doesn't support mesh shading (or mesh shading is off)
+	U32 m_padding2;
 
 
 	Vec3 m_positionTranslation;
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;
 	F32 m_positionScale;
 
 
 	UVec2 m_blasAddress;
 	UVec2 m_blasAddress;
 	U32 m_tlasInstanceMask; ///< Mask that goes to AccelerationStructureInstance::m_instanceCustomIndex24_mask8
 	U32 m_tlasInstanceMask; ///< Mask that goes to AccelerationStructureInstance::m_instanceCustomIndex24_mask8
-	U32 m_meshletCount; ///< Can be zero if the mesh doesn't support mesh shading (or mesh shading is off)
+	U32 m_padding3;
 };
 };
-static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 4);
+static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 5);
 
 
 struct GpuSceneParticleEmitter
 struct GpuSceneParticleEmitter
 {
 {

+ 2 - 1
AnKi/Shaders/Include/MaterialTypes.h

@@ -45,7 +45,8 @@ enum class MaterialBinding : U32
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 
 
 	// For mesh shading
 	// For mesh shading
-	kMeshlets, ///< Points to the unified geom buffer
+	kMeshletBoundingVolumes, ///< Points to the unified geom buffer
+	kMeshletGeometryDescriptors, ///< Points to the unified geom buffer
 	kTaskShaderPayloads,
 	kTaskShaderPayloads,
 	kRenderables,
 	kRenderables,
 	kHzbTexture,
 	kHzbTexture,

+ 19 - 9
AnKi/Shaders/Include/MeshTypes.h

@@ -83,20 +83,30 @@ struct UnpackedMeshVertex
 	RVec4 m_boneWeights;
 	RVec4 m_boneWeights;
 };
 };
 
 
-struct Meshlet
+/// Contains the meshlet data accessed by the task shaders (or whoever does meshlet visibility).
+struct MeshletBoundingVolume
+{
+	Vec3 m_aabbMin;
+	F32 m_sphereRadius;
+
+	Vec3 m_aabbMax;
+	U32 m_coneApex_R8G8B8A8_Snorm;
+
+	Vec3 m_padding;
+	U32 m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm;
+};
+
+/// Contains all the geometry-related info of a meshlet. Accessed by the mesh shaders.
+struct MeshletGeometryDescriptor
 {
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_firstPrimitive; // In size of kMeshletPrimitiveFormat
 	U32 m_firstPrimitive; // In size of kMeshletPrimitiveFormat
 	U32 m_primitiveCount_R16_Uint_vertexCount_R16_Uint;
 	U32 m_primitiveCount_R16_Uint_vertexCount_R16_Uint;
-	U32 m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm;
-
-	Vec3 m_aabbMin;
-	U32 m_coneApex_R8G8B8A8_Snorm;
+	F32 m_positionScale;
 
 
-	Vec3 m_aabbMax;
-	F32 m_sphereRadius;
+	Vec3 m_positionTranslation;
+	F32 m_padding;
 };
 };
-// Power of 2 because the sizeof will be used as allocation alignment and allocation alignments need to be power of 2
-static_assert(isPowerOfTwo(sizeof(Meshlet)));
+static_assert(isAligned(16, sizeof(MeshletGeometryDescriptor)));
 
 
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 5 - 3
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -24,7 +24,9 @@ ANKI_BINDLESS_SET(MaterialSet::kBindless)
 	[[vk::binding(MaterialBinding::kUnifiedGeometry_##fmt, MaterialSet::kGlobal)]] Buffer<shaderType> g_unifiedGeom_##fmt;
 	[[vk::binding(MaterialBinding::kUnifiedGeometry_##fmt, MaterialSet::kGlobal)]] Buffer<shaderType> g_unifiedGeom_##fmt;
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 
 
-[[vk::binding(MaterialBinding::kMeshlets, MaterialSet::kGlobal)]] StructuredBuffer<Meshlet> g_meshlets;
+[[vk::binding(MaterialBinding::kMeshletBoundingVolumes, MaterialSet::kGlobal)]] StructuredBuffer<MeshletBoundingVolume> g_meshletBoundingVolumes;
+[[vk::binding(MaterialBinding::kMeshletGeometryDescriptors, MaterialSet::kGlobal)]] StructuredBuffer<MeshletGeometryDescriptor>
+	g_meshletGeometryDescriptors;
 [[vk::binding(MaterialBinding::kTaskShaderPayloads, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneTaskShaderPayload> g_taskShaderPayloads;
 [[vk::binding(MaterialBinding::kTaskShaderPayloads, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneTaskShaderPayload> g_taskShaderPayloads;
 [[vk::binding(MaterialBinding::kRenderables, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(MaterialBinding::kRenderables, MaterialSet::kGlobal)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
 [[vk::binding(MaterialBinding::kHzbTexture, MaterialSet::kGlobal)]] Texture2D<Vec4> g_hzbTexture;
 [[vk::binding(MaterialBinding::kHzbTexture, MaterialSet::kGlobal)]] Texture2D<Vec4> g_hzbTexture;
@@ -64,7 +66,7 @@ UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 	return v;
 	return v;
 }
 }
 
 
-UnpackedMeshVertex loadVertex(Meshlet meshlet, U32 vertexIndex, Bool bones, F32 positionScale, Vec3 positionTranslation)
+UnpackedMeshVertex loadVertex(MeshletGeometryDescriptor meshlet, U32 vertexIndex, Bool bones, F32 positionScale, Vec3 positionTranslation)
 {
 {
 	UnpackedMeshVertex v;
 	UnpackedMeshVertex v;
 	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kPosition] + vertexIndex];
 	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kPosition] + vertexIndex];
@@ -82,7 +84,7 @@ UnpackedMeshVertex loadVertex(Meshlet meshlet, U32 vertexIndex, Bool bones, F32
 	return v;
 	return v;
 }
 }
 
 
-Bool cullBackfaceMeshlet(Meshlet meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
+Bool cullBackfaceMeshlet(MeshletBoundingVolume meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
 {
 {
 	const Vec4 coneData = unpackSnorm4x8(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
 	const Vec4 coneData = unpackSnorm4x8(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
 
 

+ 1 - 1
AnKi/Util/SegregatedListsAllocatorBuilder.h

@@ -81,7 +81,7 @@ public:
 
 
 	/// Allocate memory.
 	/// Allocate memory.
 	/// @param size The size to allocate.
 	/// @param size The size to allocate.
-	/// @param alignment The alignment of the returned address.
+	/// @param alignment The alignment of the returned address. It does not need to be a power of 2.
 	/// @param[out] chunk The chunk that the memory belongs to.
 	/// @param[out] chunk The chunk that the memory belongs to.
 	/// @param[out] offset The offset inside the chunk.
 	/// @param[out] offset The offset inside the chunk.
 	/// @note This is thread safe.
 	/// @note This is thread safe.

+ 42 - 4
Tests/Util/SegregatedListsAllocatorBuilder.cpp

@@ -74,7 +74,7 @@ static void printAllocatorBuilder(const TAlloc& sl)
 	printf("%s\n", str.cstr());
 	printf("%s\n", str.cstr());
 }
 }
 
 
-template<Bool kValidate, U32 kIterationCount, Bool kStats>
+template<Bool kValidate, U32 kIterationCount, Bool kStats, Bool kExtraValidation>
 static void fuzzyTest()
 static void fuzzyTest()
 {
 {
 	class Alloc
 	class Alloc
@@ -103,7 +103,7 @@ static void fuzzyTest()
 			do
 			do
 			{
 			{
 				alloc.m_size = getRandom() % 70_MB;
 				alloc.m_size = getRandom() % 70_MB;
-				alloc.m_alignment = nextPowerOfTwo(getRandom() % 16);
+				alloc.m_alignment = getAlignedRoundUp(4, getRandom() % 16);
 			} while(alloc.m_size == 0 || alloc.m_alignment == 0);
 			} while(alloc.m_size == 0 || alloc.m_alignment == 0);
 
 
 			ANKI_TEST_EXPECT_NO_ERR(sl.allocate(alloc.m_size, alloc.m_alignment, alloc.m_chunk, alloc.m_address));
 			ANKI_TEST_EXPECT_NO_ERR(sl.allocate(alloc.m_size, alloc.m_alignment, alloc.m_chunk, alloc.m_address));
@@ -120,6 +120,38 @@ static void fuzzyTest()
 			sl.free(alloc.m_chunk, alloc.m_address, alloc.m_size);
 			sl.free(alloc.m_chunk, alloc.m_address, alloc.m_size);
 		}
 		}
 
 
+		if(kExtraValidation)
+		{
+			// Make sure they don't overlap
+			for(U32 a = 0; a < allocs.size(); ++a)
+			{
+				for(U32 b = 0; b < allocs.size(); ++b)
+				{
+					if(a == b)
+					{
+						continue;
+					}
+
+					const Alloc& allocA = allocs[a];
+					const Alloc& allocB = allocs[b];
+
+					if(allocA.m_chunk != allocB.m_chunk)
+					{
+						continue;
+					}
+
+					if(allocA.m_address < allocB.m_address)
+					{
+						ANKI_TEST_EXPECT_EQ(allocA.m_address + allocA.m_size <= allocB.m_address, true);
+					}
+					else
+					{
+						ANKI_TEST_EXPECT_EQ(allocB.m_address + allocB.m_size <= allocA.m_address, true);
+					}
+				}
+			}
+		}
+
 		if(kStats)
 		if(kStats)
 		{
 		{
 			const F64 f = sl.computeExternalFragmentation();
 			const F64 f = sl.computeExternalFragmentation();
@@ -151,6 +183,8 @@ static void fuzzyTest()
 
 
 ANKI_TEST(Util, SegregatedListsAllocatorBuilder)
 ANKI_TEST(Util, SegregatedListsAllocatorBuilder)
 {
 {
+	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
+
 	// Simple test
 	// Simple test
 	{
 	{
 		SLAlloc sl;
 		SLAlloc sl;
@@ -186,10 +220,14 @@ ANKI_TEST(Util, SegregatedListsAllocatorBuilder)
 	}
 	}
 
 
 	// Fuzzy test
 	// Fuzzy test
-	fuzzyTest<true, 1024, false>();
+	fuzzyTest<true, 1024, false, true>();
+
+	DefaultMemoryPool::freeSingleton();
 }
 }
 
 
 ANKI_TEST(Util, SegregatedListsAllocatorBuilderBenchmark)
 ANKI_TEST(Util, SegregatedListsAllocatorBuilderBenchmark)
 {
 {
-	fuzzyTest<false, 2000000, true>();
+	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
+	fuzzyTest<false, 2000000, true, false>();
+	DefaultMemoryPool::freeSingleton();
 }
 }