Prepare the particle emitters for GPU occlusion

Panagiotis Christopoulos Charitos, 2 years ago
parent commit f477843e5e

+ 1 - 2
AnKi/Core/GpuMemory/GpuSceneBuffer.cpp

@@ -31,8 +31,7 @@ void GpuSceneBuffer::init()
 	m_pool.init(buffUsage, classes, poolSize, "GpuScene", true);
 
 	// Allocate something dummy to force creating the GPU buffer
-	GpuSceneBufferAllocation alloc;
-	allocate(16, 4, alloc);
+	GpuSceneBufferAllocation alloc = allocate(16, 4);
 	deferredFree(alloc);
 }
 

+ 3 - 1
AnKi/Core/GpuMemory/GpuSceneBuffer.h

@@ -76,9 +76,11 @@ public:
 
 	void init();
 
-	void allocate(PtrSize size, U32 alignment, GpuSceneBufferAllocation& alloc)
+	GpuSceneBufferAllocation allocate(PtrSize size, U32 alignment)
 	{
+		GpuSceneBufferAllocation alloc;
 		m_pool.allocate(size, alignment, alloc.m_token);
+		return alloc;
 	}
 
 	void deferredFree(GpuSceneBufferAllocation& alloc)

+ 1 - 1
AnKi/Core/GpuMemory/RebarTransientMemoryPool.cpp

@@ -31,7 +31,7 @@ void RebarTransientMemoryPool::init()
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_size = g_rebarGpuMemorySizeCvar.get();
 	buffInit.m_usage = BufferUsageBit::kAllUniform | BufferUsageBit::kAllStorage | BufferUsageBit::kVertex | BufferUsageBit::kIndex
-					   | BufferUsageBit::kShaderBindingTable | BufferUsageBit::kAllIndirect;
+					   | BufferUsageBit::kShaderBindingTable | BufferUsageBit::kAllIndirect | BufferUsageBit::kTransferSource;
 	m_buffer = GrManager::getSingleton().newBuffer(buffInit);
 
 	m_bufferSize = buffInit.m_size;

+ 1 - 2
AnKi/Core/GpuMemory/UnifiedGeometryBuffer.cpp

@@ -34,8 +34,7 @@ void UnifiedGeometryBuffer::init()
 	m_pool.init(buffUsage, classes, poolSize, "UnifiedGeometry", false);
 
 	// Allocate something dummy to force creating the GPU buffer
-	UnifiedGeometryBufferAllocation alloc;
-	allocate(16, 4, alloc);
+	UnifiedGeometryBufferAllocation alloc = allocate(16, 4);
 	deferredFree(alloc);
 }
 

+ 34 - 4
AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h

@@ -36,7 +36,11 @@ public:
 	{
 		ANKI_ASSERT(!isValid() && "Forgot to delete");
 		m_token = b.m_token;
+		m_realOffset = b.m_realOffset;
+		m_realAllocatedSize = b.m_realAllocatedSize;
 		b.m_token = {};
+		b.m_realAllocatedSize = 0;
+		b.m_realOffset = kMaxU32;
 		return *this;
 	}
 
@@ -49,17 +53,19 @@ public:
 	U32 getOffset() const
 	{
 		ANKI_ASSERT(isValid());
-		return U32(m_token.m_offset);
+		return m_realOffset;
 	}
 
 	U32 getAllocatedSize() const
 	{
 		ANKI_ASSERT(isValid());
-		return U32(m_token.m_size);
+		return m_realAllocatedSize;
 	}
 
 private:
 	SegregatedListsGpuMemoryPoolToken m_token;
+	U32 m_realOffset = kMaxU32; ///< In some allocations with weird alignments we need a different offset.
+	U32 m_realAllocatedSize = 0;
 };
 
 /// Manages vertex and index memory for the WHOLE application.
@@ -75,14 +81,38 @@ public:
 
 	void init();
 
-	void allocate(PtrSize size, U32 alignment, UnifiedGeometryBufferAllocation& alloc)
+	UnifiedGeometryBufferAllocation allocate(PtrSize size, U32 alignment)
 	{
-		m_pool.allocate(size, alignment, alloc.m_token);
+		UnifiedGeometryBufferAllocation out;
+		m_pool.allocate(size, alignment, out.m_token);
+		out.m_realOffset = U32(out.m_token.m_offset);
+		out.m_realAllocatedSize = U32(size);
+		return out;
+	}
+
+	/// Allocate a vertex buffer.
+	UnifiedGeometryBufferAllocation allocateFormat(Format format, U32 count)
+	{
+		const U32 texelSize = getFormatInfo(format).m_texelSize;
+		const U32 alignment = max(4u, nextPowerOfTwo(texelSize));
+		const U32 size = count * texelSize + alignment; // Over-allocate
+
+		UnifiedGeometryBufferAllocation out;
+		m_pool.allocate(size, alignment, out.m_token);
+
+		const U32 remainder = out.m_token.m_offset % texelSize;
+		out.m_realOffset = U32(out.m_token.m_offset + (texelSize - remainder));
+		out.m_realAllocatedSize = count * texelSize;
+		ANKI_ASSERT(isAligned(texelSize, out.m_realOffset));
+		ANKI_ASSERT(out.m_realOffset + out.m_realAllocatedSize <= out.m_token.m_offset + out.m_token.m_size);
+		return out;
 	}
 
 	void deferredFree(UnifiedGeometryBufferAllocation& alloc)
 	{
 		m_pool.deferredFree(alloc.m_token);
+		alloc.m_realAllocatedSize = 0;
+		alloc.m_realOffset = kMaxU32;
 	}
 
 	void endFrame()

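Note on the new allocateFormat(): the over-allocation and offset fix-up only matter for formats whose texel size is not a power of two (a power-of-two texel size is already covered by the pool alignment). A minimal, self-contained sketch of the same arithmetic follows; the pool offset and the 12-byte texel size are hypothetical values, and nextPowerOfTwo() is a stand-in for the engine helper of the same name.

#include <cassert>
#include <cstdint>

// Smallest power of two >= x (x > 0); assumed behavior of the engine's nextPowerOfTwo().
static uint32_t nextPowerOfTwo(uint32_t x)
{
	uint32_t p = 1;
	while(p < x)
	{
		p *= 2;
	}
	return p;
}

int main()
{
	const uint32_t texelSize = 12; // Hypothetical 3x32-bit vertex stream
	const uint32_t count = 1000;

	const uint32_t alignment = (texelSize < 4) ? 4 : nextPowerOfTwo(texelSize); // 16
	const uint32_t size = count * texelSize + alignment; // Over-allocate by one alignment

	// Suppose the pool returns a 16-aligned offset that is not a multiple of 12.
	const uint32_t poolOffset = 4096;

	const uint32_t remainder = poolOffset % texelSize; // 4
	const uint32_t realOffset = poolOffset + (texelSize - remainder); // 4104 == 342 * 12

	assert(realOffset % texelSize == 0);
	assert(realOffset + count * texelSize <= poolOffset + size);
	return 0;
}
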
+ 1 - 1
AnKi/Gr/CommandBuffer.h

@@ -32,7 +32,7 @@ public:
 	BufferUsageBit m_previousUsage = BufferUsageBit::kNone;
 	BufferUsageBit m_nextUsage = BufferUsageBit::kNone;
 	PtrSize m_offset = 0;
-	PtrSize m_size = 0;
+	PtrSize m_range = 0;
 };
 
 class AccelerationStructureBarrierInfo

+ 1 - 1
AnKi/Gr/RenderGraph.cpp

@@ -1333,7 +1333,7 @@ void RenderGraph::run() const
 			inf.m_previousUsage = barrier.m_usageBefore;
 			inf.m_nextUsage = barrier.m_usageAfter;
 			inf.m_offset = m_ctx->m_buffers[barrier.m_idx].m_offset;
-			inf.m_size = m_ctx->m_buffers[barrier.m_idx].m_range;
+			inf.m_range = m_ctx->m_buffers[barrier.m_idx].m_range;
 			inf.m_buffer = m_ctx->m_buffers[barrier.m_idx].m_buffer.get();
 		}
 		DynamicArray<AccelerationStructureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> asBarriers(pool);

+ 4 - 4
AnKi/Gr/Vulkan/CommandBufferImpl.cpp

@@ -676,15 +676,15 @@ void CommandBufferImpl::setPipelineBarrierInternal(ConstWeakArray<TextureBarrier
 		ANKI_ASSERT(barrier.m_offset < impl.getSize());
 		inf.offset = barrier.m_offset;
 
-		if(barrier.m_size == kMaxPtrSize)
+		if(barrier.m_range == kMaxPtrSize)
 		{
 			inf.size = VK_WHOLE_SIZE;
 		}
 		else
 		{
-			ANKI_ASSERT(barrier.m_size > 0);
-			ANKI_ASSERT(barrier.m_offset + barrier.m_size <= impl.getSize());
-			inf.size = barrier.m_size;
+			ANKI_ASSERT(barrier.m_range > 0);
+			ANKI_ASSERT(barrier.m_offset + barrier.m_range <= impl.getSize());
+			inf.size = barrier.m_range;
 		}
 
 		VkPipelineStageFlags srcStage;

+ 3 - 1
AnKi/Renderer/RenderQueue.h

@@ -36,8 +36,10 @@ public:
 
 	U32 m_worldTransformsOffset;
 	U32 m_uniformsOffset;
-	U32 m_geometryOffset;
+	U32 m_meshLodOffset;
 	U32 m_boneTransformsOffset;
+	U32 m_particleEmitterOffset;
+	U32 m_instanceCount;
 
 	union
 	{

+ 4 - 4
AnKi/Renderer/RtShadows.cpp

@@ -630,12 +630,12 @@ void RtShadows::buildSbt(RenderingContext& ctx)
 		// Init SBT record
 		memcpy(sbt, &shaderGroupHandles[element.m_shaderGroupHandleIndex * shaderHandleSize], shaderHandleSize);
 
-		ANKI_ASSERT(shaderHandleSize + sizeof(GpuSceneRenderable) <= m_sbtRecordSize);
-		GpuSceneRenderable* shaderRecord = reinterpret_cast<GpuSceneRenderable*>(sbt + shaderHandleSize);
+		ANKI_ASSERT(shaderHandleSize + sizeof(GpuSceneRenderableVertex) <= m_sbtRecordSize);
+		GpuSceneRenderableVertex* shaderRecord = reinterpret_cast<GpuSceneRenderableVertex*>(sbt + shaderHandleSize);
 		shaderRecord->m_worldTransformsOffset = element.m_worldTransformsOffset;
 		shaderRecord->m_uniformsOffset = element.m_uniformsOffset;
-		shaderRecord->m_geometryOffset = element.m_geometryOffset;
-		shaderRecord->m_boneTransformsOffset = 0;
+		shaderRecord->m_meshLodOffset = element.m_geometryOffset;
+		shaderRecord->m_boneTransformsOrParticleEmitterOffset = 0;
 
 		sbt += m_sbtRecordSize;
 	}

+ 10 - 8
AnKi/Renderer/Utils/Drawer.cpp

@@ -86,19 +86,21 @@ void RenderableDrawer::flushDrawcall(Context& ctx, CommandBuffer& cmdb)
 {
 	// Instance buffer
 	RebarAllocation token;
-	GpuSceneRenderablePacked* instances = static_cast<GpuSceneRenderablePacked*>(
-		RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(GpuSceneRenderablePacked) * ctx.m_cachedRenderElementCount, token));
+	GpuSceneRenderableVertex* instances = static_cast<GpuSceneRenderableVertex*>(
+		RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(GpuSceneRenderableVertex) * ctx.m_cachedRenderElementCount, token));
 	for(U32 i = 0; i < ctx.m_cachedRenderElementCount; ++i)
 	{
-		GpuSceneRenderable renderable = {};
+		GpuSceneRenderableVertex renderable = {};
 		renderable.m_worldTransformsOffset = ctx.m_cachedRenderElements[i]->m_worldTransformsOffset;
 		renderable.m_uniformsOffset = ctx.m_cachedRenderElements[i]->m_uniformsOffset;
-		renderable.m_geometryOffset = ctx.m_cachedRenderElements[i]->m_geometryOffset;
-		renderable.m_boneTransformsOffset = ctx.m_cachedRenderElements[i]->m_boneTransformsOffset;
-		instances[i] = packGpuSceneRenderable(renderable);
+		renderable.m_meshLodOffset = ctx.m_cachedRenderElements[i]->m_meshLodOffset;
+		renderable.m_boneTransformsOrParticleEmitterOffset = ctx.m_cachedRenderElements[i]->m_boneTransformsOffset
+																 ? ctx.m_cachedRenderElements[i]->m_boneTransformsOffset
+																 : ctx.m_cachedRenderElements[i]->m_particleEmitterOffset;
+		instances[i] = renderable;
 	}
 
-	cmdb.bindVertexBuffer(0, &RebarTransientMemoryPool::getSingleton().getBuffer(), token.m_offset, sizeof(GpuSceneRenderablePacked),
+	cmdb.bindVertexBuffer(0, &RebarTransientMemoryPool::getSingleton().getBuffer(), token.m_offset, sizeof(GpuSceneRenderableVertex),
 						  VertexStepRate::kInstance);
 
 	// Set state
@@ -152,7 +154,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 	setState(args, cmdb);
 
 	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer.m_buffer, args.m_instanceRateRenderablesBuffer.m_offset,
-						  sizeof(GpuSceneRenderablePacked), VertexStepRate::kInstance);
+						  sizeof(GpuSceneRenderableVertex), VertexStepRate::kInstance);
 
 	U32 allUserCount = 0;
 	U32 bucketCount = 0;

+ 1 - 0
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -80,6 +80,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	{
 	case RenderingTechnique::kGBuffer:
 		aabbCount = GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementCount();
+		break;
 	case RenderingTechnique::kDepth:
 		aabbCount = GpuSceneArrays::RenderableAabbDepth::getSingleton().getElementCount();
 		break;

+ 7 - 20
AnKi/Resource/MeshResource.cpp

@@ -118,7 +118,7 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 		lod.m_indexCount = header.m_totalIndexCounts[l];
 		ANKI_ASSERT((lod.m_indexCount % 3) == 0 && "Expecting triangles");
 		const PtrSize indexBufferSize = PtrSize(lod.m_indexCount) * getIndexSize(m_indexType);
-		UnifiedGeometryBuffer::getSingleton().allocate(indexBufferSize, getIndexSize(m_indexType), lod.m_indexBufferAllocationToken);
+		lod.m_indexBufferAllocationToken = UnifiedGeometryBuffer::getSingleton().allocate(indexBufferSize, getIndexSize(m_indexType));
 
 		// Vertex stuff
 		lod.m_vertexCount = header.m_totalVertexCounts[l];
@@ -131,19 +131,8 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 
 			m_presentVertStreams |= VertexStreamMask(1 << stream);
 
-			const U32 texelSize = getFormatInfo(kMeshRelatedVertexStreamFormats[stream]).m_texelSize;
-			const U32 alignment = max(4u, nextPowerOfTwo(texelSize));
-			const PtrSize vertexBufferSize = PtrSize(lod.m_vertexCount) * texelSize + alignment;
-
-			UnifiedGeometryBuffer::getSingleton().allocate(vertexBufferSize, alignment, lod.m_vertexBuffersAllocationToken[stream]);
-
-			// We need to align the actual offset to the texel size
-			const PtrSize remainder = lod.m_vertexBuffersAllocationToken[stream].getOffset() % texelSize;
-			lod.m_fixedUnifiedGeometryBufferOffset[stream] = U8(texelSize - remainder);
-
-			ANKI_ASSERT((lod.m_vertexBuffersAllocationToken[stream].getOffset() + lod.m_fixedUnifiedGeometryBufferOffset[stream]) % texelSize == 0);
-			ANKI_ASSERT(lod.m_fixedUnifiedGeometryBufferOffset[stream] + PtrSize(lod.m_vertexCount) * texelSize
-						<= lod.m_vertexBuffersAllocationToken[stream].getAllocatedSize());
+			lod.m_vertexBuffersAllocationToken[stream] =
+				UnifiedGeometryBuffer::getSingleton().allocateFormat(kMeshRelatedVertexStreamFormats[stream], lod.m_vertexCount);
 		}
 
 		// BLAS
@@ -157,8 +146,7 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			inf.m_bottomLevel.m_indexCount = lod.m_indexCount;
 			inf.m_bottomLevel.m_indexType = m_indexType;
 			inf.m_bottomLevel.m_positionBuffer = &UnifiedGeometryBuffer::getSingleton().getBuffer();
-			inf.m_bottomLevel.m_positionBufferOffset = lod.m_vertexBuffersAllocationToken[VertexStreamId::kPosition].getOffset()
-													   + lod.m_fixedUnifiedGeometryBufferOffset[VertexStreamId::kPosition];
+			inf.m_bottomLevel.m_positionBufferOffset = lod.m_vertexBuffersAllocationToken[VertexStreamId::kPosition].getOffset();
 			inf.m_bottomLevel.m_positionStride = getFormatInfo(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition]).m_texelSize;
 			inf.m_bottomLevel.m_positionsFormat = kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition];
 			inf.m_bottomLevel.m_positionCount = lod.m_vertexCount;
@@ -272,8 +260,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 
 			// Copy
 			cmdb->copyBufferToBuffer(&handle.getBuffer(), handle.getOffset(), unifiedGeometryBuffer,
-									 lod.m_vertexBuffersAllocationToken[stream].getOffset() + lod.m_fixedUnifiedGeometryBufferOffset[stream],
-									 handle.getRange());
+									 lod.m_vertexBuffersAllocationToken[stream].getOffset(), handle.getRange());
 		}
 	}
 
@@ -285,7 +272,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 		BufferBarrierInfo bufferBarrier;
 		bufferBarrier.m_buffer = unifiedGeometryBuffer;
 		bufferBarrier.m_offset = 0;
-		bufferBarrier.m_size = kMaxPtrSize;
+		bufferBarrier.m_range = kMaxPtrSize;
 		bufferBarrier.m_previousUsage = BufferUsageBit::kTransferDestination;
 		bufferBarrier.m_nextUsage = unifiedGeometryBufferNonTransferUsage;
 
@@ -321,7 +308,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 		BufferBarrierInfo bufferBarrier;
 		bufferBarrier.m_buffer = unifiedGeometryBuffer;
 		bufferBarrier.m_offset = 0;
-		bufferBarrier.m_size = kMaxPtrSize;
+		bufferBarrier.m_range = kMaxPtrSize;
 		bufferBarrier.m_previousUsage = BufferUsageBit::kTransferDestination;
 		bufferBarrier.m_nextUsage = unifiedGeometryBufferNonTransferUsage;
 

+ 1 - 2
AnKi/Resource/MeshResource.h

@@ -64,7 +64,7 @@ public:
 	/// Get vertex buffer info.
 	void getVertexStreamInfo(U32 lod, VertexStreamId stream, PtrSize& bufferOffset, U32& vertexCount) const
 	{
-		bufferOffset = m_lods[lod].m_vertexBuffersAllocationToken[stream].getOffset() + m_lods[lod].m_fixedUnifiedGeometryBufferOffset[stream];
+		bufferOffset = m_lods[lod].m_vertexBuffersAllocationToken[stream].getOffset();
 		vertexCount = m_lods[lod].m_vertexCount;
 	}
 
@@ -104,7 +104,6 @@ private:
 	public:
 		UnifiedGeometryBufferAllocation m_indexBufferAllocationToken;
 		Array<UnifiedGeometryBufferAllocation, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
-		Array<U8, U32(VertexStreamId::kMeshRelatedCount)> m_fixedUnifiedGeometryBufferOffset = {};
 
 		U32 m_indexCount = 0;
 		U32 m_vertexCount = 0;

+ 5 - 4
AnKi/Scene/Components/ModelComponent.cpp

@@ -75,7 +75,7 @@ void ModelComponent::loadModelResource(CString filename)
 		uniformsSize += size;
 	}
 
-	GpuSceneBuffer::getSingleton().allocate(uniformsSize, 4, m_gpuSceneUniforms);
+	m_gpuSceneUniforms = GpuSceneBuffer::getSingleton().allocate(uniformsSize, 4);
 	uniformsSize = 0;
 
 	// Init the patches
@@ -187,10 +187,10 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			m_patchInfos[i].m_gpuSceneMeshLods.uploadToGpuScene(meshLods);
 
 			// Upload the GpuSceneRenderable
-			GpuSceneRenderable gpuRenderable;
+			GpuSceneRenderable gpuRenderable = {};
 			gpuRenderable.m_worldTransformsOffset = m_gpuSceneTransforms.getGpuSceneOffset();
 			gpuRenderable.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
-			gpuRenderable.m_geometryOffset = m_patchInfos[i].m_gpuSceneMeshLods.getGpuSceneOffset();
+			gpuRenderable.m_meshLodsOffset = m_patchInfos[i].m_gpuSceneMeshLods.getGpuSceneOffset();
 			gpuRenderable.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
 			m_patchInfos[i].m_gpuSceneRenderable.uploadToGpuScene(gpuRenderable);
 		}
@@ -373,7 +373,8 @@ void ModelComponent::setupRenderableQueueElements(U32 lod, RenderingTechnique te
 		queueElem.m_program = modelInf.m_program.get();
 		queueElem.m_worldTransformsOffset = m_gpuSceneTransforms.getGpuSceneOffset();
 		queueElem.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
-		queueElem.m_geometryOffset = m_patchInfos[i].m_gpuSceneMeshLods.getGpuSceneOffset() + lod * sizeof(GpuSceneMeshLod);
+		queueElem.m_meshLodOffset = m_patchInfos[i].m_gpuSceneMeshLods.getGpuSceneOffset() + lod * sizeof(GpuSceneMeshLod);
+		queueElem.m_particleEmitterOffset = 0;
 		queueElem.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
 		queueElem.m_indexCount = modelInf.m_indexCount;
 		queueElem.m_firstIndex = U32(modelInf.m_indexBufferOffset / 2 + modelInf.m_firstIndex);

+ 78 - 7
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -15,6 +15,7 @@
 #include <AnKi/Math.h>
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Shaders/Include/GpuSceneFunctions.h>
+#include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
 
 namespace anki {
 
@@ -197,6 +198,55 @@ ParticleEmitterComponent::ParticleEmitterComponent(SceneNode* node)
 	: SceneComponent(node, kClassType)
 	, m_spatial(this)
 {
+	// Allocate and populate a quad
+	const U32 vertCount = 4;
+	const U32 indexCount = 6;
+
+	m_quadPositions = UnifiedGeometryBuffer::getSingleton().allocateFormat(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition], vertCount);
+	m_quadUvs = UnifiedGeometryBuffer::getSingleton().allocateFormat(kMeshRelatedVertexStreamFormats[VertexStreamId::kUv], vertCount);
+	m_quadIndices = UnifiedGeometryBuffer::getSingleton().allocateFormat(Format::kR16_Uint, indexCount);
+
+	RebarAllocation positionsAlloc;
+	static_assert(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition] == Format::kR16G16B16A16_Unorm);
+	U16Vec4* transientPositions = RebarTransientMemoryPool::getSingleton().allocateFrame<U16Vec4>(vertCount, positionsAlloc);
+	transientPositions[0] = U16Vec4(0, 0, 0, 0);
+	transientPositions[1] = U16Vec4(kMaxU16, 0, 0, 0);
+	transientPositions[2] = U16Vec4(kMaxU16, kMaxU16, 0, 0);
+	transientPositions[3] = U16Vec4(0, kMaxU16, 0, 0);
+
+	RebarAllocation uvsAlloc;
+	static_assert(kMeshRelatedVertexStreamFormats[VertexStreamId::kUv] == Format::kR32G32_Sfloat);
+	Vec2* transientUvs = RebarTransientMemoryPool::getSingleton().allocateFrame<Vec2>(vertCount, uvsAlloc);
+	transientUvs[0] = Vec2(0.0f);
+	transientUvs[1] = Vec2(1.0f, 0.0f);
+	transientUvs[2] = Vec2(1.0f, 1.0f);
+	transientUvs[3] = Vec2(0.0f, 1.0f);
+
+	RebarAllocation indicesAlloc;
+	U16* transientIndices = RebarTransientMemoryPool::getSingleton().allocateFrame<U16>(indexCount, indicesAlloc);
+	transientIndices[0] = 0;
+	transientIndices[1] = 1;
+	transientIndices[2] = 3;
+	transientIndices[3] = 1;
+	transientIndices[4] = 2;
+	transientIndices[5] = 3;
+
+	CommandBufferInitInfo cmdbInit("Particle quad upload");
+	cmdbInit.m_flags |= CommandBufferFlag::kSmallBatch;
+	CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
+	Buffer* srcBuff = &RebarTransientMemoryPool::getSingleton().getBuffer();
+	Buffer* dstBuff = &UnifiedGeometryBuffer::getSingleton().getBuffer();
+	cmdb->copyBufferToBuffer(srcBuff, positionsAlloc.m_offset, dstBuff, m_quadPositions.getOffset(), positionsAlloc.m_range);
+	cmdb->copyBufferToBuffer(srcBuff, uvsAlloc.m_offset, dstBuff, m_quadUvs.getOffset(), uvsAlloc.m_range);
+	cmdb->copyBufferToBuffer(srcBuff, indicesAlloc.m_offset, dstBuff, m_quadIndices.getOffset(), indicesAlloc.m_range);
+	BufferBarrierInfo barrier;
+	barrier.m_buffer = dstBuff;
+	barrier.m_offset = 0;
+	barrier.m_range = kMaxPtrSize;
+	barrier.m_previousUsage = BufferUsageBit::kTransferDestination;
+	barrier.m_nextUsage = dstBuff->getBufferUsage();
+	cmdb->setPipelineBarrier({}, {&barrier, 1}, {});
+	cmdb->flush();
 }
 
 ParticleEmitterComponent::~ParticleEmitterComponent()
@@ -255,11 +305,11 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	}
 
 	// GPU scene allocations
-	GpuSceneBuffer::getSingleton().allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuScenePositions);
-	GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneAlphas);
-	GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneScales);
-	GpuSceneBuffer::getSingleton().allocate(m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(), alignof(U32),
-											m_gpuSceneUniforms);
+	m_gpuScenePositions = GpuSceneBuffer::getSingleton().allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32));
+	m_gpuSceneAlphas = GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32));
+	m_gpuSceneScales = GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32));
+	m_gpuSceneUniforms =
+		GpuSceneBuffer::getSingleton().allocate(m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(), alignof(U32));
 
 	// Allocate buckets
 	for(RenderingTechnique t :
@@ -323,6 +373,7 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = m_gpuScenePositions.getOffset();
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = m_gpuSceneAlphas.getOffset();
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = m_gpuSceneScales.getOffset();
+		particles.m_aliveParticleCount = m_aliveParticleCount;
 		if(!m_gpuSceneParticleEmitter.isValid())
 		{
 			m_gpuSceneParticleEmitter.allocate();
@@ -333,11 +384,30 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 		patcher.newCopy(*info.m_framePool, m_gpuSceneUniforms, m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
 						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getBegin());
 
+		// Upload mesh LODs
+		GpuSceneMeshLod meshLod = {};
+		meshLod.m_vertexOffsets[U32(VertexStreamId::kPosition)] =
+			m_quadPositions.getOffset() / getFormatInfo(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition]).m_texelSize;
+		meshLod.m_vertexOffsets[U32(VertexStreamId::kUv)] =
+			m_quadUvs.getOffset() / getFormatInfo(kMeshRelatedVertexStreamFormats[VertexStreamId::kUv]).m_texelSize;
+		meshLod.m_indexCount = 6;
+		meshLod.m_firstIndex = m_quadIndices.getOffset() / sizeof(U16);
+		meshLod.m_positionScale = 1.0f;
+		meshLod.m_positionTranslation = Vec3(-0.5f, -0.5f, 0.0f);
+		Array<GpuSceneMeshLod, kMaxLodCount> meshLods;
+		meshLods.fill(meshLod);
+		if(!m_gpuSceneMeshLods.isValid())
+		{
+			m_gpuSceneMeshLods.allocate();
+		}
+		m_gpuSceneMeshLods.uploadToGpuScene(meshLods);
+
 		// Upload the GpuSceneRenderable
 		GpuSceneRenderable renderable;
 		renderable.m_boneTransformsOffset = 0;
-		renderable.m_geometryOffset = m_gpuSceneParticleEmitter.getGpuSceneOffset();
 		renderable.m_uniformsOffset = m_gpuSceneUniforms.getOffset();
+		renderable.m_meshLodsOffset = m_gpuSceneMeshLods.getGpuSceneOffset();
+		renderable.m_particleEmitterOffset = m_gpuSceneParticleEmitter.getGpuSceneOffset();
 		renderable.m_worldTransformsOffset = 0;
 		if(!m_gpuSceneRenderable.isValid())
 		{
@@ -530,7 +600,8 @@ void ParticleEmitterComponent::setupRenderableQueueElements(RenderingTechnique t
 	el->m_program = prog.get();
 	el->m_worldTransformsOffset = 0;
 	el->m_uniformsOffset = m_gpuSceneUniforms.getOffset();
-	el->m_geometryOffset = m_gpuSceneParticleEmitter.getGpuSceneOffset();
+	el->m_meshLodOffset = m_gpuSceneMeshLods.getGpuSceneOffset();
+	el->m_particleEmitterOffset = m_gpuSceneParticleEmitter.getGpuSceneOffset();
 	el->m_boneTransformsOffset = 0;
 	el->m_vertexCount = 6 * m_aliveParticleCount;
 	el->m_firstVertex = 0;

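As a sanity check on the quad data above: the positions are stored as R16G16B16A16_Unorm, and the GpuSceneMeshLod uploaded in update() uses m_positionScale = 1.0 and m_positionTranslation = (-0.5, -0.5, 0), so the decoded quad ends up centred on the particle. A small self-contained sketch of that decode; the n / 65535 unorm rule is an assumption about how the unified-geometry fetch unpacks the format.

#include <cassert>
#include <cstdint>

int main()
{
	const uint16_t kMaxU16 = 0xFFFF;

	// One corner of the quad as written into the ReBAR staging buffer above.
	const uint16_t corner[3] = {kMaxU16, kMaxU16, 0};

	// Values from the GpuSceneMeshLod this component uploads.
	const float positionScale = 1.0f;
	const float positionTranslation[3] = {-0.5f, -0.5f, 0.0f};

	float local[3];
	for(int i = 0; i < 3; ++i)
	{
		const float unorm = float(corner[i]) / float(kMaxU16); // R16G16B16A16_Unorm decode
		local[i] = unorm * positionScale + positionTranslation[i];
	}

	// The (kMaxU16, kMaxU16, 0) corner lands at (+0.5, +0.5, 0), so the whole
	// quad spans [-0.5, 0.5] around the particle's world position.
	assert(local[0] == 0.5f && local[1] == 0.5f && local[2] == 0.0f);
	return 0;
}
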
+ 6 - 0
AnKi/Scene/Components/ParticleEmitterComponent.h

@@ -10,6 +10,7 @@
 #include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Scene/GpuSceneArray.h>
 #include <AnKi/Resource/ParticleEmitterResource.h>
+#include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
 #include <AnKi/Collision/Aabb.h>
 #include <AnKi/Util/WeakArray.h>
 
@@ -62,12 +63,17 @@ private:
 	Second m_timeLeftForNextEmission = 0.0;
 	U32 m_aliveParticleCount = 0;
 
+	UnifiedGeometryBufferAllocation m_quadPositions;
+	UnifiedGeometryBufferAllocation m_quadUvs;
+	UnifiedGeometryBufferAllocation m_quadIndices;
+
 	GpuSceneBufferAllocation m_gpuScenePositions;
 	GpuSceneBufferAllocation m_gpuSceneAlphas;
 	GpuSceneBufferAllocation m_gpuSceneScales;
 	GpuSceneBufferAllocation m_gpuSceneUniforms;
 	GpuSceneArrays::ParticleEmitter::Allocation m_gpuSceneParticleEmitter;
 	GpuSceneArrays::Renderable::Allocation m_gpuSceneRenderable;
+	GpuSceneArrays::MeshLod::Allocation m_gpuSceneMeshLods;
 	GpuSceneArrays::RenderableAabbGBuffer::Allocation m_gpuSceneRenderableAabbGBuffer;
 	GpuSceneArrays::RenderableAabbDepth::Allocation m_gpuSceneRenderableAabbDepth;
 	GpuSceneArrays::RenderableAabbForward::Allocation m_gpuSceneRenderableAabbForward;

+ 1 - 1
AnKi/Scene/Components/SkinComponent.cpp

@@ -48,7 +48,7 @@ void SkinComponent::loadSkeletonResource(CString fname)
 	m_boneTrfs[1].resize(boneCount, Mat3x4::getIdentity());
 	m_animationTrfs.resize(boneCount, Trf{Vec3(0.0f), Quat::getIdentity(), 1.0f});
 
-	GpuSceneBuffer::getSingleton().allocate(sizeof(Mat4) * boneCount * 2, 4, m_gpuSceneBoneTransforms);
+	m_gpuSceneBoneTransforms = GpuSceneBuffer::getSingleton().allocate(sizeof(Mat4) * boneCount * 2, 4);
 }
 
 void SkinComponent::playAnimation(U32 track, AnimationResourcePtr anim, const AnimationPlayInfo& info)

+ 1 - 1
AnKi/Scene/GpuSceneArray.inl.h

@@ -14,7 +14,7 @@ GpuSceneArray<TGpuSceneObject, kId>::GpuSceneArray(U32 maxArraySize)
 {
 	maxArraySize = getAlignedRoundUp(sizeof(SubMask), maxArraySize);
 	const U32 alignment = GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment;
-	GpuSceneBuffer::getSingleton().allocate(sizeof(TGpuSceneObject) * maxArraySize, alignment, m_gpuSceneAllocation);
+	m_gpuSceneAllocation = GpuSceneBuffer::getSingleton().allocate(sizeof(TGpuSceneObject) * maxArraySize, alignment);
 
 	m_inUseIndicesMask.resize(maxArraySize / sizeof(SubMask), false);
 	ANKI_ASSERT(m_inuUseIndicesCount == 0);

+ 0 - 2
AnKi/Scene/Visibility.cpp

@@ -310,8 +310,6 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 	const FrustumFlags frustumFlags = m_frcCtx->m_frustum;
 	const Frustum& primaryFrustum = *m_frcCtx->m_primaryFrustum.m_frustum;
 
-	StackMemoryPool& framePool = SceneGraph::getSingleton().getFrameMemoryPool();
-
 	WeakArray<RenderQueue> nextQueues;
 	WeakArray<VisibilityFrustum> nextFrustums;
 

+ 1 - 0
AnKi/ShaderCompiler/Dxc.cpp

@@ -90,6 +90,7 @@ Error compileHlslToSpirv(CString src, ShaderType shaderType, Bool compileWith16b
 	dxcArgs.emplaceBack(profile(shaderType));
 	dxcArgs.emplaceBack("-spirv");
 	dxcArgs.emplaceBack("-fspv-target-env=vulkan1.1spirv1.4");
+	// dxcArgs.emplaceBack("-fvk-support-nonzero-base-instance"); // Match DX12's behavior, SV_INSTANCEID starts from zero
 	// dxcArgs.emplaceBack("-Zi"); // Debug info
 	dxcArgs.emplaceBack(hlslFilename);
 

+ 3 - 3
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -17,7 +17,7 @@
 
 struct VertIn
 {
-	[[vk::location(0)]] GpuSceneRenderablePacked m_gpuSceneRenderable : RENDERABLE;
+	[[vk::location(0)]] UVec4 m_gpuSceneRenderable : RENDERABLE;
 	U32 m_svVertexId : SV_VERTEXID;
 };
 
@@ -34,8 +34,8 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_geometryOffset);
+	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodOffset);
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 

+ 3 - 3
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -18,7 +18,7 @@
 
 struct VertIn
 {
-	[[vk::location(0)]] GpuSceneRenderablePacked m_gpuSceneRenderable : RENDERABLE;
+	[[vk::location(0)]] UVec4 m_gpuSceneRenderable : RENDERABLE;
 	U32 m_svVertexId : SV_VERTEXID;
 };
 
@@ -36,8 +36,8 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_geometryOffset);
+	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodOffset);
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 

+ 17 - 11
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -11,7 +11,7 @@
 
 struct VertIn
 {
-	[[vk::location(0)]] GpuSceneRenderablePacked m_gpuSceneRenderable : RENDERABLE;
+	[[vk::location(0)]] UVec4 m_gpuSceneRenderable : RENDERABLE;
 	U32 m_svVertexId : SV_VERTEXID;
 };
 
@@ -36,28 +36,34 @@ struct VertOut
 
 VertOut main(VertIn input)
 {
-	const U32 particleId = input.m_svVertexId / 6u;
-	const U32 vertexId = input.m_svVertexId % 6u;
+	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_boneTransformsOrParticleEmitterOffset);
+	const GpuSceneMeshLod meshLod = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodOffset);
 
-	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_geometryOffset);
+	const U32 particleId = input.m_svVertexId / meshLod.m_indexCount;
+	const U32 vertexId = g_unifiedGeom_R16_Uint[meshLod.m_firstIndex + input.m_svVertexId % meshLod.m_indexCount];
 
 	U32 idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticlePosition] + particleId * sizeof(Vec3);
-	const Vec3 vertPos = g_gpuScene.Load<Vec3>(idx);
+	const Vec3 particlePos = g_gpuScene.Load<Vec3>(idx);
 	idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticleScale] + particleId * sizeof(F32);
-	const F32 vertScale = g_gpuScene.Load<F32>(idx);
+	const F32 particleScale = g_gpuScene.Load<F32>(idx);
 	idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticleColor] + particleId * sizeof(F32);
-	const F32 vertAlpha = g_gpuScene.Load<F32>(idx);
+	const F32 particleAlpha = g_gpuScene.Load<F32>(idx);
 
 	VertOut output;
 
-	output.m_uv = Vec2(vertexId & 1u, ((vertexId + 1u) / 3u) & 1u);
+	output.m_uv = g_unifiedGeom_R32G32_Sfloat[meshLod.m_vertexOffsets[(U32)VertexStreamId::kUv] + vertexId];
 
-	output.m_worldPos = mul(g_globalUniforms.m_cameraTransform, Vec4((output.m_uv - 0.5) * vertScale, 0.0, 0.0)) + vertPos;
+	const Vec3 localPos =
+		g_unifiedGeom_R16G16B16A16_Unorm[meshLod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + vertexId] * meshLod.m_positionScale
+		+ meshLod.m_positionTranslation;
+
+	// Apply the particle scale, rotate the mesh to face the camera (billboard) and finally apply the particle position
+	output.m_worldPos = mul(g_globalUniforms.m_cameraTransform, Vec4(localPos * particleScale, 0.0)) + particlePos;
 
 	output.m_svPosition = mul(g_globalUniforms.m_viewProjectionMatrix, Vec4(output.m_worldPos, 1.0));
 
-	output.m_alpha = vertAlpha;
+	output.m_alpha = particleAlpha;
 	output.m_uniformsOffset = renderable.m_uniformsOffset;
 
 	return output;

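The new vertex-id math above replaces the hardcoded two-triangle pattern: the draw is still non-indexed (6 * aliveParticleCount vertices), so each particle consumes meshLod.m_indexCount consecutive SV_VERTEXID values and the real quad vertex is looked up through the index buffer. A self-contained sketch using the quad indices written by ParticleEmitterComponent.cpp (0 1 3, 1 2 3); the local array stands in for the g_unifiedGeom_R16_Uint fetch.

#include <cassert>
#include <cstdint>

int main()
{
	const uint32_t indexCount = 6;
	const uint16_t indices[6] = {0, 1, 3, 1, 2, 3}; // Quad indices from ParticleEmitterComponent

	// E.g. the 9th emitted vertex (SV_VERTEXID == 8) belongs to the second particle
	// and maps to quad vertex 3.
	const uint32_t svVertexId = 8;
	const uint32_t particleId = svVertexId / indexCount; // 1
	const uint32_t vertexId = indices[svVertexId % indexCount]; // indices[2] == 3

	assert(particleId == 1 && vertexId == 3);
	return 0;
}
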
+ 7 - 6
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -60,7 +60,7 @@
 struct VertIn
 {
 	U32 m_svVertexId : SV_VERTEXID;
-	[[vk::location(0)]] GpuSceneRenderablePacked m_gpuSceneRenderable : RENDERABLE;
+	[[vk::location(0)]] UVec4 m_gpuSceneRenderable : RENDERABLE;
 };
 
 struct VertOut
@@ -107,16 +107,17 @@ struct Mat3x4_2
 	Mat3x4 m_b;
 };
 
-Mat3x4_2 loadBoneTransforms(UnpackedMeshVertex vert, GpuSceneRenderable renderable, U32 index)
+Mat3x4_2 loadBoneTransforms(UnpackedMeshVertex vert, GpuSceneRenderableVertex renderable, U32 index)
 {
 	const U32 boneIdx = vert.m_boneIndices[index];
-	U32 byteOffset = renderable.m_boneTransformsOffset;
+	U32 byteOffset = renderable.m_boneTransformsOrParticleEmitterOffset;
 	byteOffset += boneIdx * sizeof(Mat3x4) * 2;
 	return g_gpuScene.Load<Mat3x4_2>(byteOffset);
 }
 
 #if ANKI_BONES
-void skinning(UnpackedMeshVertex vert, GpuSceneRenderable renderable, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 normal, inout RVec4 tangent)
+void skinning(UnpackedMeshVertex vert, GpuSceneRenderableVertex renderable, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 normal,
+			  inout RVec4 tangent)
 {
 	Mat3x4_2 mats = loadBoneTransforms(vert, renderable, 0);
 
@@ -170,8 +171,8 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_geometryOffset);
+	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_meshLodOffset);
 	UnpackedMeshVertex vert = loadVertex(mesh, input.m_svVertexId, ANKI_BONES);
 
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);

+ 3 - 3
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -20,7 +20,7 @@
 struct VertIn
 {
 	U32 m_svVertexId : SV_VERTEXID;
-	[[vk::location(0)]] GpuSceneRenderablePacked m_gpuSceneRenderable : RENDERABLE;
+	[[vk::location(0)]] UVec4 m_gpuSceneRenderable : RENDERABLE;
 };
 
 struct VertOut
@@ -45,8 +45,8 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_geometryOffset);
+	const GpuSceneRenderableVertex renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
+	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_boneTransformsOrParticleEmitterOffset);
 
 	// Read vertex
 	const U32 particleId = input.m_svVertexId;

+ 8 - 5
AnKi/Shaders/GpuVisibility.ankiprog

@@ -22,7 +22,7 @@
 [[vk::binding(2)]] ByteAddressBuffer g_gpuScene;
 
 // These 2 have the same size
-[[vk::binding(3)]] RWStructuredBuffer<GpuSceneRenderable> g_instanceRateRenderables;
+[[vk::binding(3)]] RWStructuredBuffer<UVec4> g_instanceRateRenderables;
 [[vk::binding(4)]] RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs;
 
 // Index pointing to the above arrays. One for each render state bucket
@@ -180,7 +180,7 @@
 	indirectIdx += g_drawIndirectArgsOffsets[renderStateBucket];
 
 	const GpuSceneRenderable renderableIn = g_renderables[renderableIdx];
-	const U32 meshLodOffset = renderableIn.m_geometryOffset + sizeof(GpuSceneMeshLod) * lod;
+	const U32 meshLodOffset = renderableIn.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;
 	const GpuSceneMeshLod meshLod = g_gpuScene.Load<GpuSceneMeshLod>(meshLodOffset);
 
 	DrawIndexedIndirectArgs indirect;
@@ -191,9 +191,12 @@
 	indirect.m_firstInstance = indirectIdx;
 	g_drawIndexedIndirectArgs[indirectIdx] = indirect;
 
-	GpuSceneRenderable renderableOut = renderableIn;
-	renderableOut.m_geometryOffset = meshLodOffset;
-	g_instanceRateRenderables[indirectIdx] = renderableOut;
+	UVec4 instanceVertex;
+	instanceVertex.x = renderableIn.m_worldTransformsOffset;
+	instanceVertex.y = renderableIn.m_uniformsOffset;
+	instanceVertex.z = meshLodOffset;
+	instanceVertex.w = renderableIn.m_boneTransformsOffset;
+	g_instanceRateRenderables[indirectIdx] = instanceVertex;
 
 	// Now update the stats
 #if STATS

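The indirection worth spelling out in this hunk: GpuSceneRenderable keeps m_meshLodsOffset, the start of a kMaxLodCount-sized GpuSceneMeshLod array, and visibility bakes the chosen LOD into the per-instance UVec4 so material shaders load exactly one GpuSceneMeshLod. A small sketch of that offset arithmetic; the 48-byte size follows from the static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 3) in GpuSceneTypes.h, while the base offset and LOD index are hypothetical.

#include <cassert>
#include <cstdint>

int main()
{
	const uint32_t sizeofGpuSceneMeshLod = 3 * 16; // sizeof(Vec4) * 3 == 48 bytes

	const uint32_t meshLodsOffset = 8192; // Hypothetical GpuSceneRenderable::m_meshLodsOffset
	const uint32_t lod = 2;

	// What GpuVisibility writes into instanceVertex.z for this draw.
	const uint32_t meshLodOffset = meshLodsOffset + lod * sizeofGpuSceneMeshLod;

	assert(meshLodOffset == 8288); // 8192 + 96
	return 0;
}
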
+ 4 - 14
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -9,23 +9,13 @@
 
 ANKI_BEGIN_NAMESPACE
 
-inline GpuSceneRenderablePacked packGpuSceneRenderable(GpuSceneRenderable x)
+inline GpuSceneRenderableVertex unpackGpuSceneRenderableVertex(UVec4 x)
 {
-	GpuSceneRenderablePacked o;
-	o[0] = x.m_worldTransformsOffset;
-	o[1] = x.m_uniformsOffset;
-	o[2] = x.m_geometryOffset;
-	o[3] = x.m_boneTransformsOffset;
-	return o;
-}
-
-inline GpuSceneRenderable unpackGpuSceneRenderable(GpuSceneRenderablePacked x)
-{
-	GpuSceneRenderable o;
+	GpuSceneRenderableVertex o;
 	o.m_worldTransformsOffset = x[0];
 	o.m_uniformsOffset = x[1];
-	o.m_geometryOffset = x[2];
-	o.m_boneTransformsOffset = x[3];
+	o.m_meshLodOffset = x[2];
+	o.m_boneTransformsOrParticleEmitterOffset = x[3];
 	return o;
 }
 

+ 13 - 5
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -21,12 +21,20 @@ struct GpuSceneRenderable
 {
 	U32 m_worldTransformsOffset; ///< First is the crnt transform and the 2nd the previous
 	U32 m_uniformsOffset;
-	U32 m_geometryOffset; ///< Points to a GpuSceneMeshLod or a GpuSceneParticleEmitter
-	U32 m_boneTransformsOffset;
+	U32 m_meshLodsOffset; ///< Points to an array of GpuSceneMeshLod sized kMaxLodCount.
+	U32 m_boneTransformsOffset; ///< Array of Mat3x4 or 0 if its not a skin.
+	U32 m_particleEmitterOffset; ///< Offset to GpuSceneParticleEmitter or 0 if it's not an emitter.
 };
-static_assert(sizeof(GpuSceneRenderable) == sizeof(Vec4) * 1);
 
-typedef UVec4 GpuSceneRenderablePacked;
+/// Almost similar to GpuSceneRenderable but with only what the material shaders need. Needs to fit in a UVec4 vertex attribute.
+struct GpuSceneRenderableVertex
+{
+	U32 m_worldTransformsOffset;
+	U32 m_uniformsOffset;
+	U32 m_meshLodOffset; ///< Points to a single GpuSceneMeshLod and not an array
+	U32 m_boneTransformsOrParticleEmitterOffset;
+};
+static_assert(sizeof(GpuSceneRenderableVertex) == sizeof(UVec4));
 
 /// Used in visibility testing.
 struct GpuSceneRenderableAabb
@@ -54,7 +62,7 @@ static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 3);
 struct GpuSceneParticleEmitter
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kParticleRelatedCount];
-	U32 m_padding0;
+	U32 m_aliveParticleCount;
 };
 static_assert(sizeof(GpuSceneParticleEmitter) == sizeof(Vec4) * 2);
 

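To recap the data-structure split that most of this commit revolves around: the scene-resident GpuSceneRenderable grows a fifth offset for the particle emitter, while the per-instance GpuSceneRenderableVertex stays at exactly one UVec4 by folding bones and particle emitter into a single slot, since a renderable is never both skinned and an emitter. A self-contained sketch of building the vertex struct from the renderable, mirroring the instance fill in Drawer.cpp; plain uint32_t stands in for the engine's U32 and the numeric offsets are hypothetical.

#include <cassert>
#include <cstdint>

// Scene-resident record (five offsets, no packing requirement).
struct GpuSceneRenderable
{
	uint32_t m_worldTransformsOffset;
	uint32_t m_uniformsOffset;
	uint32_t m_meshLodsOffset; // Start of a kMaxLodCount array of GpuSceneMeshLod
	uint32_t m_boneTransformsOffset; // 0 if not skinned
	uint32_t m_particleEmitterOffset; // 0 if not a particle emitter
};

// Per-instance vertex attribute; must stay exactly 16 bytes (one UVec4).
struct GpuSceneRenderableVertex
{
	uint32_t m_worldTransformsOffset;
	uint32_t m_uniformsOffset;
	uint32_t m_meshLodOffset; // A single GpuSceneMeshLod, LOD already chosen
	uint32_t m_boneTransformsOrParticleEmitterOffset;
};
static_assert(sizeof(GpuSceneRenderableVertex) == 16, "Must fit a UVec4 attribute");

GpuSceneRenderableVertex makeVertex(const GpuSceneRenderable& r, uint32_t chosenMeshLodOffset)
{
	GpuSceneRenderableVertex v;
	v.m_worldTransformsOffset = r.m_worldTransformsOffset;
	v.m_uniformsOffset = r.m_uniformsOffset;
	v.m_meshLodOffset = chosenMeshLodOffset;
	// A renderable is either skinned or an emitter, never both, so one slot suffices.
	v.m_boneTransformsOrParticleEmitterOffset = r.m_boneTransformsOffset ? r.m_boneTransformsOffset : r.m_particleEmitterOffset;
	return v;
}

int main()
{
	const GpuSceneRenderable r = {128, 256, 512, 0, 1024}; // Hypothetical particle-emitter renderable
	const GpuSceneRenderableVertex v = makeVertex(r, 512);
	assert(v.m_boneTransformsOrParticleEmitterOffset == 1024);
	return 0;
}
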
+ 2 - 2
AnKi/Shaders/RtShadowsHit.ankiprog

@@ -20,7 +20,7 @@
 #pragma anki start ahit
 
 #if ALPHA_TEXTURE
-[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderable> g_gpuSceneRenderable;
+[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableVertex> g_gpuSceneRenderable;
 #endif
 
 [shader("anyhit")] void main(inout RayPayload payload, in Barycentrics barycentrics)
@@ -33,7 +33,7 @@
 
 	const Vec3 bary = Vec3(1.0f - barycentrics.m_value.x - barycentrics.m_value.y, barycentrics.m_value.x, barycentrics.m_value.y);
 
-	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(g_gpuSceneRenderable.m_geometryOffset);
+	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(g_gpuSceneRenderable.m_meshLodOffset);
 
 	const U32 idx0 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 0];
 	const U32 idx1 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 1];

+ 1 - 1
Tests/Gr/Gr.cpp

@@ -406,7 +406,7 @@ static void setBufferBarrier(CommandBufferPtr cmdb, BufferPtr buffer, BufferUsag
 	barrier.m_previousUsage = before;
 	barrier.m_nextUsage = after;
 	barrier.m_offset = offset;
-	barrier.m_size = range;
+	barrier.m_range = range;
 	barrier.m_buffer = buffer.get();
 
 	cmdb->setPipelineBarrier({}, {&barrier, 1}, {});

+ 5 - 1
Tests/Util/String.cpp

@@ -9,6 +9,8 @@
 
 ANKI_TEST(Util, String)
 {
+	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
+
 	// Copy
 	{
 		String a, b;
@@ -182,7 +184,7 @@ ANKI_TEST(Util, String)
 		ANKI_TEST_EXPECT_EQ(i, -9223372036854775807);
 
 		F64 f;
-		a += "123456789.145";
+		a = "123456789.145";
 		ANKI_TEST_EXPECT_NO_ERR(a.toNumber(f));
 		ANKI_TEST_EXPECT_EQ(f, 123456789.145);
 	}
@@ -205,4 +207,6 @@ ANKI_TEST(Util, String)
 		a.replaceAll("%foo%", "");
 		ANKI_TEST_EXPECT_EQ(a, "ajlkadsf");
 	}
+
+	DefaultMemoryPool::freeSingleton();
 }