Browse Source

Switch to GPU scene

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
bf629e68a6
39 changed files with 532 additions and 703 deletions
  1. 1 0
      AnKi/Core/App.cpp
  2. 2 1
      AnKi/Core/GpuMemoryPools.cpp
  3. 7 0
      AnKi/Core/GpuMemoryPools.h
  4. 5 3
      AnKi/Core/MaliHwCounters.cpp
  5. 2 2
      AnKi/Gr/Vulkan/BufferImpl.cpp
  6. 1 0
      AnKi/Renderer/Common.h
  7. 41 3
      AnKi/Renderer/Drawer.cpp
  8. 3 0
      AnKi/Renderer/Drawer.h
  9. 14 10
      AnKi/Renderer/ForwardShading.cpp
  10. 5 0
      AnKi/Renderer/GBuffer.cpp
  11. 10 0
      AnKi/Renderer/IndirectDiffuseProbes.cpp
  12. 10 0
      AnKi/Renderer/ProbeReflections.cpp
  13. 2 0
      AnKi/Renderer/RenderQueue.h
  14. 11 8
      AnKi/Renderer/Renderer.cpp
  15. 5 0
      AnKi/Renderer/Renderer.h
  16. 5 0
      AnKi/Renderer/ShadowMapping.cpp
  17. 5 5
      AnKi/Resource/MeshResource.cpp
  18. 2 2
      AnKi/Resource/MeshResource.h
  19. 9 3
      AnKi/Scene/Components/ModelComponent.cpp
  20. 4 3
      AnKi/Scene/Components/MoveComponent.cpp
  21. 7 1
      AnKi/Scene/Components/MoveComponent.h
  22. 67 36
      AnKi/Scene/Components/ParticleEmitterComponent.cpp
  23. 19 6
      AnKi/Scene/Components/ParticleEmitterComponent.h
  24. 3 3
      AnKi/Scene/Components/RenderComponent.cpp
  25. 4 3
      AnKi/Scene/Components/RenderComponent.h
  26. 10 77
      AnKi/Scene/ModelNode.cpp
  27. 8 0
      AnKi/Scene/ParticleEmitterNode.cpp
  28. 9 6
      AnKi/Shaders/ForwardShadingCommon.hlsl
  29. 18 17
      AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
  30. 26 17
      AnKi/Shaders/ForwardShadingParticles.ankiprog
  31. 60 77
      AnKi/Shaders/GBufferGeneric.ankiprog
  32. 0 389
      AnKi/Shaders/GBufferGenericGpuScene.ankiprog
  33. 27 0
      AnKi/Shaders/Include/GpuSceneFunctions.h
  34. 18 9
      AnKi/Shaders/Include/GpuSceneTypes.h
  35. 8 5
      AnKi/Shaders/Include/MaterialTypes.h
  36. 11 7
      AnKi/Shaders/Include/MeshTypes.h
  37. 69 0
      AnKi/Shaders/MaterialShadersCommon.hlsl
  38. 3 0
      AnKi/Util/Process.cpp
  39. 21 10
      Tools/Shader/ShaderProgramBinaryDumpMain.cpp

+ 1 - 0
AnKi/Core/App.cpp

@@ -372,6 +372,7 @@ Error App::initInternal(AllocAlignedCallback allocCb, void* allocCbUserData)
 	renderInit.m_globTimestamp = &m_globalTimestamp;
 	renderInit.m_gpuScenePool = m_gpuSceneMemPool;
 	renderInit.m_gpuSceneMicroPatcher = m_gpuSceneMicroPatcher;
+	renderInit.m_unifiedGometryMemoryPool = m_unifiedGometryMemPool;
 	m_renderer = newInstance<MainRenderer>(m_mainPool);
 	ANKI_CHECK(m_renderer->init(renderInit));
 

+ 2 - 1
AnKi/Core/GpuMemoryPools.cpp

@@ -20,7 +20,8 @@ void UnifiedGeometryMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const
 
 	const Array classes = {1_KB, 8_KB, 32_KB, 128_KB, 512_KB, 4_MB, 8_MB, 16_MB, poolSize};
 
-	BufferUsageBit buffUsage = BufferUsageBit::kVertex | BufferUsageBit::kIndex | BufferUsageBit::kTransferDestination;
+	BufferUsageBit buffUsage = BufferUsageBit::kVertex | BufferUsageBit::kIndex | BufferUsageBit::kTransferDestination
+							   | (BufferUsageBit::kAllTexture & BufferUsageBit::kAllRead);
 
 	if(gr->getDeviceCapabilities().m_rayTracingEnabled)
 	{

+ 7 - 0
AnKi/Core/GpuMemoryPools.h

@@ -186,6 +186,13 @@ public:
 	/// @note It's thread-safe.
 	void newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize, const void* data);
 
+	/// Returns true when m_crntFramePatchHeaders is non-empty, i.e. there is something for patchGpuScene to copy.
+	/// @note Not thread-safe. Nothing else should be happening before calling it.
+	Bool patchingIsNeeded() const
+	{
+		return m_crntFramePatchHeaders.getSize() > 0;
+	}
+
 	/// Copy the data to the GPU scene buffer.
 	/// @note Not thread-safe. Nothing else should be happening before calling it.
 	void patchGpuScene(RebarStagingGpuMemoryPool& rebarPool, CommandBuffer& cmdb, const BufferPtr& gpuSceneBuffer);

+ 5 - 3
AnKi/Core/MaliHwCounters.cpp

@@ -18,9 +18,11 @@ MaliHwCounters::MaliHwCounters(BaseMemoryPool* pool)
 {
 	ANKI_ASSERT(pool);
 #if ANKI_HWCPIPE_ENABLE
-	hwcpipe::HWCPipe* hwc = newInstance<hwcpipe::HWCPipe>(*m_pool);
-	hwc->set_enabled_gpu_counters({hwcpipe::GpuCounter::GpuCycles, hwcpipe::GpuCounter::ExternalMemoryWriteBytes,
-								   hwcpipe::GpuCounter::ExternalMemoryReadBytes});
+	const hwcpipe::CpuCounterSet cpuCounters;
+	const hwcpipe::GpuCounterSet gpuCounters = {hwcpipe::GpuCounter::GpuCycles,
+												hwcpipe::GpuCounter::ExternalMemoryWriteBytes,
+												hwcpipe::GpuCounter::ExternalMemoryReadBytes};
+	hwcpipe::HWCPipe* hwc = newInstance<hwcpipe::HWCPipe>(*m_pool, cpuCounters, gpuCounters);
 
 	hwc->run();
 

+ 2 - 2
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -383,14 +383,14 @@ VkBufferView BufferImpl::getOrCreateBufferView(Format fmt, PtrSize offset, PtrSi
 	{
 		ANKI_ASSERT(m_size >= offset);
 		range = m_size - offset;
+		range = getAlignedRoundDown(getFormatInfo(fmt).m_texelSize, range);
 	}
 
 	// Checks
 	ANKI_ASSERT(!!(m_usage & BufferUsageBit::kAllTexture));
 	ANKI_ASSERT(offset + range <= m_size);
 
-	ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_textureBufferBindOffsetAlignment,
-						  m_memHandle.m_offset + offset)
+	ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_textureBufferBindOffsetAlignment, offset)
 				&& "Offset not aligned");
 
 	ANKI_ASSERT((range % getFormatInfo(fmt).m_texelSize) == 0

+ 1 - 0
AnKi/Renderer/Common.h

@@ -84,6 +84,7 @@ public:
 	Timestamp* m_globTimestamp = nullptr;
 	GpuSceneMemoryPool* m_gpuScenePool = nullptr;
 	GpuSceneMicroPatcher* m_gpuSceneMicroPatcher = nullptr;
+	UnifiedGeometryMemoryPool* m_unifiedGometryMemoryPool = nullptr;
 };
 
 /// GPU buffers and textures that the clusterer refers to.

+ 41 - 3
AnKi/Renderer/Drawer.cpp

@@ -10,6 +10,7 @@
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Shaders/Include/MaterialTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneFunctions.h>
 
 namespace anki {
 
@@ -59,14 +60,33 @@ void RenderableDrawer::drawRange(RenderingTechnique technique, const RenderableD
 		static_assert(sizeof(globalUniforms->m_cameraTransform) == sizeof(args.m_cameraTransform));
 		memcpy(&globalUniforms->m_cameraTransform, &args.m_cameraTransform, sizeof(args.m_cameraTransform));
 
-		cmdb->bindUniformBuffer(kMaterialSetGlobal, kMaterialBindingGlobalUniforms,
+		cmdb->bindUniformBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGlobalUniforms),
 								m_r->getExternalSubsystems().m_rebarStagingPool->getBuffer(),
 								globalUniformsToken.m_offset, globalUniformsToken.m_range);
 	}
 
 	// More globals
-	cmdb->bindAllBindless(kMaterialSetBindless);
-	cmdb->bindSampler(kMaterialSetGlobal, kMaterialBindingTrilinearRepeatSampler, args.m_sampler);
+	cmdb->bindAllBindless(U32(MaterialSet::kBindless));
+	cmdb->bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTrilinearRepeatSampler), args.m_sampler);
+	cmdb->bindStorageBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGpuScene), args.m_gpuSceneBuffer, 0,
+							kMaxPtrSize);
+
+#define _ANKI_BIND_TEXTURE_BUFFER(format) \
+	cmdb->bindReadOnlyTextureBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kUnifiedGeometry_##format), \
+									args.m_unifiedGeometryBuffer, 0, kMaxPtrSize, Format::k##format)
+
+	_ANKI_BIND_TEXTURE_BUFFER(R32_Sfloat);
+	_ANKI_BIND_TEXTURE_BUFFER(R32G32_Sfloat);
+	_ANKI_BIND_TEXTURE_BUFFER(R32G32B32_Sfloat);
+	_ANKI_BIND_TEXTURE_BUFFER(R32G32B32A32_Sfloat);
+	_ANKI_BIND_TEXTURE_BUFFER(R16G16B16A16_Unorm);
+	_ANKI_BIND_TEXTURE_BUFFER(R8G8B8A8_Snorm);
+	_ANKI_BIND_TEXTURE_BUFFER(R8G8B8A8_Uint);
+
+#undef _ANKI_BIND_TEXTURE_BUFFER
+
+	// Misc
+	cmdb->setVertexAttribute(0, 0, Format::kR32_Uint, 0);
 
 	// Set a few things
 	Context ctx;
@@ -101,6 +121,24 @@ void RenderableDrawer::flushDrawcall(Context& ctx)
 	ctx.m_queueCtx.m_key.setLod(ctx.m_cachedRenderElementLods[0]);
 	ctx.m_queueCtx.m_key.setInstanceCount(ctx.m_cachedRenderElementCount);
 
+	// Instance buffer
+	RebarGpuMemoryToken token;
+	PackedGpuSceneRenderableInstance* instances =
+		static_cast<PackedGpuSceneRenderableInstance*>(ctx.m_queueCtx.m_rebarStagingPool->allocateFrame(
+			sizeof(PackedGpuSceneRenderableInstance) * ctx.m_cachedRenderElementCount, token));
+	for(U32 i = 0; i < ctx.m_cachedRenderElementCount; ++i)
+	{
+		UnpackedGpuSceneRenderableInstance instance;
+		instance.m_lod = ctx.m_cachedRenderElementLods[0];
+		instance.m_renderableOffset = ctx.m_cachedRenderElements[i].m_renderableOffset;
+		instances[i] = packGpuSceneRenderableInstance(instance);
+	}
+
+	ctx.m_queueCtx.m_commandBuffer->bindVertexBuffer(0, ctx.m_queueCtx.m_rebarStagingPool->getBuffer(), token.m_offset,
+													 sizeof(PackedGpuSceneRenderableInstance),
+													 VertexStepRate::kInstance);
+
+	// Draw
 	ctx.m_cachedRenderElements[0].m_callback(
 		ctx.m_queueCtx, ConstWeakArray<void*>(const_cast<void**>(&ctx.m_userData[0]), ctx.m_cachedRenderElementCount));
 

+ 3 - 0
AnKi/Renderer/Drawer.h

@@ -28,6 +28,9 @@ public:
 	Mat4 m_previousViewProjectionMatrix;
 
 	SamplerPtr m_sampler;
+	BufferPtr m_gpuSceneBuffer;
+	BufferPtr m_unifiedGeometryBuffer;
+
 	U32 m_minLod = 0;
 	U32 m_maxLod = kMaxLodCount - 1;
 };

+ 14 - 10
AnKi/Renderer/ForwardShading.cpp

@@ -35,19 +35,21 @@ void ForwardShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgr
 		cmdb->setBlendFactors(0, BlendFactor::kSrcAlpha, BlendFactor::kOneMinusSrcAlpha);
 
 		const ClusteredShadingContext& rsrc = ctx.m_clusteredShading;
-		const U32 set = kMaterialSetGlobal;
-		cmdb->bindSampler(set, kMaterialBindingLinearClampSampler, m_r->getSamplers().m_trilinearClamp);
-		cmdb->bindSampler(set, kMaterialBindingShadowSampler, m_r->getSamplers().m_trilinearClampShadow);
+		const U32 set = U32(MaterialSet::kGlobal);
+		cmdb->bindSampler(set, U32(MaterialBinding::kLinearClampSampler), m_r->getSamplers().m_trilinearClamp);
+		cmdb->bindSampler(set, U32(MaterialBinding::kShadowSampler), m_r->getSamplers().m_trilinearClampShadow);
 
-		rgraphCtx.bindTexture(set, kMaterialBindingDepthRt, m_r->getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-		rgraphCtx.bindColorTexture(set, kMaterialBindingLightVolume, m_r->getVolumetricLightingAccumulation().getRt());
+		rgraphCtx.bindTexture(set, U32(MaterialBinding::kDepthRt), m_r->getDepthDownscale().getHiZRt(),
+							  kHiZHalfSurface);
+		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kLightVolume),
+								   m_r->getVolumetricLightingAccumulation().getRt());
 
-		bindUniforms(cmdb, set, kMaterialBindingClusterShadingUniforms, rsrc.m_clusteredShadingUniformsToken);
-		bindUniforms(cmdb, set, kMaterialBindingClusterShadingLights, rsrc.m_pointLightsToken);
-		bindUniforms(cmdb, set, kMaterialBindingClusterShadingLights + 1, rsrc.m_spotLightsToken);
-		rgraphCtx.bindColorTexture(set, kMaterialBindingClusterShadingLights + 2,
+		bindUniforms(cmdb, set, U32(MaterialBinding::kClusterShadingUniforms), rsrc.m_clusteredShadingUniformsToken);
+		bindUniforms(cmdb, set, U32(MaterialBinding::kClusterShadingLights), rsrc.m_pointLightsToken);
+		bindUniforms(cmdb, set, U32(MaterialBinding::kClusterShadingLights) + 1, rsrc.m_spotLightsToken);
+		rgraphCtx.bindColorTexture(set, U32(MaterialBinding::kClusterShadingLights) + 2,
 								   m_r->getShadowMapping().getShadowmapRt());
-		bindStorage(cmdb, set, kMaterialBindingClusters, rsrc.m_clustersToken);
+		bindStorage(cmdb, set, U32(MaterialBinding::kClusters), rsrc.m_clustersToken);
 
 		RenderableDrawerArguments args;
 		args.m_viewMatrix = ctx.m_matrices.m_view;
@@ -55,6 +57,8 @@ void ForwardShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgr
 		args.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjectionJitter;
 		args.m_previousViewProjectionMatrix = ctx.m_prevMatrices.m_viewProjectionJitter; // Not sure about that
 		args.m_sampler = m_r->getSamplers().m_trilinearRepeatAnisoResolutionScalingBias;
+		args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+		args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 		// Start drawing
 		m_r->getSceneDrawer().drawRange(RenderingTechnique::kForward, args,

+ 5 - 0
AnKi/Renderer/GBuffer.cpp

@@ -128,6 +128,8 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 	args.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjectionJitter;
 	args.m_previousViewProjectionMatrix = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection;
 	args.m_sampler = m_r->getSamplers().m_trilinearRepeatAnisoResolutionScalingBias;
+	args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+	args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 	// First do early Z (if needed)
 	if(earlyZStart < earlyZEnd)
@@ -245,6 +247,9 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	{
 		pass.newTextureDependency(sriRt, TextureUsageBit::kFramebufferShadingRate);
 	}
+
+	pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+							 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 }
 
 } // end namespace anki

+ 10 - 0
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -287,6 +287,9 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 		TextureSubresourceInfo subresource(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(giCtx->m_gbufferDepthRt, TextureUsageBit::kAllFramebuffer, subresource);
+
+		pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 	}
 
 	// Shadow pass. Optional
@@ -324,6 +327,9 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 		TextureSubresourceInfo subresource(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(giCtx->m_shadowsRt, TextureUsageBit::kAllFramebuffer, subresource);
+
+		pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 	}
 	else
 	{
@@ -578,6 +584,8 @@ void IndirectDiffuseProbes::runGBufferInThread(RenderPassWorkContext& rgraphCtx,
 			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 			args.m_sampler = m_r->getSamplers().m_trilinearRepeat;
 			args.m_minLod = args.m_maxLod = kMaxLodCount - 1;
+			args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+			args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 			m_r->getSceneDrawer().drawRange(RenderingTechnique::kGBuffer, args,
 											rqueue.m_renderables.getBegin() + localStart,
@@ -636,6 +644,8 @@ void IndirectDiffuseProbes::runShadowmappingInThread(RenderPassWorkContext& rgra
 			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 			args.m_sampler = m_r->getSamplers().m_trilinearRepeatAniso;
 			args.m_maxLod = args.m_minLod = kMaxLodCount - 1;
+			args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+			args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 			m_r->getSceneDrawer().drawRange(RenderingTechnique::kShadow, args,
 											cascadeRenderQueue.m_renderables.getBegin() + localStart,

+ 10 - 0
AnKi/Renderer/ProbeReflections.cpp

@@ -357,6 +357,8 @@ void ProbeReflections::runGBuffer(RenderPassWorkContext& rgraphCtx)
 			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care about prev mats
 			args.m_sampler = m_r->getSamplers().m_trilinearRepeat;
 			args.m_minLod = args.m_maxLod = kMaxLodCount - 1;
+			args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+			args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 			m_r->getSceneDrawer().drawRange(RenderingTechnique::kGBuffer, args,
 											rqueue.m_renderables.getBegin() + localStart,
@@ -544,6 +546,9 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 		TextureSubresourceInfo subresource(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(m_ctx.m_gbufferDepthRt, TextureUsageBit::kAllFramebuffer, subresource);
+
+		pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 	}
 
 	// Shadow pass. Optional
@@ -588,6 +593,9 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 		TextureSubresourceInfo subresource(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(m_ctx.m_shadowMapRt, TextureUsageBit::kAllFramebuffer, subresource);
+
+		pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 	}
 	else
 	{
@@ -735,6 +743,8 @@ void ProbeReflections::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 			args.m_sampler = m_r->getSamplers().m_trilinearRepeatAniso;
 			args.m_minLod = args.m_maxLod = kMaxLodCount - 1;
+			args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+			args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 			m_r->getSceneDrawer().drawRange(RenderingTechnique::kShadow, args,
 											cascadeRenderQueue.m_renderables.getBegin() + localStart,

+ 2 - 0
AnKi/Renderer/RenderQueue.h

@@ -61,6 +61,8 @@ public:
 	/// Unless m_mergeKey is zero.
 	U64 m_mergeKey;
 
+	U32 m_renderableOffset;
+
 	F32 m_distanceFromCamera; ///< Don't set this. Visibility will.
 
 	U8 m_lod; ///< Don't set this. Visibility will.

+ 11 - 8
AnKi/Renderer/Renderer.cpp

@@ -693,14 +693,17 @@ void Renderer::gpuSceneCopy(RenderingContext& ctx)
 	m_runCtx.m_gpuSceneHandle = rgraph.importBuffer(m_subsystems.m_gpuScenePool->getBuffer(),
 													m_subsystems.m_gpuScenePool->getBuffer()->getBufferUsage());
 
-	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("GPU scene patching");
-	rpass.newBufferDependency(m_runCtx.m_gpuSceneHandle, BufferUsageBit::kStorageComputeWrite);
-
-	rpass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-		m_subsystems.m_gpuSceneMicroPatcher->patchGpuScene(*m_subsystems.m_rebarStagingPool,
-														   *rgraphCtx.m_commandBuffer.get(),
-														   m_subsystems.m_gpuScenePool->getBuffer());
-	});
+	if(m_subsystems.m_gpuSceneMicroPatcher->patchingIsNeeded())
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("GPU scene patching");
+		rpass.newBufferDependency(m_runCtx.m_gpuSceneHandle, BufferUsageBit::kStorageComputeWrite);
+
+		rpass.setWork([this](RenderPassWorkContext& rgraphCtx) {
+			m_subsystems.m_gpuSceneMicroPatcher->patchGpuScene(*m_subsystems.m_rebarStagingPool,
+															   *rgraphCtx.m_commandBuffer.get(),
+															   m_subsystems.m_gpuScenePool->getBuffer());
+		});
+	}
 }
 
 } // end namespace anki

+ 5 - 0
AnKi/Renderer/Renderer.h

@@ -154,6 +154,11 @@ public:
 	Format getHdrFormat() const;
 	Format getDepthNoStencilFormat() const;
 
+	BufferHandle getGpuSceneBufferHandle() const
+	{
+		return m_runCtx.m_gpuSceneHandle; // Assigned in gpuSceneCopy() from rgraph.importBuffer() of the GPU scene buffer
+	}
+
 	/// @name Debug render targets
 	/// @{
 

+ 5 - 0
AnKi/Renderer/ShadowMapping.cpp

@@ -126,6 +126,9 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 
 		TextureSubresourceInfo subresource = TextureSubresourceInfo(DepthStencilAspectBit::kDepth);
 		pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kAllFramebuffer, subresource);
+
+		pass.newBufferDependency(m_r->getGpuSceneBufferHandle(),
+								 BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
 	}
 }
 
@@ -613,6 +616,8 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 		args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 		args.m_sampler = m_r->getSamplers().m_trilinearRepeatAniso;
 		args.m_minLod = args.m_maxLod = work.m_renderQueueElementsLod;
+		args.m_gpuSceneBuffer = getExternalSubsystems().m_gpuScenePool->getBuffer();
+		args.m_unifiedGeometryBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 
 		m_r->getSceneDrawer().drawRange(RenderingTechnique::kShadow, args,
 										work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement,

+ 5 - 5
AnKi/Resource/MeshResource.cpp

@@ -147,13 +147,13 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 
 			// We need to align the actual offset to the texel size
 			const PtrSize remainder = lod.m_vertexBuffersAllocationToken[stream].m_offset % texelSize;
-			lod.m_fixedUniversalGeometryBufferOffset[stream] = U8(texelSize - remainder);
+			lod.m_fixedUnifiedGeometryBufferOffset[stream] = U8(texelSize - remainder);
 
 			ANKI_ASSERT(
-				(lod.m_vertexBuffersAllocationToken[stream].m_offset + lod.m_fixedUniversalGeometryBufferOffset[stream])
+				(lod.m_vertexBuffersAllocationToken[stream].m_offset + lod.m_fixedUnifiedGeometryBufferOffset[stream])
 					% texelSize
 				== 0);
-			ANKI_ASSERT(lod.m_fixedUniversalGeometryBufferOffset[stream] + PtrSize(lod.m_vertexCount) * texelSize
+			ANKI_ASSERT(lod.m_fixedUnifiedGeometryBufferOffset[stream] + PtrSize(lod.m_vertexCount) * texelSize
 						<= lod.m_vertexBuffersAllocationToken[stream].m_size);
 		}
 
@@ -171,7 +171,7 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			inf.m_bottomLevel.m_positionBuffer = getExternalSubsystems().m_unifiedGometryMemoryPool->getBuffer();
 			inf.m_bottomLevel.m_positionBufferOffset =
 				lod.m_vertexBuffersAllocationToken[VertexStreamId::kPosition].m_offset
-				+ lod.m_fixedUniversalGeometryBufferOffset[VertexStreamId::kPosition];
+				+ lod.m_fixedUnifiedGeometryBufferOffset[VertexStreamId::kPosition];
 			inf.m_bottomLevel.m_positionStride =
 				getFormatInfo(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition]).m_texelSize;
 			inf.m_bottomLevel.m_positionsFormat = kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition];
@@ -293,7 +293,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 			// Copy
 			cmdb->copyBufferToBuffer(handle.getBuffer(), handle.getOffset(), unifiedGeometryBuffer,
 									 lod.m_vertexBuffersAllocationToken[stream].m_offset
-										 + lod.m_fixedUniversalGeometryBufferOffset[stream],
+										 + lod.m_fixedUnifiedGeometryBufferOffset[stream],
 									 handle.getRange());
 		}
 	}

+ 2 - 2
AnKi/Resource/MeshResource.h

@@ -65,7 +65,7 @@ public:
 	void getVertexStreamInfo(U32 lod, VertexStreamId stream, PtrSize& bufferOffset, U32& vertexCount) const
 	{
 		bufferOffset = m_lods[lod].m_vertexBuffersAllocationToken[stream].m_offset
-					   + m_lods[lod].m_fixedUniversalGeometryBufferOffset[stream];
+					   + m_lods[lod].m_fixedUnifiedGeometryBufferOffset[stream];
 		vertexCount = m_lods[lod].m_vertexCount;
 	}
 
@@ -105,7 +105,7 @@ private:
 	public:
 		SegregatedListsGpuMemoryPoolToken m_indexBufferAllocationToken;
 		Array<SegregatedListsGpuMemoryPoolToken, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
-		Array<U8, U32(VertexStreamId::kMeshRelatedCount)> m_fixedUniversalGeometryBufferOffset = {};
+		Array<U8, U32(VertexStreamId::kMeshRelatedCount)> m_fixedUnifiedGeometryBufferOffset = {};
 
 		U32 m_indexCount = 0;
 		U32 m_vertexCount = 0;

+ 9 - 3
AnKi/Scene/Components/ModelComponent.cpp

@@ -54,7 +54,7 @@ Error ModelComponent::loadModelResource(CString filename)
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
 
 	gpuScene.free(m_gpuSceneMeshGpuViews);
-	gpuScene.allocate(sizeof(MeshGpuView) * m_modelPatchMergeKeys.getSize(), 4, m_gpuSceneMeshGpuViews);
+	gpuScene.allocate(sizeof(GpuSceneMesh) * m_modelPatchMergeKeys.getSize(), 4, m_gpuSceneMeshGpuViews);
 
 	U32 uniformsSize = 0;
 	m_gpuSceneUniformsOffsetPerPatch.resize(m_node->getMemoryPool(), modelPatchCount);
@@ -86,10 +86,10 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		// Upload the mesh views
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
-		DynamicArrayRaii<MeshGpuView> meshViews(info.m_framePool, modelPatchCount);
+		DynamicArrayRaii<GpuSceneMesh> meshViews(info.m_framePool, modelPatchCount);
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
-			MeshGpuView& view = meshViews[i];
+			GpuSceneMesh& view = meshViews[i];
 			const ModelPatch& patch = m_model->getModelPatches()[i];
 			const MeshResource& mesh = *patch.getMesh();
 
@@ -123,6 +123,12 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 				view.m_lods[l].m_indexOffset = U32(offset);
 				view.m_lods[l].m_indexCount = indexCount;
 			}
+
+			// Copy the last LOD to the rest just in case
+			for(U32 l = mesh.getLodCount(); l < kMaxLodCount; ++l)
+			{
+				view.m_lods[l] = view.m_lods[l - 1];
+			}
 		}
 
 		gpuScenePatcher.newCopy(*info.m_framePool, m_gpuSceneMeshGpuViews.m_offset, meshViews.getSizeInBytes(),

+ 4 - 3
AnKi/Scene/Components/MoveComponent.cpp

@@ -15,7 +15,7 @@ MoveComponent::MoveComponent(SceneNode* node)
 	: SceneComponent(node, getStaticClassId())
 	, m_ignoreLocalTransform(false)
 	, m_ignoreParentTransform(false)
-	, m_dirtyLastFrame(true)
+	, m_dirtyThisFrame(true)
 {
 	getExternalSubsystems(*node).m_gpuSceneMemoryPool->allocate(sizeof(Mat3x4) * 2, alignof(F32), m_gpuSceneTransforms);
 	markForUpdate();
@@ -84,7 +84,8 @@ Error MoveComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 	}
 
 	// Micro patch
-	if(dirty || m_dirtyLastFrame)
+	const Bool dirtyLastFrame = m_dirtyThisFrame;
+	if(dirty || dirtyLastFrame)
 	{
 		Array<Mat3x4, 2> trfs;
 		trfs[0] = Mat3x4(m_wtrf);
@@ -93,7 +94,7 @@ Error MoveComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		info.m_gpuSceneMicroPatcher->newCopy(*info.m_framePool, m_gpuSceneTransforms.m_offset, sizeof(trfs), &trfs[0]);
 	}
 
-	m_dirtyLastFrame = dirty;
+	m_dirtyThisFrame = dirty;
 
 	return Error::kNone;
 }

+ 7 - 1
AnKi/Scene/Components/MoveComponent.h

@@ -145,6 +145,12 @@ public:
 		return U32(m_gpuSceneTransforms.m_offset);
 	}
 
+	/// Returns the dirty state that update() stored in m_dirtyThisFrame. Only valid after the component was updated.
+	Bool wasDirtyThisFrame() const
+	{
+		return m_dirtyThisFrame;
+	}
+
 private:
 	/// The transformation in local space
 	Transform m_ltrf = Transform::getIdentity();
@@ -160,7 +166,7 @@ private:
 	Bool m_markedForUpdate : 1;
 	Bool m_ignoreLocalTransform : 1;
 	Bool m_ignoreParentTransform : 1;
-	Bool m_dirtyLastFrame : 1;
+	Bool m_dirtyThisFrame : 1;
 
 	void markForUpdate()
 	{

+ 67 - 36
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -205,6 +205,13 @@ ParticleEmitterComponent::~ParticleEmitterComponent()
 {
 	m_simpleParticles.destroy(m_node->getMemoryPool());
 	m_physicsParticles.destroy(m_node->getMemoryPool());
+
+	GpuSceneMemoryPool& gpuScenePool = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
+	gpuScenePool.free(m_gpuScenePositions);
+	gpuScenePool.free(m_gpuSceneScales);
+	gpuScenePool.free(m_gpuSceneAlphas);
+	gpuScenePool.free(m_gpuSceneParticles);
+	gpuScenePool.free(m_gpuSceneUniforms);
 }
 
 Error ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
@@ -219,10 +226,17 @@ Error ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	// Load
 	ANKI_CHECK(getExternalSubsystems(*m_node).m_resourceManager->loadResource(filename, m_particleEmitterResource));
 	m_props = m_particleEmitterResource->getProperties();
+	m_resourceUpdated = true;
 
 	// Cleanup
 	m_simpleParticles.destroy(m_node->getMemoryPool());
 	m_physicsParticles.destroy(m_node->getMemoryPool());
+	GpuSceneMemoryPool& gpuScenePool = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
+	gpuScenePool.free(m_gpuScenePositions);
+	gpuScenePool.free(m_gpuSceneScales);
+	gpuScenePool.free(m_gpuSceneAlphas);
+	gpuScenePool.free(m_gpuSceneParticles);
+	gpuScenePool.free(m_gpuSceneUniforms);
 
 	// Init particles
 	m_simulationType = (m_props.m_usePhysicsEngine) ? SimulationType::kPhysicsEngine : SimulationType::kSimple;
@@ -247,7 +261,13 @@ Error ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 		m_simpleParticles.create(m_node->getMemoryPool(), m_props.m_maxNumOfParticles);
 	}
 
-	m_vertBuffSize = m_props.m_maxNumOfParticles * kVertexSize;
+	// GPU scene allocations
+	gpuScenePool.allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuScenePositions);
+	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneAlphas);
+	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneScales);
+	gpuScenePool.allocate(sizeof(GpuSceneParticles), alignof(U32), m_gpuSceneParticles);
+	gpuScenePool.allocate(m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
+						  alignof(U32), m_gpuSceneUniforms);
 
 	return Error::kNone;
 }
@@ -261,22 +281,53 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 	}
 
 	updated = true;
+	Vec3* positions;
+	F32* scales;
+	F32* alphas;
 
 	if(m_simulationType == SimulationType::kSimple)
 	{
-		simulate(info.m_previousTime, info.m_currentTime, WeakArray<SimpleParticle>(m_simpleParticles));
+		simulate(info.m_previousTime, info.m_currentTime, WeakArray<SimpleParticle>(m_simpleParticles), positions,
+				 scales, alphas);
 	}
 	else
 	{
 		ANKI_ASSERT(m_simulationType == SimulationType::kPhysicsEngine);
-		simulate(info.m_previousTime, info.m_currentTime, WeakArray<PhysicsParticle>(m_physicsParticles));
+		simulate(info.m_previousTime, info.m_currentTime, WeakArray<PhysicsParticle>(m_physicsParticles), positions,
+				 scales, alphas);
+	}
+
+	// Upload to the GPU scene
+	GpuSceneMicroPatcher& patcher = *info.m_gpuSceneMicroPatcher;
+	if(m_aliveParticleCount > 0)
+	{
+		patcher.newCopy(*info.m_framePool, m_gpuScenePositions.m_offset, sizeof(Vec3) * m_aliveParticleCount,
+						positions);
+		patcher.newCopy(*info.m_framePool, m_gpuSceneScales.m_offset, sizeof(F32) * m_aliveParticleCount, scales);
+		patcher.newCopy(*info.m_framePool, m_gpuSceneAlphas.m_offset, sizeof(F32) * m_aliveParticleCount, alphas);
+	}
+
+	if(m_resourceUpdated)
+	{
+		GpuSceneParticles particles = {};
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = U32(m_gpuScenePositions.m_offset);
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = U32(m_gpuSceneAlphas.m_offset);
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = U32(m_gpuSceneScales.m_offset);
+
+		patcher.newCopy(*info.m_framePool, m_gpuSceneParticles.m_offset, sizeof(GpuSceneParticles), &particles);
+
+		patcher.newCopy(*info.m_framePool, m_gpuSceneUniforms.m_offset,
+						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
+						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getBegin());
 	}
 
+	m_resourceUpdated = false;
 	return Error::kNone;
 }
 
 template<typename TParticle>
-void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles)
+void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles,
+										Vec3*& positions, F32*& scales, F32*& alphas)
 {
 	// - Deactivate the dead particles
 	// - Calc the AABB
@@ -286,8 +337,12 @@ void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime,
 	Vec3 aabbMax(kMinF32);
 	m_aliveParticleCount = 0;
 
-	F32* verts = static_cast<F32*>(m_node->getFrameMemoryPool().allocate(m_vertBuffSize, alignof(F32)));
-	m_verts = verts;
+	positions = static_cast<Vec3*>(
+		m_node->getFrameMemoryPool().allocate(m_props.m_maxNumOfParticles * sizeof(Vec3), alignof(Vec3)));
+	scales = static_cast<F32*>(
+		m_node->getFrameMemoryPool().allocate(m_props.m_maxNumOfParticles * sizeof(F32), alignof(F32)));
+	alphas = static_cast<F32*>(
+		m_node->getFrameMemoryPool().allocate(m_props.m_maxNumOfParticles * sizeof(F32), alignof(F32)));
 
 	F32 maxParticleSize = -1.0f;
 
@@ -308,9 +363,6 @@ void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime,
 		{
 			// It's alive
 
-			// Do checks
-			ANKI_ASSERT((ptrToNumber(verts) + kVertexSize - ptrToNumber(m_verts)) <= m_vertBuffSize);
-
 			// This will calculate a new world transformation
 			particle.simulate(prevUpdateTime, crntTime);
 
@@ -319,17 +371,14 @@ void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime,
 			aabbMin = aabbMin.min(origin);
 			aabbMax = aabbMax.max(origin);
 
-			verts[0] = origin.x();
-			verts[1] = origin.y();
-			verts[2] = origin.z();
+			positions[m_aliveParticleCount] = origin;
 
-			verts[3] = particle.m_crntSize;
+			scales[m_aliveParticleCount] = particle.m_crntSize;
 			maxParticleSize = max(maxParticleSize, particle.m_crntSize);
 
-			verts[4] = clamp(particle.m_crntAlpha, 0.0f, 1.0f);
+			alphas[m_aliveParticleCount] = clamp(particle.m_crntAlpha, 0.0f, 1.0f);
 
 			++m_aliveParticleCount;
-			verts += 5;
 		}
 	}
 
@@ -344,7 +393,8 @@ void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime,
 	else
 	{
 		m_worldBoundingVolume = Aabb(Vec3(0.0f), Vec3(0.001f));
-		m_verts = nullptr;
+		positions = nullptr;
+		alphas = scales = nullptr;
 	}
 
 	//
@@ -391,32 +441,13 @@ void ParticleEmitterComponent::draw(RenderQueueDrawContext& ctx) const
 
 	if(!ctx.m_debugDraw)
 	{
-		// Load verts
-		RebarGpuMemoryToken token;
-		void* gpuStorage = ctx.m_rebarStagingPool->allocateFrame(m_aliveParticleCount * kVertexSize, token);
-		memcpy(gpuStorage, m_verts, m_aliveParticleCount * kVertexSize);
-
 		// Program
 		ShaderProgramPtr prog;
 		m_particleEmitterResource->getRenderingInfo(ctx.m_key, prog);
 		cmdb->bindShaderProgram(prog);
 
-		// Vertex attribs
-		cmdb->setVertexAttribute(U32(VertexAttributeId::kPosition), 0, Format::kR32G32B32_Sfloat, 0);
-		cmdb->setVertexAttribute(U32(VertexAttributeId::SCALE), 0, Format::kR32_Sfloat, sizeof(Vec3));
-		cmdb->setVertexAttribute(U32(VertexAttributeId::ALPHA), 0, Format::kR32_Sfloat, sizeof(Vec3) + sizeof(F32));
-
-		// Vertex buff
-		cmdb->bindVertexBuffer(0, ctx.m_rebarStagingPool->getBuffer(), token.m_offset, kVertexSize,
-							   VertexStepRate::kInstance);
-
-		// Uniforms
-		Array<Mat3x4, 1> trf = {Mat3x4::getIdentity()};
-		RenderComponent::allocateAndSetupUniforms(m_particleEmitterResource->getMaterial(), ctx, trf, trf,
-												  *ctx.m_rebarStagingPool);
-
 		// Draw
-		cmdb->drawArrays(PrimitiveTopology::kTriangleStrip, 4, m_aliveParticleCount, 0, 0);
+		cmdb->drawArrays(PrimitiveTopology::kTriangles, 6 * m_aliveParticleCount);
 	}
 	else
 	{

+ 19 - 6
AnKi/Scene/Components/ParticleEmitterComponent.h

@@ -57,6 +57,16 @@ public:
 		return m_particleEmitterResource.isCreated();
 	}
 
+	U32 getGpuSceneParticlesOffset() const // GPU-scene offset where update() copies this emitter's GpuSceneParticles record.
+	{
+		return U32(m_gpuSceneParticles.m_offset); // Token offset narrowed to U32.
+	}
+
+	U32 getGpuSceneUniforms() const // GPU-scene offset where update() copies the material's prefilled local uniforms.
+	{
+		return U32(m_gpuSceneUniforms.m_offset); // Token offset narrowed to U32.
+	}
+
 private:
 	class ParticleBase;
 	class SimpleParticle;
@@ -69,8 +79,6 @@ private:
 		kPhysicsEngine
 	};
 
-	static constexpr U32 kVertexSize = 5 * sizeof(F32);
-
 	SceneNode* m_node = nullptr;
 
 	ParticleEmitterProperties m_props;
@@ -80,21 +88,26 @@ private:
 	DynamicArray<PhysicsParticle> m_physicsParticles;
 	Second m_timeLeftForNextEmission = 0.0;
 	U32 m_aliveParticleCount = 0;
+	Bool m_resourceUpdated = true;
 
 	Transform m_transform = Transform::getIdentity();
 	Aabb m_worldBoundingVolume = Aabb(Vec3(-1.0f), Vec3(1.0f));
 
-	U32 m_vertBuffSize = 0;
-	void* m_verts = nullptr;
-
 	ImageResourcePtr m_dbgImage;
 
+	SegregatedListsGpuMemoryPoolToken m_gpuScenePositions;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneAlphas;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneScales;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneParticles;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
+
 	SimulationType m_simulationType = SimulationType::kUndefined;
 
 	Error update(SceneComponentUpdateInfo& info, Bool& updated);
 
 	template<typename TParticle>
-	void simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles);
+	void simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles, Vec3*& positions,
+				  F32*& scales, F32*& alphas);
 
 	void draw(RenderQueueDrawContext& ctx) const;
 };

+ 3 - 3
AnKi/Scene/Components/RenderComponent.cpp

@@ -16,8 +16,8 @@ ANKI_SCENE_COMPONENT_STATICS(RenderComponent)
 RenderComponent::RenderComponent(SceneNode* node)
 	: SceneComponent(node, getStaticClassId())
 {
-	getExternalSubsystems(*node).m_gpuSceneMemoryPool->allocate(sizeof(RenderableGpuView2), sizeof(U32),
-																m_gpuSceneRenderableGpuView);
+	getExternalSubsystems(*node).m_gpuSceneMemoryPool->allocate(sizeof(GpuSceneRenderable), sizeof(U32),
+																m_gpuSceneRenderable); // Room for one GpuSceneRenderable; freed in onDestroy(). NOTE(review): sizeof(U32) is presumably the alignment - confirm.
 }
 
 void RenderComponent::allocateAndSetupUniforms(const MaterialResourcePtr& mtl, const RenderQueueDrawContext& ctx,
@@ -117,7 +117,7 @@ void RenderComponent::allocateAndSetupUniforms(const MaterialResourcePtr& mtl, c
 
 void RenderComponent::onDestroy(SceneNode& node)
 {
-	getExternalSubsystems(node).m_gpuSceneMemoryPool->free(m_gpuSceneRenderableGpuView);
+	getExternalSubsystems(node).m_gpuSceneMemoryPool->free(m_gpuSceneRenderable); // Hands the token allocated in the constructor back to the pool.
 }
 
 } // end namespace anki

+ 4 - 3
AnKi/Scene/Components/RenderComponent.h

@@ -85,6 +85,7 @@ public:
 		el.m_mergeKey = m_mergeKey;
 		el.m_distanceFromCamera = -1.0f;
 		el.m_lod = kMaxU8;
+		el.m_renderableOffset = U32(m_gpuSceneRenderable.m_offset);
 	}
 
 	void setupRayTracingInstanceQueueElement(U32 lod, RayTracingInstanceQueueElement& el) const
@@ -106,8 +107,8 @@ public:
 
 	U32 getGpuSceneViewOffset() const
 	{
-		ANKI_ASSERT((m_gpuSceneRenderableGpuView.m_offset % 4) == 0);
-		return U32(m_gpuSceneRenderableGpuView.m_offset);
+		ANKI_ASSERT((m_gpuSceneRenderable.m_offset % 4) == 0); // The offset is expected to be a multiple of 4.
+		return U32(m_gpuSceneRenderable.m_offset); // Same offset that setupRenderableQueueElement() stores in m_renderableOffset.
 	}
 
 private:
@@ -116,7 +117,7 @@ private:
 	U64 m_mergeKey = kMaxU64;
 	FillRayTracingInstanceQueueElementCallback m_rtCallback = nullptr;
 	const void* m_rtCallbackUserData = nullptr;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneRenderableGpuView;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneRenderable;
 	RenderComponentFlag m_flags = RenderComponentFlag::kNone;
 
 	void onDestroy(SceneNode& node);

+ 10 - 77
AnKi/Scene/ModelNode.cpp

@@ -193,14 +193,14 @@ void ModelNode::initRenderComponents()
 		}
 
 		// Upload to GPU scene
-		RenderableGpuView2 view = {};
-		view.m_worldTransformsOffset = getFirstComponentOfType<MoveComponent>().getTransformsGpuSceneOffset();
-		view.m_aabbOffset = getFirstComponentOfType<SpatialComponent>().getAabbGpuSceneOffset();
-		view.m_uniformsOffset = getFirstComponentOfType<ModelComponent>().getUniformsGpuSceneOffset(patchIdx);
-		view.m_geometryOffset =
-			getFirstComponentOfType<ModelComponent>().getMeshViewsGpuSceneOffset() + sizeof(MeshGpuView) * patchIdx;
+		GpuSceneRenderable renderable = {};
+		renderable.m_worldTransformsOffset = getFirstComponentOfType<MoveComponent>().getTransformsGpuSceneOffset();
+		renderable.m_aabbOffset = getFirstComponentOfType<SpatialComponent>().getAabbGpuSceneOffset();
+		renderable.m_uniformsOffset = getFirstComponentOfType<ModelComponent>().getUniformsGpuSceneOffset(patchIdx);
+		renderable.m_geometryOffset =
+			getFirstComponentOfType<ModelComponent>().getMeshViewsGpuSceneOffset() + sizeof(GpuSceneMesh) * patchIdx;
 		getExternalSubsystems().m_gpuSceneMicroPatcher->newCopy(getFrameMemoryPool(), rc.getGpuSceneViewOffset(),
-																sizeof(view), &view);
+																sizeof(renderable), &renderable);
 
 		// Init the proxy
 		RenderProxy& proxy = m_renderProxies[patchIdx];
@@ -228,26 +228,8 @@ void ModelNode::draw(RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData
 		const ModelPatch& patch = modelc.getModelResource()->getModelPatches()[modelPatchIdx];
 		const SkinComponent& skinc = getFirstComponentOfType<SkinComponent>();
 
-		// Transforms
-		auto computeTranform = [&](const Transform& trf) -> Mat3x4 {
-			if(skinc.isEnabled())
-			{
-				return Mat3x4(trf);
-			}
-			else
-			{
-				// Bake the decompression in the model matrix
-				const Mat4 m4 = Mat4(trf) * m_renderProxies[modelPatchIdx].m_compressedToModelTransform;
-				const Mat3x4 out(m4);
-				return out;
-			}
-		};
-		Array<Mat3x4, kMaxInstanceCount> trfs;
-		Array<Mat3x4, kMaxInstanceCount> prevTrfs;
 		const MoveComponent& movec = getFirstComponentOfType<MoveComponent>();
-		trfs[0] = computeTranform(movec.getWorldTransform());
-		prevTrfs[0] = computeTranform(movec.getPreviousWorldTransform());
-		Bool moved = trfs[0] != prevTrfs[0];
+		Bool moved = movec.wasDirtyThisFrame();
 		for(U32 i = 1; i < instanceCount; ++i)
 		{
 			const ModelNode& otherNode = *static_cast<const RenderProxy*>(userData[i])->m_node;
@@ -257,68 +239,19 @@ void ModelNode::draw(RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData
 			ANKI_ASSERT(otherNodeModelPatchIdx == modelPatchIdx);
 
 			const MoveComponent& otherNodeMovec = otherNode.getFirstComponentOfType<MoveComponent>();
-			trfs[i] = computeTranform(otherNodeMovec.getWorldTransform());
-			prevTrfs[i] = computeTranform(otherNodeMovec.getPreviousWorldTransform());
 
-			moved = moved || (trfs[i] != prevTrfs[i]);
+			moved = moved || otherNodeMovec.wasDirtyThisFrame();
 		}
 
 		ctx.m_key.setVelocity(moved && ctx.m_key.getRenderingTechnique() == RenderingTechnique::kGBuffer);
 		ctx.m_key.setSkinned(skinc.isEnabled());
+
 		ModelRenderingInfo modelInf;
 		patch.getRenderingInfo(ctx.m_key, modelInf);
 
-		// Bones storage
-		if(skinc.isEnabled())
-		{
-			const U32 boneCount = skinc.getBoneTransforms().getSize();
-			RebarGpuMemoryToken token, tokenPrev;
-			void* trfs = ctx.m_rebarStagingPool->allocateFrame(boneCount * sizeof(Mat4), token);
-			memcpy(trfs, &skinc.getBoneTransforms()[0], boneCount * sizeof(Mat4));
-
-			trfs = ctx.m_rebarStagingPool->allocateFrame(boneCount * sizeof(Mat4), tokenPrev);
-			memcpy(trfs, &skinc.getPreviousFrameBoneTransforms()[0], boneCount * sizeof(Mat4));
-
-			cmdb->bindStorageBuffer(kMaterialSetLocal, kMaterialBindingBoneTransforms,
-									ctx.m_rebarStagingPool->getBuffer(), token.m_offset, token.m_range);
-
-			cmdb->bindStorageBuffer(kMaterialSetLocal, kMaterialBindingPreviousBoneTransforms,
-									ctx.m_rebarStagingPool->getBuffer(), tokenPrev.m_offset, tokenPrev.m_range);
-		}
-
 		// Program
 		cmdb->bindShaderProgram(modelInf.m_program);
 
-		// Uniforms
-		const Vec4 positionScaleAndTransform(
-			m_renderProxies[modelPatchIdx].m_compressedToModelTransform(0, 0),
-			m_renderProxies[modelPatchIdx].m_compressedToModelTransform.getTranslationPart().xyz());
-		RenderComponent::allocateAndSetupUniforms(
-			modelc.getModelResource()->getModelPatches()[modelPatchIdx].getMaterial(), ctx,
-			ConstWeakArray<Mat3x4>(&trfs[0], instanceCount), ConstWeakArray<Mat3x4>(&prevTrfs[0], instanceCount),
-			*ctx.m_rebarStagingPool, positionScaleAndTransform);
-
-		// Bind attributes & vertex buffers
-		for(VertexStreamId streamId :
-			EnumIterable<VertexStreamId>(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
-		{
-			if(modelInf.m_vertexBufferOffsets[streamId] == kMaxPtrSize)
-			{
-				continue;
-			}
-
-			const U32 attribLocation = U32(streamId);
-			const U32 bufferBinding = U32(streamId);
-			const Format fmt = kMeshRelatedVertexStreamFormats[streamId];
-			const U32 relativeOffset = 0;
-			const U32 vertexStride = getFormatInfo(fmt).m_texelSize;
-
-			cmdb->setVertexAttribute(attribLocation, bufferBinding, fmt, relativeOffset);
-
-			cmdb->bindVertexBuffer(bufferBinding, getExternalSubsystems().m_unifiedGeometryMemPool->getBuffer(),
-								   modelInf.m_vertexBufferOffsets[streamId], vertexStride, VertexStepRate::kVertex);
-		}
-
 		// Bind index buffer
 		cmdb->bindIndexBuffer(getExternalSubsystems().m_unifiedGeometryMemPool->getBuffer(),
 							  modelInf.m_indexBufferOffset, IndexType::kU16);

+ 8 - 0
AnKi/Scene/ParticleEmitterNode.cpp

@@ -100,6 +100,14 @@ Error ParticleEmitterNode::frameUpdate([[maybe_unused]] Second prevUpdateTime, [
 	{
 		RenderComponent& rc = getFirstComponentOfType<RenderComponent>();
 		rc.setFlagsFromMaterial(pec.getParticleEmitterResource()->getMaterial());
+
+		// GPU scene update
+		GpuSceneRenderable renderable = {};
+		renderable.m_worldTransformsOffset = getFirstComponentOfType<MoveComponent>().getTransformsGpuSceneOffset();
+		renderable.m_uniformsOffset = pec.getGpuSceneUniforms();
+		renderable.m_geometryOffset = pec.getGpuSceneParticlesOffset();
+		getExternalSubsystems().m_gpuSceneMicroPatcher->newCopy(getFrameMemoryPool(), rc.getGpuSceneViewOffset(),
+																sizeof(renderable), &renderable);
 	}
 
 	return Error::kNone;

+ 9 - 6
AnKi/Shaders/ForwardShadingCommon.hlsl

@@ -10,13 +10,11 @@
 #include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 
+#if defined(GPU_SCENE) // TODO rm that eventually
+#	define FORWARD_SHADING 1
+#	include <AnKi/Shaders/MaterialShadersCommon.hlsl>
+#else
 ANKI_BINDLESS_SET(kMaterialSetBindless)
-
-//
-// Frag
-//
-#if defined(ANKI_FRAGMENT_SHADER)
-// Global resources
 [[vk::binding(kMaterialBindingLinearClampSampler, kMaterialSetGlobal)]] SamplerState g_linearAnyClampSampler;
 [[vk::binding(kMaterialBindingDepthRt, kMaterialSetGlobal)]] Texture2D g_gbufferDepthTex;
 [[vk::binding(kMaterialBindingLightVolume, kMaterialSetGlobal)]] Texture3D<RVec4> g_lightVol;
@@ -26,7 +24,12 @@ ANKI_BINDLESS_SET(kMaterialSetBindless)
 #	define CLUSTERED_SHADING_LIGHTS_BINDING kMaterialBindingClusterShadingLights
 #	define CLUSTERED_SHADING_CLUSTERS_BINDING kMaterialBindingClusters
 #	include <AnKi/Shaders/ClusteredShadingCommon.hlsl>
+#endif
 
+//
+// Frag
+//
+#if defined(ANKI_FRAGMENT_SHADER)
 struct FragOut
 {
 	RVec4 m_color : SV_TARGET0;

+ 18 - 17
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -9,8 +9,8 @@
 #pragma anki mutator TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 
+#define GPU_SCENE 1
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
-#include <AnKi/Shaders/Functions.hlsl>
 
 #pragma anki reflect AnKiLocalUniforms
 #pragma anki struct AnKiLocalUniforms
@@ -19,25 +19,18 @@
 #pragma anki member RVec4 m_colorBias
 #pragma anki struct end
 
-[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
-	g_globalUniforms;
-[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState g_globalSampler;
-[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] ByteAddressBuffer g_localUniforms;
-[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
-	g_renderableGpuViews;
-
 struct VertIn
 {
-	[[vk::location(VertexStreamId::kPosition)]] Vec3 m_modelPos : POSITION;
-	[[vk::location(VertexStreamId::kUv)]] Vec2 m_uv : TEXCOORD;
-	U32 m_svInstanceId : SV_INSTANCEID;
+	[[vk::location(0)]] PackedGpuSceneRenderableInstance m_gpuSceneRenderableInstance : INSTANCE; // Unpacked in the vertex stage to get the renderable offset and LOD.
+	U32 m_svVertexId : SV_VERTEXID; // Handed to loadVertex() to fetch position and UV.
 };
 
 struct VertOut
 {
-	[[vk::location(0)]] Vec2 m_uv : TEXCOORD;
-	[[vk::location(1)]] Vec3 m_worldPosition : WORLD_POSITION;
+	Vec2 m_uv : TEXCOORD;
+	Vec3 m_worldPosition : WORLD_POSITION;
 	Vec4 m_svPosition : SV_POSITION;
+	nointerpolation U32 m_uniformsOffset : UNIFORMS_OFFSET; // Lets the fragment stage load AnKiLocalUniforms from g_gpuScene.
 };
 
 #pragma anki start vert
@@ -46,12 +39,19 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	output.m_worldPosition =
-		mul(g_renderableGpuViews[input.m_svInstanceId].m_worldTransform, Vec4(input.m_modelPos, 1.0));
+	const UnpackedGpuSceneRenderableInstance instance =
+		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
+	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
+	const GpuSceneMesh mesh = g_gpuScene.Load<GpuSceneMesh>(renderable.m_geometryOffset);
+	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+	const UnpackedMeshVertex vertex = loadVertex(mesh, instance.m_lod, input.m_svVertexId, false);
+
+	output.m_worldPosition = mul(worldTransform, Vec4(vertex.m_position, 1.0));
 
 	output.m_svPosition = mul(g_globalUniforms.m_viewProjectionMatrix, Vec4(output.m_worldPosition, 1.0));
 
-	output.m_uv = input.m_uv;
+	output.m_uv = vertex.m_uv;
+	output.m_uniformsOffset = renderable.m_uniformsOffset;
 
 	return output;
 }
@@ -64,7 +64,8 @@ FragOut main(VertOut input)
 	ANKI_MAYBE_UNUSED(input);
 	FragOut output;
 
-	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_localUniforms, 0u);
+	const AnKiLocalUniforms localUniforms =
+		loadAnKiLocalUniforms(g_gpuScene, WaveReadLaneFirst(input.m_uniformsOffset));
 
 	output.m_color = RVec4(1.0, 1.0, 1.0, 1.0);
 

+ 26 - 17
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -9,18 +9,18 @@
 #pragma anki mutator ANIMATED_TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 
+#define GPU_SCENE 1
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
 
 struct VertIn
 {
-	[[vk::location(VertexStreamId::kParticlePosition)]] Vec3 m_position : POSITION;
-	[[vk::location(VertexStreamId::kParticleScale)]] RF32 m_scale : SCALE;
-	[[vk::location(VertexStreamId::kParticleAlpha)]] RF32 m_alpha : ALPHA;
-	U32 m_vertexId : SV_VERTEXID;
+	[[vk::location(0)]] PackedGpuSceneRenderableInstance m_gpuSceneRenderableInstance : INSTANCE; // Unpacked in the vertex stage to locate the GpuSceneRenderable.
+	U32 m_svVertexId : SV_VERTEXID; // Split into a particle index (/6) and a corner index (%6) by the vertex stage.
 };
 
 struct VertOut
 {
+	nointerpolation U32 m_uniformsOffset : UNIS_OFFSET;
 	nointerpolation RF32 m_alpha : ALPHA;
 	Vec2 m_uv : TEXCOORD;
 	Vec3 m_worldPos : WORLD_POS;
@@ -35,28 +35,36 @@ struct VertOut
 #pragma anki member U32 m_diffuseMap
 #pragma anki struct end
 
-[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
-	g_ankiGlobals;
-[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState g_globalSampler;
-[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] ByteAddressBuffer g_localUniforms;
-[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
-	g_renderableGpuViews;
-
 #pragma anki start vert
 
 VertOut main(VertIn input)
 {
+	const U32 particleId = input.m_svVertexId / 6u; // Every particle owns six consecutive vertex ids.
+	const U32 vertexId = input.m_svVertexId % 6u; // Corner index inside the particle, 0..5.
+
+	const UnpackedGpuSceneRenderableInstance instance =
+		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
+	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
+	const GpuSceneParticles particles = g_gpuScene.Load<GpuSceneParticles>(renderable.m_geometryOffset); // For particles m_geometryOffset addresses a GpuSceneParticles record.
+
+	U32 idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticlePosition] + particleId * sizeof(Vec3); // Byte address of this particle's position.
+	const Vec3 vertPos = g_gpuScene.Load<Vec3>(idx);
+	idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticleScale] + particleId * sizeof(F32); // Byte address of this particle's scale.
+	const F32 vertScale = g_gpuScene.Load<F32>(idx);
+	idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticleColor] + particleId * sizeof(F32); // The kParticleColor slot is read as a single F32 alpha.
+	const F32 vertAlpha = g_gpuScene.Load<F32>(idx);
+
 	VertOut output;
 
-	output.m_uv = Vec2(input.m_vertexId & 1, input.m_vertexId >> 1);
+	output.m_uv = Vec2(vertexId & 1u, ((vertexId + 1u) / 3u) & 1u); // Corners 0..5 map to (0,0) (1,0) (0,1) (1,1) (0,1) (1,0).
 
 	output.m_worldPos =
-		mul(g_ankiGlobals.m_cameraTransform, Vec4((output.m_uv - 0.5) * input.m_scale, 0.0, 0.0)) + input.m_position;
+		mul(g_globalUniforms.m_cameraTransform, Vec4((output.m_uv - 0.5) * vertScale, 0.0, 0.0)) + vertPos; // UV recentred to [-0.5, 0.5], scaled, rotated by the camera transform (w = 0), then offset by the particle position.
 
-	output.m_svPosition = Vec4(mul(g_renderableGpuViews[0].m_worldTransform, Vec4(output.m_worldPos, 1.0)), 1.0);
-	output.m_svPosition = mul(g_ankiGlobals.m_viewProjectionMatrix, output.m_svPosition);
+	output.m_svPosition = mul(g_globalUniforms.m_viewProjectionMatrix, Vec4(output.m_worldPos, 1.0)); // No per-renderable world transform is applied any more.
 
-	output.m_alpha = input.m_alpha;
+	output.m_alpha = vertAlpha;
+	output.m_uniformsOffset = renderable.m_uniformsOffset; // Forwarded so the fragment stage can load its local uniforms.
 
 	return output;
 }
@@ -67,7 +75,8 @@ VertOut main(VertIn input)
 FragOut main(VertOut input)
 {
 	FragOut output = (FragOut)0;
-	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_localUniforms, 0u);
+	const AnKiLocalUniforms localUniforms =
+		loadAnKiLocalUniforms(g_gpuScene, WaveReadLaneFirst(input.m_uniformsOffset));
 
 #if ANIMATED_TEXTURE == 1
 	RVec4 texCol = readAnimatedTextureRgba(g_bindlessTextures2dArrayF32[localUniforms.m_diffuseMap], g_globalSampler,

+ 60 - 77
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -5,7 +5,6 @@
 
 #pragma anki hlsl
 
-#pragma anki mutator ANKI_LOD 0 1 2
 #pragma anki mutator ANKI_VELOCITY 0 1
 #pragma anki mutator ANKI_TECHNIQUE 0 1 2
 #pragma anki mutator ANKI_BONES 0 1
@@ -21,37 +20,18 @@
 #pragma anki skip_mutation ALPHA_TEST 1 DIFFUSE_TEX 0
 #pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 1
 #pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 2
-#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 1
-#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 1
-#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 2
-#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 2
 
 // Some defines the clear up things
 #define REALLY_ALPHA_TEST (ALPHA_TEST && DIFFUSE_TEX)
 #define UVS (ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || REALLY_ALPHA_TEST)
 #define REALLY_VELOCITY ((ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER)
-#define REALLY_USING_PARALLAX \
-	(PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ANKI_LOD == 0 && ALPHA_TEST == 0)
+#define REALLY_USING_PARALLAX (PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ALPHA_TEST == 0)
 
 #include <AnKi/Shaders/Include/MaterialTypes.h>
-#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneFunctions.h>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
-
-ANKI_BINDLESS_SET(kMaterialSetBindless)
-
-[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState g_globalSampler;
-[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] ByteAddressBuffer g_localUniforms;
-[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
-	g_renderableGpuViews;
-[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
-	g_globalUniforms;
-
-#if ANKI_BONES
-[[vk::binding(kMaterialBindingBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4> g_boneTransforms;
-[[vk::binding(kMaterialBindingPreviousBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4>
-	g_prevFrameBoneTransforms;
-#endif
+#include <AnKi/Shaders/MaterialShadersCommon.hlsl>
 
 #pragma anki reflect AnKiLocalUniforms
 #pragma anki struct AnKiLocalUniforms
@@ -80,22 +60,8 @@ ANKI_BINDLESS_SET(kMaterialSetBindless)
 
 struct VertIn
 {
-	[[vk::location(VertexStreamId::kPosition)]] Vec3 m_position : POSITION;
-	U32 m_instanceId : SV_INSTANCEID;
-
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	[[vk::location(VertexStreamId::kNormal)]] RVec3 m_normal : NORMAL;
-	[[vk::location(VertexStreamId::kTangent)]] RVec4 m_tangent : TANGENT;
-#endif
-
-#if UVS
-	[[vk::location(VertexStreamId::kUv)]] Vec2 m_uv : TEXCOORD;
-#endif
-
-#if ANKI_BONES
-	[[vk::location(VertexStreamId::kBoneIds)]] UVec4 m_boneIndices : COLOR0;
-	[[vk::location(VertexStreamId::kBoneWeights)]] RVec4 m_boneWeights : COLOR1;
-#endif
+	U32 m_svVertexId : SV_VERTEXID; // Handed to loadVertex(), which replaces the per-attribute vertex inputs removed above.
+	[[vk::location(0)]] PackedGpuSceneRenderableInstance m_gpuSceneRenderableInstance : INSTANCE; // Unpacked in main() to get the renderable offset and LOD.
 };
 
 struct VertOut
@@ -114,8 +80,14 @@ struct VertOut
 #if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
 	RVec3 m_normal : NORMAL;
 	RVec3 m_tangent : TANGENT;
+#	if ANKI_PLATFORM_MOBILE
 	RVec3 m_bitangent : BINTANGENT;
+#	else
+	Vec3 m_bitangent : BINTANGENT; // For some reason RelaxedPrecision annoys nVidia
+#	endif
 #endif
+
+	nointerpolation U32 m_uniformsOffset : UNIS_OFFSET;
 };
 
 #if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
@@ -130,15 +102,32 @@ struct FragOut
 
 #pragma anki start vert
 
+Mat3x4 loadBoneTransform(UnpackedMeshVertex vert, GpuSceneRenderable renderable, U32 index) // Loads the current transform of the bone referenced by vert.m_boneIndices[index].
+{
+	const U32 boneIdx = vert.m_boneIndices[index];
+	U32 byteOffset = renderable.m_boneTransformsOffset; // Start of this renderable's bone transforms in g_gpuScene.
+	byteOffset += boneIdx * sizeof(Mat3x4); // Transforms are addressed as consecutive Mat3x4 entries.
+	return g_gpuScene.Load<Mat3x4>(byteOffset);
+}
+
+Mat3x4 loadPreviousBoneTransform(UnpackedMeshVertex vert, GpuSceneRenderable renderable, U32 index) // Same lookup as loadBoneTransform() but from m_previousBoneTransformsOffset.
+{
+	const U32 boneIdx = vert.m_boneIndices[index];
+	U32 byteOffset = renderable.m_previousBoneTransformsOffset; // Start of the previous bone transforms in g_gpuScene.
+	byteOffset += boneIdx * sizeof(Mat3x4); // Transforms are addressed as consecutive Mat3x4 entries.
+	return g_gpuScene.Load<Mat3x4>(byteOffset);
+}
+
 #if ANKI_BONES
-void skinning(VertIn input, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 normal, inout RVec4 tangent)
+void skinning(UnpackedMeshVertex vert, GpuSceneRenderable renderable, inout Vec3 pos, inout Vec3 prevPos,
+			  inout RVec3 normal, inout RVec4 tangent)
 {
-	Mat4 skinMat = g_boneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0];
-	Mat4 prevSkinMat = g_prevFrameBoneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0];
+	Mat3x4 skinMat = loadBoneTransform(vert, renderable, 0) * vert.m_boneWeights[0];
+	Mat3x4 prevSkinMat = loadPreviousBoneTransform(vert, renderable, 0) * vert.m_boneWeights[0];
 	[unroll] for(U32 i = 1u; i < 4u; ++i)
 	{
-		skinMat = skinMat + g_boneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
-		prevSkinMat = prevSkinMat + g_prevFrameBoneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
+		skinMat = skinMat + loadBoneTransform(vert, renderable, i) * vert.m_boneWeights[i];
+		prevSkinMat = prevSkinMat + loadPreviousBoneTransform(vert, renderable, i) * vert.m_boneWeights[i];
 	}
 
 #	if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
@@ -155,14 +144,17 @@ void skinning(VertIn input, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 norm
 #endif
 
 #if(ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-void velocity(RenderableGpuView view, Vec3 prevLocalPos, inout VertOut output)
+void velocity(Mat3x4 worldTransform, Mat3x4 prevWorldTransform, Vec3 prevLocalPos, inout VertOut output)
 {
+	ANKI_MAYBE_UNUSED(prevWorldTransform);
+	ANKI_MAYBE_UNUSED(worldTransform);
+
 #	if ANKI_VELOCITY
 	// Object is also moving
-	const Mat3x4 trf = view.m_previousWorldTransform;
+	const Mat3x4 trf = prevWorldTransform;
 #	else
 	// Object is a skin that is not moving
-	const Mat3x4 trf = view.m_worldTransform;
+	const Mat3x4 trf = worldTransform;
 #	endif
 
 	Vec4 v4 = Vec4(mul(trf, Vec4(prevLocalPos, 1.0)), 1.0);
@@ -175,50 +167,41 @@ void velocity(RenderableGpuView view, Vec3 prevLocalPos, inout VertOut output)
 
 VertOut main(VertIn input)
 {
-	const RenderableGpuView view = g_renderableGpuViews[input.m_instanceId];
 	VertOut output;
 
-	// All values in local space
-#if ANKI_BONES
-	Vec3 pos =
-		input.m_position * view.m_positionScaleF32AndTranslationVec3.x + view.m_positionScaleF32AndTranslationVec3.yzw;
-#else
-	Vec3 pos = input.m_position;
-#endif
-
-	Vec3 prevPos = pos;
-	ANKI_MAYBE_UNUSED(prevPos);
+	const UnpackedGpuSceneRenderableInstance instance =
+		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
+	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
+	const GpuSceneMesh mesh = g_gpuScene.Load<GpuSceneMesh>(renderable.m_geometryOffset); // For meshes m_geometryOffset addresses a GpuSceneMesh record.
+	UnpackedMeshVertex vert = loadVertex(mesh, instance.m_lod, input.m_svVertexId, ANKI_BONES); // LOD now comes from the instance data; the ANKI_LOD mutator is removed in this change.
 
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	RVec3 normal = input.m_normal;
-	RVec4 tangent = input.m_tangent;
-#else
-	RVec3 normal = RVec3(0.0, 0.0, 0.0);
-	RVec4 tangent = RVec4(0.0, 0.0, 0.0, 0.0);
-	ANKI_MAYBE_UNUSED(normal);
-	ANKI_MAYBE_UNUSED(tangent);
-#endif
+	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset + sizeof(Mat3x4)); // Read from the Mat3x4 slot directly after the current transform.
+	ANKI_MAYBE_UNUSED(prevWorldTransform);
 
 #if UVS
-	output.m_uv = input.m_uv;
+	output.m_uv = vert.m_uv;
 #endif
+	Vec3 prevPos = vert.m_position; // Copied before skinning() gets the chance to modify vert.m_position.
+	ANKI_MAYBE_UNUSED(prevPos);
+	output.m_uniformsOffset = renderable.m_uniformsOffset; // Forwarded so the fragment stage can load AnKiLocalUniforms.
 
 	// Do stuff
 #if ANKI_BONES
-	skinning(input, pos, prevPos, normal, tangent);
+	skinning(vert, renderable, vert.m_position, prevPos, vert.m_normal, vert.m_tangent);
 #endif
 
-	output.m_position = Vec4(mul(view.m_worldTransform, Vec4(pos, 1.0)), 1.0);
+	output.m_position = Vec4(mul(worldTransform, Vec4(vert.m_position, 1.0)), 1.0);
 	output.m_position = mul(g_globalUniforms.m_viewProjectionMatrix, output.m_position);
 
 #if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	output.m_normal = mul(view.m_worldTransform, Vec4(normal, 0.0));
-	output.m_tangent = mul(view.m_worldTransform, Vec4(tangent.xyz, 0.0));
-	output.m_bitangent = cross(output.m_normal, output.m_tangent) * tangent.w;
+	output.m_normal = mul(worldTransform, Vec4(vert.m_normal, 0.0));
+	output.m_tangent = mul(worldTransform, Vec4(vert.m_tangent.xyz, 0.0));
+	output.m_bitangent = cross(output.m_normal, output.m_tangent) * vert.m_tangent.w; // tangent.w scales (and can flip) the bitangent.
 #endif
 
 #if REALLY_VELOCITY
-	velocity(view, prevPos, output);
+	velocity(worldTransform, prevWorldTransform, prevPos, output);
 #endif
 
 	return output;
@@ -241,7 +224,7 @@ void main(VertOut input)
 {
 	ANKI_MAYBE_UNUSED(input);
 #	if REALLY_ALPHA_TEST
-	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_localUniforms, 0u);
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_gpuScene, input.m_uniformsOffset);
 	const RVec4 diffColorA = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, input.m_uv);
 	doAlphaTest(diffColorA.a);
 #	endif
@@ -251,7 +234,7 @@ FragOut main(VertOut input)
 {
 	ANKI_MAYBE_UNUSED(input);
 #	if REALLY_ALPHA_TEST
-	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_localUniforms, 0u);
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_gpuScene, input.m_uniformsOffset);
 	const RVec4 diffColorA = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, input.m_uv);
 	doAlphaTest(diffColorA.a);
 #	endif
@@ -275,7 +258,7 @@ RVec3 readNormalFromTexture(VertOut input, Texture2D<RVec4> map, SamplerState sa
 
 FragOut main(VertOut input)
 {
-	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_localUniforms, 0u);
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(g_gpuScene, input.m_uniformsOffset);
 
 #	if REALLY_USING_PARALLAX
 	// TODO

+ 0 - 389
AnKi/Shaders/GBufferGenericGpuScene.ankiprog

@@ -1,389 +0,0 @@
-// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma anki hlsl
-
-#pragma anki mutator ANKI_LOD 0 1 2
-#pragma anki mutator ANKI_VELOCITY 0 1
-#pragma anki mutator ANKI_TECHNIQUE 0 1 2
-#pragma anki mutator ANKI_BONES 0 1
-#pragma anki mutator DIFFUSE_TEX 0 1
-#pragma anki mutator SPECULAR_TEX 0 1
-#pragma anki mutator ROUGHNESS_TEX 0 1
-#pragma anki mutator METAL_TEX 0 1
-#pragma anki mutator NORMAL_TEX 0 1
-#pragma anki mutator PARALLAX 0 1
-#pragma anki mutator EMISSIVE_TEX 0 1
-#pragma anki mutator ALPHA_TEST 0 1
-
-#pragma anki skip_mutation ALPHA_TEST 1 DIFFUSE_TEX 0
-#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 1
-#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 2
-#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 1
-#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 1
-#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 2
-#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 2
-
-// Some defines the clear up things
-#define REALLY_ALPHA_TEST (ALPHA_TEST && DIFFUSE_TEX)
-#define UVS (ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || REALLY_ALPHA_TEST)
-#define REALLY_VELOCITY ((ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER)
-#define REALLY_USING_PARALLAX \
-	(PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ANKI_LOD == 0 && ALPHA_TEST == 0)
-
-#include <AnKi/Shaders/Include/MaterialTypes.h>
-#include <AnKi/Shaders/Include/GpuSceneTypes.h>
-#include <AnKi/Shaders/PackFunctions.hlsl>
-#include <AnKi/Shaders/Functions.hlsl>
-
-ANKI_BINDLESS_SET(MaterialSet::kBindless)
-
-[[vk::binding(MaterialBinding::kTrilinearRepeatSampler, MaterialSet::kGlobal)]] SamplerState g_globalSampler;
-[[vk::binding(MaterialBinding::kGlobalUniforms, MaterialSet::kGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
-	g_globalUniforms;
-[[vk::binding(MaterialBinding::kGpuScene, MaterialSet::kGlobal)]] ByteAddressBuffer g_gpuScene;
-
-[[vk::binding(MaterialBinding::kUnifiedGeometry_R16G16B16_Unorm, MaterialSet::kGlobal)]] Buffer<Vec4>
-	g_universalGeom_R16G16B16_Unorm;
-[[vk::binding(MaterialBinding::kUnifiedGeometry_R8G8B8A8_Snorm, MaterialSet::kGlobal)]] Buffer<Vec4>
-	g_universalGeom_R8G8B8A8_Snorm;
-[[vk::binding(MaterialBinding::kUnifiedGeometry_R32G32_Sfloat, MaterialSet::kGlobal)]] Buffer<Vec2>
-	g_universalGeom_R32G32_Sfloat;
-[[vk::binding(MaterialBinding::kUnifiedGeometry_R8G8B8A8_Uint, MaterialSet::kGlobal)]] Buffer<UVec4>
-	g_universalGeom_R8G8B8A8_Uint;
-
-#pragma anki reflect AnKiLocalUniforms
-#pragma anki struct AnKiLocalUniforms
-#pragma anki member U32 m_normalTex if NORMAL_TEX is 1
-
-#pragma anki member RVec3 m_diffColor if DIFFUSE_TEX is 0
-#pragma anki member U32 m_diffTex if DIFFUSE_TEX is 1
-
-#pragma anki member RF32 m_roughness if ROUGHNESS_TEX is 0
-#pragma anki member U32 m_roughnessTex if ROUGHNESS_TEX is 1
-
-#pragma anki member RVec3 m_specColor if SPECULAR_TEX is 0
-#pragma anki member U32 m_specTex if SPECULAR_TEX is 1
-
-#pragma anki member RF32 m_metallic if METAL_TEX is 0
-#pragma anki member U32 m_metallicTex if METAL_TEX is 1
-
-#pragma anki member RVec3 m_emission if EMISSIVE_TEX is 0
-#pragma anki member U32 m_emissiveTex if EMISSIVE_TEX is 1
-
-#pragma anki member RF32 m_heightmapScale if PARALLAX is 1
-#pragma anki member U32 m_heightTex if PARALLAX is 1
-
-#pragma anki member RF32 m_subsurface
-#pragma anki struct end
-
-struct VertIn
-{
-	U32 m_instanceId : SV_INSTANCEID;
-	[[vk::location(0)]] PackedRenderableGpuViewInstance m_renderableGpuViewInstance : INSTANCE;
-};
-
-struct VertOut
-{
-	Vec4 m_position : SV_POSITION;
-
-#if UVS
-	Vec2 m_uv : TEXCOORD;
-#endif
-
-#if REALLY_VELOCITY
-	Vec3 m_prevClipXyw : PREV_CLIP;
-	Vec3 m_crntClipXyw : CRNT_CLIP;
-#endif
-
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	RVec3 m_normal : NORMAL;
-	RVec3 m_tangent : TANGENT;
-	RVec3 m_bitangent : BINTANGENT;
-#endif
-
-	nointerpolation U32 m_uniformsOffset : UNIS_OFFSET;
-};
-
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
-struct FragOut
-{
-	Vec4 m_color0 : SV_TARGET0;
-	Vec4 m_color1 : SV_TARGET1;
-	Vec4 m_color2 : SV_TARGET2;
-	Vec2 m_color3 : SV_TARGET3;
-};
-#endif
-
-#pragma anki start vert
-
-UnpackedMeshVertex loadVertex(MeshGpuView mesh, U32 lod)
-{
-	MeshGpuViewLod mlod = mesh.m_lods[lod];
-
-	UnpackedMeshVertex v;
-	v.m_position = g_universalGeom_R16G16B16_Unorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kPosition]];
-#if ANKI_BONES
-	v.m_position = v.m_position * mesh.m_positionScale + mesh.m_positionTranslation;
-#endif
-
-	v.m_normal = g_universalGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kNormal]].xyz;
-	v.m_tangent = g_universalGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kTangent]];
-	v.m_uv = g_universalGeom_R32G32_Sfloat[mlod.m_vertexOffsets[(U32)VertexStreamId::kUv]];
-
-#if ANKI_BONES
-	v.m_boneIndices = g_universalGeom_R8G8B8A8_Uint[mlod.m_vertexOffsets[(U32)VertexStreamId::kBoneIds]];
-	v.m_boneWeights = g_universalGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kBoneWeights]];
-#endif
-
-	return v;
-}
-
-Mat3x4 loadBoneTransform(UnpackedMeshVertex vert, RenderableGpuView2 renderable, U32 index)
-{
-	const U32 boneIdx = vert.m_boneIndices[index];
-	U32 byteOffset = renderable.m_boneTransformsOffset;
-	byteOffset += boneIdx * sizeof(Mat3x4);
-	return g_gpuScene.Load<Mat3x4>(byteOffset);
-}
-
-Mat3x4 loadPreviousBoneTransform(UnpackedMeshVertex vert, RenderableGpuView2 renderable, U32 index)
-{
-	const U32 boneIdx = vert.m_boneIndices[index];
-	U32 byteOffset = renderable.m_previousBoneTransformsOffset;
-	byteOffset += boneIdx * sizeof(Mat3x4);
-	return g_gpuScene.Load<Mat3x4>(byteOffset);
-}
-
-UnpackedRenderableGpuViewInstance loadRenderableGpuViewInstance(VertIn input)
-{
-	UnpackedRenderableGpuViewInstance o;
-	o.m_lod = input.m_renderableGpuViewInstance & 3u;
-	o.m_renderableGpuViewOffset = input.m_renderableGpuViewInstance >> 2u;
-	return o;
-}
-
-#if ANKI_BONES
-void skinning(UnpackedMeshVertex vert, RenderableGpuView2 renderable, inout Vec3 pos, inout Vec3 prevPos,
-			  inout RVec3 normal, inout RVec4 tangent)
-{
-	Mat3x4 skinMat = loadBoneTransform(vert, renderable, 0) * vert.m_boneWeights[0];
-	Mat3x4 prevSkinMat = loadPreviousBoneTransform(vert, renderable, 0) * vert.m_boneWeights[0];
-	[unroll] for(U32 i = 1u; i < 4u; ++i)
-	{
-		skinMat = skinMat + loadBoneTransform(vert, renderable, i) * vert.m_boneWeights[i];
-		prevSkinMat = prevSkinMat + loadPreviousBoneTransform(vert, renderable, i) * vert.m_boneWeights[i];
-	}
-
-#	if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	prevPos = mul(prevSkinMat, Vec4(pos, 1.0)).xyz;
-	tangent.xyz = mul(skinMat, Vec4(tangent.xyz, 0.0)).xyz;
-	normal = mul(skinMat, Vec4(normal, 0.0)).xyz;
-#	endif
-	ANKI_MAYBE_UNUSED(prevPos);
-	ANKI_MAYBE_UNUSED(tangent);
-	ANKI_MAYBE_UNUSED(normal);
-
-	pos = mul(skinMat, Vec4(pos, 1.0)).xyz;
-}
-#endif
-
-#if(ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-void velocity(Mat3x4 worldTransform, Mat3x4 prevWorldTransform, Vec3 prevLocalPos, inout VertOut output)
-{
-	ANKI_MAYBE_UNUSED(prevWorldTransform);
-	ANKI_MAYBE_UNUSED(worldTransform);
-
-#	if ANKI_VELOCITY
-	// Object is also moving
-	const Mat3x4 trf = prevWorldTransform;
-#	else
-	// Object is a skin that is not moving
-	const Mat3x4 trf = worldTransform;
-#	endif
-
-	Vec4 v4 = Vec4(mul(trf, Vec4(prevLocalPos, 1.0)), 1.0);
-	v4 = mul(g_globalUniforms.m_previousViewProjectionMatrix, v4);
-
-	output.m_prevClipXyw = v4.xyw;
-	output.m_crntClipXyw = output.m_position.xyw;
-}
-#endif
-
-VertOut main(VertIn input)
-{
-	VertOut output;
-
-	const UnpackedRenderableGpuViewInstance instance = loadRenderableGpuViewInstance(input);
-	const RenderableGpuView2 renderable = g_gpuScene.Load<RenderableGpuView2>(instance.m_renderableGpuViewOffset);
-	const MeshGpuView mesh = g_gpuScene.Load<MeshGpuView>(renderable.m_geometryOffset);
-	UnpackedMeshVertex vert = loadVertex(mesh, instance.m_lod);
-
-	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
-	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset + sizeof(Mat3x4));
-	ANKI_MAYBE_UNUSED(prevWorldTransform);
-
-#if UVS
-	output.m_uv = vert.m_uv;
-#endif
-	Vec3 prevPos = vert.m_position;
-	ANKI_MAYBE_UNUSED(prevPos);
-	output.m_uniformsOffset = renderable.m_uniformsOffset;
-
-	// Do stuff
-#if ANKI_BONES
-	skinning(vert, renderable, vert.m_position, prevPos, vert.m_normal, vert.m_tangent);
-#endif
-
-	output.m_position = Vec4(mul(worldTransform, Vec4(vert.m_position, 1.0)), 1.0);
-	output.m_position = mul(g_globalUniforms.m_viewProjectionMatrix, output.m_position);
-
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-	output.m_normal = mul(worldTransform, Vec4(vert.m_normal, 0.0));
-	output.m_tangent = mul(worldTransform, Vec4(vert.m_tangent.xyz, 0.0));
-	output.m_bitangent = cross(output.m_normal, output.m_tangent) * vert.m_tangent.w;
-#endif
-
-#if REALLY_VELOCITY
-	velocity(worldTransform, prevWorldTransform, prevPos, output);
-#endif
-
-	return output;
-};
-
-#pragma anki end
-
-#pragma anki start frag
-
-void doAlphaTest(RF32 alpha)
-{
-	if(alpha == 0.0)
-	{
-		discard;
-	}
-}
-
-#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_SHADOWS
-void main(VertOut input)
-{
-	ANKI_MAYBE_UNUSED(input);
-#	if REALLY_ALPHA_TEST
-	const AnKiLocalUniforms localUniforms =
-		loadAnKiLocalUniforms(g_gpuScene, WaveReadLaneFirst(input.m_uniformsOffset));
-	const RVec4 diffColorA = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, input.m_uv);
-	doAlphaTest(diffColorA.a);
-#	endif
-}
-#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
-FragOut main(VertOut input)
-{
-	ANKI_MAYBE_UNUSED(input);
-#	if REALLY_ALPHA_TEST
-	const AnKiLocalUniforms localUniforms =
-		loadAnKiLocalUniforms(g_gpuScene, WaveReadLaneFirst(input.m_uniformsOffset));
-	const RVec4 diffColorA = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, input.m_uv);
-	doAlphaTest(diffColorA.a);
-#	endif
-	return (FragOut)0;
-}
-#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
-// Do normal mapping
-RVec3 readNormalFromTexture(VertOut input, Texture2D<RVec4> map, SamplerState sampl, Vec2 texCoords)
-{
-	// First read the texture
-	const RVec3 nAtTangentspace = normalize((map.Sample(sampl, texCoords).rgb - 0.5) * 2.0);
-
-	const RVec3 n = normalize(input.m_normal);
-	const RVec3 t = normalize(input.m_tangent);
-	const RVec3 b = normalize(input.m_bitangent);
-
-	const RMat3 tbnMat = constructMatrixColumns(t, b, n);
-
-	return mul(tbnMat, nAtTangentspace);
-}
-
-FragOut main(VertOut input)
-{
-	const AnKiLocalUniforms localUniforms =
-		loadAnKiLocalUniforms(g_gpuScene, WaveReadLaneFirst(input.m_uniformsOffset));
-
-#	if REALLY_USING_PARALLAX
-	// TODO
-	const Vec2 uv = input.m_uv;
-#	else
-	const Vec2 uv = input.m_uv;
-#	endif
-	ANKI_MAYBE_UNUSED(uv);
-
-#	if DIFFUSE_TEX
-#		if REALLY_ALPHA_TEST
-	const RVec4 diffColorA = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, uv);
-	doAlphaTest(diffColorA.a);
-	const RVec3 diffColor = diffColorA.rgb;
-#		else
-	const RVec3 diffColor = g_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(g_globalSampler, uv).rgb;
-#		endif
-#	else
-	const RVec3 diffColor = localUniforms.m_diffColor;
-#	endif
-
-#	if SPECULAR_TEX
-	const RVec3 specColor = g_bindlessTextures2dF32[localUniforms.m_specTex].Sample(g_globalSampler, uv).rgb;
-#	else
-	const RVec3 specColor = localUniforms.m_specColor;
-#	endif
-
-#	if ROUGHNESS_TEX
-	const RF32 roughness = g_bindlessTextures2dF32[localUniforms.m_roughnessTex].Sample(g_globalSampler, uv).g;
-#	else
-	const RF32 roughness = localUniforms.m_roughness;
-#	endif
-
-#	if METAL_TEX
-	const RF32 metallic = g_bindlessTextures2dF32[localUniforms.m_metallicTex].Sample(g_globalSampler, uv).b;
-#	else
-	const RF32 metallic = localUniforms.m_metallic;
-#	endif
-
-#	if NORMAL_TEX
-	const RVec3 normal =
-		readNormalFromTexture(input, g_bindlessTextures2dF32[localUniforms.m_normalTex], g_globalSampler, uv);
-#	else
-	const RVec3 normal = normalize(input.m_normal);
-#	endif
-
-#	if EMISSIVE_TEX
-	const RVec3 emission = g_bindlessTextures2dF32[localUniforms.m_emissiveTex].Sample(g_globalSampler, uv).rgb;
-#	else
-	const RVec3 emission = localUniforms.m_emission;
-#	endif
-
-#	if ANKI_VELOCITY || ANKI_BONES
-	const Vec2 prevNdc = input.m_prevClipXyw.xy / input.m_prevClipXyw.z;
-	const Vec2 crntNdc = input.m_crntClipXyw.xy / input.m_crntClipXyw.z;
-
-	// It's NDC_TO_UV(prevNdc) - NDC_TO_UV(crntNdc) or:
-	const Vec2 velocity = (prevNdc - crntNdc) * 0.5;
-#	else
-	const Vec2 velocity = Vec2(1.0, 1.0);
-#	endif
-
-	GbufferInfo g;
-	g.m_diffuse = diffColor;
-	g.m_normal = normal;
-	g.m_f0 = specColor;
-	g.m_roughness = roughness;
-	g.m_subsurface = localUniforms.m_subsurface;
-	g.m_emission = emission;
-	g.m_metallic = metallic;
-	g.m_velocity = velocity;
-
-	FragOut output;
-	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3);
-	return output;
-}
-#endif
-
-#pragma anki end

+ 27 - 0
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -0,0 +1,27 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+
+ANKI_BEGIN_NAMESPACE
+
+ANKI_SHADER_FUNC_INLINE PackedGpuSceneRenderableInstance // Folds a renderable's offset and its LOD into one packed value
+packGpuSceneRenderableInstance(UnpackedGpuSceneRenderableInstance x)
+{ // Layout of the result: bits [2..] = m_renderableOffset, bits [0..1] = m_lod
+	return (x.m_renderableOffset << 2u) | x.m_lod; // No masking is done: an m_lod above 3 would overlap the offset bits
+}
+
+ANKI_SHADER_FUNC_INLINE UnpackedGpuSceneRenderableInstance // Inverse of packGpuSceneRenderableInstance(): splits the packed value back into offset + LOD
+unpackRenderableGpuViewInstance(PackedGpuSceneRenderableInstance x) // NOTE(review): name does not follow the "GpuSceneRenderableInstance" naming of the pack function - confirm if intended
+{
+	UnpackedGpuSceneRenderableInstance o;
+	o.m_lod = x & 3u; // The LOD is stored in the 2 lowest bits
+	o.m_renderableOffset = x >> 2u; // The remaining upper bits are the renderable's offset
+	return o;
+}
+
+ANKI_END_NAMESPACE

+ 18 - 9
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -10,7 +10,7 @@
 ANKI_BEGIN_NAMESPACE
 
 /// @note All offsets in bytes
-struct RenderableGpuView2
+struct GpuSceneRenderable
 {
 	U32 m_worldTransformsOffset; ///< First is the crnt transform and the 2nd the previous
 	U32 m_aabbOffset;
@@ -21,31 +21,40 @@ struct RenderableGpuView2
 	U32 m_padding0;
 	U32 m_padding1;
 };
-static_assert(sizeof(RenderableGpuView2) == sizeof(Vec4) * 2);
+static_assert(sizeof(GpuSceneRenderable) == sizeof(Vec4) * 2);
 
-struct MeshGpuViewLod
+struct GpuSceneMeshLod
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_indexCount;
 	U32 m_indexOffset; // TODO Decide on its type
 };
-static_assert(sizeof(MeshGpuViewLod) == sizeof(Vec4) * 2);
+static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 2);
 
-struct MeshGpuView
+struct GpuSceneMesh
 {
-	MeshGpuViewLod m_lods[kMaxLodCount];
+	GpuSceneMeshLod m_lods[kMaxLodCount];
 
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;
 };
+static_assert(sizeof(GpuSceneMesh) == sizeof(Vec4) * (kMaxLodCount * 2 + 1));
 
-struct UnpackedRenderableGpuViewInstance
+struct GpuSceneParticles
 {
-	U32 m_renderableGpuViewOffset;
+	U32 m_vertexOffsets[(U32)VertexStreamId::kParticleRelatedCount];
+	U32 m_padding0;
+	U32 m_padding1;
+};
+static_assert(sizeof(GpuSceneParticles) == sizeof(Vec4) * 2);
+
+struct UnpackedGpuSceneRenderableInstance
+{
+	U32 m_renderableOffset;
 	U32 m_lod;
 };
 
-typedef U32 PackedRenderableGpuViewInstance;
+typedef U32 PackedGpuSceneRenderableInstance;
 
 struct RenderableGpuView
 {

+ 8 - 5
AnKi/Shaders/Include/MaterialTypes.h

@@ -60,20 +60,23 @@ enum class MaterialBinding : U32
 	kGlobalUniforms,
 	kGpuScene,
 
-	// Texture buffer bindings pointing to universal geom buffer:
-	kUnifiedGeometry_R16G16B16_Unorm,
-	kUnifiedGeometry_R8G8B8A8_Snorm,
+	// Texture buffer bindings pointing to unified geom buffer:
+	kUnifiedGeometry_R32_Sfloat,
 	kUnifiedGeometry_R32G32_Sfloat,
+	kUnifiedGeometry_R32G32B32_Sfloat,
+	kUnifiedGeometry_R32G32B32A32_Sfloat,
+	kUnifiedGeometry_R16G16B16A16_Unorm,
+	kUnifiedGeometry_R8G8B8A8_Snorm,
 	kUnifiedGeometry_R8G8B8A8_Uint,
 
 	// For FW shading:
 	kLinearClampSampler,
+	kShadowSampler,
 	kDepthRt,
 	kLightVolume,
 	kClusterShadingUniforms,
 	kClusterShadingLights,
-	kClusters,
-	kShadowSampler,
+	kClusters = kClusterShadingLights + 3,
 
 	kCount,
 	kFirst = 0

+ 11 - 7
AnKi/Shaders/Include/MeshTypes.h

@@ -28,11 +28,11 @@ enum class VertexStreamId : U8
 
 	// For particles
 	kParticlePosition = 0,
+	kParticlePreviousPosition,
 	kParticleScale,
-	kParticleAlpha,
+	kParticleColor,
 	kParticleLife,
 	kParticleStartingLife,
-	kParticlePreviousPosition,
 
 	kParticleRelatedCount,
 	kParticleRelatedFirst = 0,
@@ -55,11 +55,11 @@ enum class VertexStreamMask : U8
 	kBoneWeights = 1 << 5,
 
 	kParticlePosition = 1 << 0,
-	kParticleScale = 1 << 1,
-	kParticleAlpha = 1 << 2,
-	kParticleLife = 1 << 3,
-	kParticleStartingLife = 1 << 4,
-	kParticlePreviousPosition = 1 << 5,
+	kParticlePreviousPosition = 1 << 1,
+	kParticleScale = 1 << 2,
+	kParticleColor = 1 << 3,
+	kParticleLife = 1 << 4,
+	kParticleStartingLife = 1 << 5,
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamMask)
 
@@ -67,6 +67,10 @@ ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamMask)
 inline constexpr Array<Format, U32(VertexStreamId::kMeshRelatedCount)> kMeshRelatedVertexStreamFormats = {
 	Format::kR16G16B16A16_Unorm, Format::kR8G8B8A8_Snorm, Format::kR8G8B8A8_Snorm,
 	Format::kR32G32_Sfloat,      Format::kR8G8B8A8_Uint,  Format::kR8G8B8A8_Snorm};
+
+inline constexpr Array<Format, U32(VertexStreamId::kParticleRelatedCount)> kParticleRelatedVertexStreamFormats = { // One format per particle stream, listed in the order of the particle VertexStreamId values
+	Format::kR32G32B32_Sfloat,    Format::kR32G32B32_Sfloat, Format::kR32_Sfloat, // kParticlePosition, kParticlePreviousPosition, kParticleScale
+	Format::kR32G32B32A32_Sfloat, Format::kR32_Sfloat,       Format::kR32_Sfloat}; // kParticleColor, kParticleLife, kParticleStartingLife
 #endif
 
 struct UnpackedMeshVertex

+ 69 - 0
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -0,0 +1,69 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// Common definitions for the GBuffer, forward shading and all the other shaders that appear in materials.
+
+#pragma once
+
+#include <AnKi/Shaders/Include/MaterialTypes.h>
+#include <AnKi/Shaders/Include/MeshTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneFunctions.h>
+
+ANKI_BINDLESS_SET(MaterialSet::kBindless)
+
+[[vk::binding(MaterialBinding::kTrilinearRepeatSampler, MaterialSet::kGlobal)]] SamplerState g_globalSampler;
+[[vk::binding(MaterialBinding::kGlobalUniforms, MaterialSet::kGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
+	g_globalUniforms;
+[[vk::binding(MaterialBinding::kGpuScene, MaterialSet::kGlobal)]] ByteAddressBuffer g_gpuScene;
+
+// Unified geom:
+#define _ANKI_DEFINE_BINDING(format, type) \
+	[[vk::binding(MaterialBinding::kUnifiedGeometry_##format, MaterialSet::kGlobal)]] Buffer<type> \
+		g_unifiedGeom_##format;
+
+_ANKI_DEFINE_BINDING(R32_Sfloat, F32)
+_ANKI_DEFINE_BINDING(R32G32_Sfloat, Vec2)
+_ANKI_DEFINE_BINDING(R32G32B32_Sfloat, Vec3)
+_ANKI_DEFINE_BINDING(R32G32B32A32_Sfloat, Vec4)
+_ANKI_DEFINE_BINDING(R16G16B16A16_Unorm, Vec4)
+_ANKI_DEFINE_BINDING(R8G8B8A8_Snorm, Vec4)
+_ANKI_DEFINE_BINDING(R8G8B8A8_Uint, UVec4)
+
+#undef _ANKI_DEFINE_BINDING
+
+// FW shading specific
+#if defined(FORWARD_SHADING)
+[[vk::binding(MaterialBinding::kLinearClampSampler, MaterialSet::kGlobal)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(MaterialBinding::kDepthRt, MaterialSet::kGlobal)]] Texture2D g_gbufferDepthTex;
+[[vk::binding(MaterialBinding::kLightVolume, MaterialSet::kGlobal)]] Texture3D<RVec4> g_lightVol;
+[[vk::binding(MaterialBinding::kShadowSampler, MaterialSet::kGlobal)]] SamplerComparisonState g_shadowSampler;
+#	define CLUSTERED_SHADING_SET MaterialSet::kGlobal
+#	define CLUSTERED_SHADING_UNIFORMS_BINDING (U32) MaterialBinding::kClusterShadingUniforms
+#	define CLUSTERED_SHADING_LIGHTS_BINDING (U32) MaterialBinding::kClusterShadingLights
+#	define CLUSTERED_SHADING_CLUSTERS_BINDING (U32) MaterialBinding::kClusters
+#	include <AnKi/Shaders/ClusteredShadingCommon.hlsl>
+#endif
+
+UnpackedMeshVertex loadVertex(GpuSceneMesh mesh, U32 lod, U32 svVertexId, Bool bones) // Fetches vertex svVertexId of the given mesh LOD from the unified geometry buffers
+{
+	GpuSceneMeshLod mlod = mesh.m_lods[lod]; // Holds the per-stream vertex offsets of the selected LOD
+
+	UnpackedMeshVertex v;
+	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + svVertexId]; // Every stream is indexed as stream offset + vertex index
+	v.m_position = v.m_position * mesh.m_positionScale + mesh.m_positionTranslation; // Apply the mesh's scale and translation to the fetched position
+
+	v.m_normal = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kNormal] + svVertexId].xyz; // Only xyz of the 4-component fetch is kept
+	v.m_tangent = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kTangent] + svVertexId]; // All 4 components are kept
+	v.m_uv = g_unifiedGeom_R32G32_Sfloat[mlod.m_vertexOffsets[(U32)VertexStreamId::kUv] + svVertexId];
+
+	if(bones) // Bone streams are read only on request; otherwise m_boneIndices and m_boneWeights are left unassigned
+	{
+		v.m_boneIndices = g_unifiedGeom_R8G8B8A8_Uint[mlod.m_vertexOffsets[(U32)VertexStreamId::kBoneIds] + svVertexId];
+		v.m_boneWeights =
+			g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kBoneWeights] + svVertexId];
+	}
+
+	return v;
+}

+ 3 - 0
AnKi/Util/Process.cpp

@@ -10,6 +10,9 @@
 #	include <ThirdParty/Reproc/reproc/include/reproc/reproc.h>
 #	include <ThirdParty/Subprocess/subprocess.h>
 #endif
+#if ANKI_POSIX
+#	include <unistd.h>
+#endif
 
 namespace anki {
 

+ 21 - 10
Tools/Shader/ShaderProgramBinaryDumpMain.cpp

@@ -14,10 +14,11 @@ using namespace anki;
 static const char* kUsage = R"(Dump the shader binary to stdout
 Usage: %s [options] input_shader_program_binary
 Options:
--stats : Print performance statistics for all shaders. By default it doesn't
+-stats      : Print performance statistics for all shaders. By default it doesn't
+-only-stats : Print only stats
 )";
 
-static Error parseCommandLineArgs(WeakArray<char*> argv, Bool& dumpStats, StringRaii& filename)
+static Error parseCommandLineArgs(WeakArray<char*> argv, Bool& dumpStats, Bool& dumpBinary, StringRaii& filename)
 {
 	// Parse config
 	if(argv.getSize() < 2)
@@ -26,14 +27,20 @@ static Error parseCommandLineArgs(WeakArray<char*> argv, Bool& dumpStats, String
 	}
 
 	dumpStats = false;
+	dumpBinary = true;
 	filename = argv[argv.getSize() - 1];
 
 	for(U32 i = 1; i < argv.getSize() - 1; i++)
 	{
-		if(strcmp(argv[i], "-stats") == 0)
+		if(CString(argv[i]) == "-stats")
 		{
 			dumpStats = true;
 		}
+		else if(CString(argv[i]) == "-only-stats")
+		{
+			dumpBinary = false;
+			dumpStats = true;
+		}
 	}
 
 	return Error::kNone;
@@ -185,7 +192,7 @@ Error dumpStats(const ShaderProgramBinary& bin)
 
 				// AMD
 				RgaOutput rgaOut = {};
-#if 0
+#if 1
 				err = runRadeonGpuAnalyzer(
 #	if ANKI_OS_LINUX
 					ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/RadeonGpuAnalyzer/rga",
@@ -306,17 +313,20 @@ Error dumpStats(const ShaderProgramBinary& bin)
 	return Error::kNone;
 }
 
-Error dump(CString fname, Bool bDumpStats)
+Error dump(CString fname, Bool bDumpStats, Bool dumpBinary)
 {
 	HeapMemoryPool pool(allocAligned, nullptr);
 
 	ShaderProgramBinaryWrapper binw(&pool);
 	ANKI_CHECK(binw.deserializeFromFile(fname));
 
-	StringRaii txt(&pool);
-	dumpShaderProgramBinary(binw.getBinary(), txt);
+	if(dumpBinary)
+	{
+		StringRaii txt(&pool);
+		dumpShaderProgramBinary(binw.getBinary(), txt);
 
-	printf("%s\n", txt.cstr());
+		printf("%s\n", txt.cstr());
+	}
 
 	if(bDumpStats)
 	{
@@ -331,13 +341,14 @@ int main(int argc, char** argv)
 	HeapMemoryPool pool(allocAligned, nullptr);
 	StringRaii filename(&pool);
 	Bool dumpStats;
-	if(parseCommandLineArgs(WeakArray<char*>(argv, argc), dumpStats, filename))
+	Bool dumpBinary;
+	if(parseCommandLineArgs(WeakArray<char*>(argv, argc), dumpStats, dumpBinary, filename))
 	{
 		ANKI_LOGE(kUsage, argv[0]);
 		return 1;
 	}
 
-	const Error err = dump(filename, dumpStats);
+	const Error err = dump(filename, dumpStats, dumpBinary);
 	if(err)
 	{
 		ANKI_LOGE("Can't dump due to an error. Bye");