Browse Source

Re-think async loading a bit

Panagiotis Christopoulos Charitos 2 months ago
parent
commit
17a848baca
37 changed files with 398 additions and 246 deletions
  1. 2 1
      AnKi/GpuMemory/RebarTransientMemoryPool.cpp
  2. 1 1
      AnKi/GpuMemory/UnifiedGeometryBuffer.h
  3. 8 3
      AnKi/Gr/AccelerationStructure.h
  4. 1 1
      AnKi/Gr/Common.h
  5. 24 1
      AnKi/Gr/D3D/D3DGrManager.cpp
  6. 9 3
      AnKi/Gr/Vulkan/VkAccelerationStructure.cpp
  7. 1 1
      AnKi/Gr/Vulkan/VkCommandBuffer.cpp
  8. 2 2
      AnKi/Gr/Vulkan/VkGpuMemoryManager.cpp
  9. 21 0
      AnKi/Gr/Vulkan/VkGrManager.cpp
  10. 23 28
      AnKi/Renderer/Dbg.cpp
  11. 3 5
      AnKi/Renderer/ForwardShading.h
  12. 6 4
      AnKi/Renderer/GBuffer.cpp
  13. 4 6
      AnKi/Renderer/GBuffer.h
  14. 0 1
      AnKi/Renderer/Reflections.cpp
  15. 31 12
      AnKi/Renderer/ShadowMapping.cpp
  16. 2 2
      AnKi/Renderer/Tonemapping.cpp
  17. 26 2
      AnKi/Renderer/Utils/GpuVisibility.cpp
  18. 28 15
      AnKi/Resource/AsyncLoader.cpp
  19. 19 13
      AnKi/Resource/AsyncLoader.h
  20. 3 3
      AnKi/Resource/ImageAtlasResource.cpp
  21. 27 48
      AnKi/Resource/ImageResource.cpp
  22. 7 26
      AnKi/Resource/ImageResource.h
  23. 14 0
      AnKi/Resource/MaterialResource.cpp
  24. 2 0
      AnKi/Resource/MaterialResource.h
  25. 3 37
      AnKi/Resource/MeshResource.cpp
  26. 7 0
      AnKi/Resource/MeshResource.h
  27. 11 9
      AnKi/Scene/Components/MaterialComponent.cpp
  28. 16 1
      AnKi/Scene/Components/SceneComponent.h
  29. 35 7
      AnKi/Scene/SceneGraph.cpp
  30. 7 11
      AnKi/Scene/SceneGraph.h
  31. 4 0
      AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog
  32. 4 0
      AnKi/Shaders/Reflections.ankiprog
  33. 2 0
      AnKi/Shaders/RtMaterialFetch.hlsl
  34. 1 1
      Samples/PhysicsPlayground/FpsCharacterNode.cpp
  35. 0 1
      Tests/Gr/Gr.cpp
  36. 2 1
      Tests/Gr/GrCommon.h
  37. 42 0
      Tests/Gr/GrRayTracing.cpp

+ 2 - 1
AnKi/GpuMemory/RebarTransientMemoryPool.cpp

@@ -26,7 +26,8 @@ void RebarTransientMemoryPool::init()
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_size = g_cvarCoreRebarGpuMemorySize;
 	buffInit.m_usage = BufferUsageBit::kAllConstant | BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kVertexOrIndex
-					   | BufferUsageBit::kShaderBindingTable | BufferUsageBit::kAllIndirect | BufferUsageBit::kCopySource;
+					   | BufferUsageBit::kShaderBindingTable | BufferUsageBit::kAllIndirect | BufferUsageBit::kCopySource
+					   | BufferUsageBit::kAccelerationStructureBuild;
 	m_buffer = GrManager::getSingleton().newBuffer(buffInit);
 
 	m_bufferSize = buffInit.m_size;

+ 1 - 1
AnKi/GpuMemory/UnifiedGeometryBuffer.h

@@ -14,7 +14,7 @@ namespace anki {
 /// @addtogroup gpu_memory
 /// @{
 
-ANKI_CVAR(NumericCVar<PtrSize>, Core, UnifiedGeometryBufferSize, 256_MB, 16_MB, 2_GB, "Global index and vertex buffer size")
+ANKI_CVAR(NumericCVar<PtrSize>, Core, UnifiedGeometryBufferSize, 512_MB, 16_MB, 2_GB, "Global index and vertex buffer size")
 
 /// @memberof UnifiedGeometryBuffer
 class UnifiedGeometryBufferAllocation

+ 8 - 3
AnKi/Gr/AccelerationStructure.h

@@ -61,11 +61,16 @@ public:
 	{
 		Bool valid = true;
 
-		valid = valid && m_instanceCount > 0;
-
 		if(validateBuffers)
 		{
-			valid = valid && (m_instancesBuffer.getRange() == sizeof(AccelerationStructureInstance) * m_instanceCount);
+			if(m_instanceCount)
+			{
+				valid = valid && (m_instancesBuffer.getRange() == sizeof(AccelerationStructureInstance) * m_instanceCount);
+			}
+			else
+			{
+				valid = valid && (!m_instancesBuffer.isValid());
+			}
 		}
 
 		return valid;

+ 1 - 1
AnKi/Gr/Common.h

@@ -730,7 +730,7 @@ enum class BufferUsageBit : U64
 	kCopySource = 1ull << 17ull,
 	kCopyDestination = 1ull << 18ull,
 
-	kAccelerationStructureBuild = 1ull << 19ull, ///< Will be used as a position or index buffer in a BLAS build.
+	kAccelerationStructureBuild = 1ull << 19ull, ///< Will be used as a position or index buffer in a BLAS build or instances buffer in a TLAS build.
 	kShaderBindingTable = 1ull << 20ull, ///< Will be used as SBT in a dispatchRays() command.
 	kAccelerationStructureBuildScratch = 1ull << 21ull, ///< Used in buildAccelerationStructureXXX commands.
 	kAccelerationStructure = 1ull << 22ull, ///< Will be used as AS.

+ 24 - 1
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -296,7 +296,30 @@ void GrManagerImpl::submitInternal(WeakArray<CommandBuffer*> cmdbs, WeakArray<Fe
 	}
 
 	LockGuard lock(m_globalMtx);
-	m_frames[m_crntFrame].m_fences.emplaceBack(fence.get());
+	PerFrame& frame = m_frames[m_crntFrame];
+
+	frame.m_fences.emplaceBack(fence.get());
+
+	// Throttle the number of fences
+	Bool fencesThrottled = false;
+	while(frame.m_fences.getSize() > 64)
+	{
+		fencesThrottled = true;
+		auto it = frame.m_fences.getBegin();
+		for(; it != frame.m_fences.getEnd(); ++it)
+		{
+			if((*it)->signaled())
+			{
+				frame.m_fences.erase(it);
+				break;
+			}
+		}
+	}
+
+	if(fencesThrottled)
+	{
+		ANKI_D3D_LOGW("Had to throttle the number of fences");
+	}
 }
 
 void GrManagerImpl::finishInternal()

+ 9 - 3
AnKi/Gr/Vulkan/VkAccelerationStructure.cpp

@@ -121,15 +121,21 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 	}
 	else
 	{
-		ANKI_ASSERT(sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_instanceCount <= inf.m_topLevel.m_instancesBuffer.getRange());
-		m_tlas.m_instancesBuffer.reset(&inf.m_topLevel.m_instancesBuffer.getBuffer());
+		const Bool isEmpty = inf.m_topLevel.m_instanceCount == 0;
+
+		if(!isEmpty)
+		{
+			ANKI_ASSERT(sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_instanceCount <= inf.m_topLevel.m_instancesBuffer.getRange());
+			m_tlas.m_instancesBuffer.reset(&inf.m_topLevel.m_instancesBuffer.getBuffer());
+		}
 
 		// Geom
 		VkAccelerationStructureGeometryKHR& geom = m_geometry;
 		geom.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
 		geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
 		geom.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
-		geom.geometry.instances.data.deviceAddress = m_tlas.m_instancesBuffer->getGpuAddress() + inf.m_topLevel.m_instancesBuffer.getOffset();
+		geom.geometry.instances.data.deviceAddress =
+			(isEmpty) ? 0 : (m_tlas.m_instancesBuffer->getGpuAddress() + inf.m_topLevel.m_instancesBuffer.getOffset());
 		geom.geometry.instances.arrayOfPointers = false;
 		geom.flags = 0;
 

+ 1 - 1
AnKi/Gr/Vulkan/VkCommandBuffer.cpp

@@ -1293,9 +1293,9 @@ void CommandBufferImpl::traceRaysInternal(const BufferView& sbtBuffer, U32 sbtRe
 										  U32 height, U32 depth, BufferView argsBuff)
 {
 	ANKI_ASSERT(sbtBuffer.isValid());
+	ANKI_ASSERT(rayTypeCount > 0);
 
 	const PtrSize sbtRecordSize = sbtRecordSize32;
-	ANKI_ASSERT(hitGroupSbtRecordCount > 0);
 	ANKI_ASSERT(m_rtProg);
 
 	ANKI_ASSERT((hitGroupSbtRecordCount % rayTypeCount) == 0);

+ 2 - 2
AnKi/Gr/Vulkan/VkGpuMemoryManager.cpp

@@ -16,8 +16,8 @@ ANKI_SVAR(HostMemoryAllocated, StatCategory::kGpuMem, "Host mem", StatFlag::kByt
 ANKI_SVAR(HostMemoryInUse, StatCategory::kGpuMem, "Host mem in use", StatFlag::kBytes)
 ANKI_SVAR(HostMemoryAllocationCount, StatCategory::kGpuMem, "Host mem allocations", StatFlag::kNone)
 
-static constexpr Array<GpuMemoryManagerClassInfo, 7> kClasses{
-	{{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}}};
+static constexpr Array<GpuMemoryManagerClassInfo, 8> kClasses{
+	{{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}, {512_MB, 512_MB}}};
 
 /// Special classes for the ReBAR memory. Have that as a special case because it's so limited and needs special care.
 static constexpr Array<GpuMemoryManagerClassInfo, 3> kRebarClasses{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};

+ 21 - 0
AnKi/Gr/Vulkan/VkGrManager.cpp

@@ -1495,6 +1495,27 @@ void GrManagerImpl::submitInternal(WeakArray<CommandBuffer*> cmdbs, WeakArray<Fe
 
 		ANKI_TRACE_SCOPED_EVENT(VkQueueSubmit);
 		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[queueType], 1, &submit, fence->getImplementation().m_handle));
+
+		// Throttle the number of fences
+		Bool fencesThrottled = false;
+		while(frame.m_fences.getSize() > 64)
+		{
+			fencesThrottled = true;
+			auto it = frame.m_fences.getBegin();
+			for(; it != frame.m_fences.getEnd(); ++it)
+			{
+				if((*it)->signaled())
+				{
+					frame.m_fences.erase(it);
+					break;
+				}
+			}
+		}
+
+		if(fencesThrottled)
+		{
+			ANKI_VK_LOGW("Had to throttle the number of fences");
+		}
 	}
 }
 

+ 23 - 28
AnKi/Renderer/Dbg.cpp

@@ -158,11 +158,8 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
 	rgraphCtx.bindSrv(0, 0, getGBuffer().getDepthRt());
 
-	// GBuffer renderables
-	if(g_cvarRenderDbgScene)
+	// Common code for boxes stuff
 	{
-		const U32 allAabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
-
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_dbgProg);
 		variantInitInfo.addMutation("DEPTH_FAIL_VISUALIZATION", U32(m_ditheredDepthTestOn != 0));
 		variantInitInfo.addMutation("OBJECT_TYPE", 0);
@@ -184,33 +181,30 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 		cmdb.bindVertexBuffer(0, BufferView(m_cubeVertsBuffer.get()), sizeof(Vec3));
 		cmdb.setVertexAttribute(VertexAttributeSemantic::kPosition, 0, Format::kR32G32B32_Sfloat, 0);
 		cmdb.bindIndexBuffer(BufferView(m_cubeIndicesBuffer.get()), IndexType::kU16);
+	}
 
+	// GBuffer renderables
+	const U32 gbufferAllAabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
+	if(g_cvarRenderDbgScene && gbufferAllAabbCount)
+	{
 		cmdb.bindSrv(1, 0, GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferView());
 
-		BufferView indicesBuff;
-		BufferHandle dep;
-		getGBuffer().getVisibleAabbsBuffer(indicesBuff, dep);
-		cmdb.bindSrv(2, 0, indicesBuff);
+		const GpuVisibilityOutput& visOut = getGBuffer().getVisibilityOutput();
+		cmdb.bindSrv(2, 0, visOut.m_visibleAaabbIndicesBuffer);
 
-		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
+		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, gbufferAllAabbCount);
 	}
 
 	// Forward shading renderables
-	if(g_cvarRenderDbgScene)
+	const U32 forwardAllAabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
+	if(g_cvarRenderDbgScene && forwardAllAabbCount)
 	{
-		const U32 allAabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
-
-		if(allAabbCount)
-		{
-			cmdb.bindSrv(1, 0, GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferView());
+		cmdb.bindSrv(1, 0, GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferView());
 
-			BufferView indicesBuff;
-			BufferHandle dep;
-			getRenderer().getForwardShading().getVisibleAabbsBuffer(indicesBuff, dep);
-			cmdb.bindSrv(2, 0, indicesBuff);
+		const GpuVisibilityOutput& visOut = getForwardShading().getGpuVisibilityOutput();
+		cmdb.bindSrv(2, 0, visOut.m_visibleAaabbIndicesBuffer);
 
-			cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
-		}
+		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, forwardAllAabbCount);
 	}
 
 	// Draw non-renderables
@@ -323,15 +317,16 @@ void Dbg::populateRenderGraph(RenderingContext& ctx)
 
 	if(g_cvarRenderDbgScene)
 	{
-		BufferView indicesBuff;
-		BufferHandle dep;
-		getGBuffer().getVisibleAabbsBuffer(indicesBuff, dep);
-		pass.newBufferDependency(dep, BufferUsageBit::kSrvGeometry);
+		const GpuVisibilityOutput& visOut = getGBuffer().getVisibilityOutput();
+		if(visOut.m_dependency.isValid())
+		{
+			pass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kSrvGeometry);
+		}
 
-		if(GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount())
+		const GpuVisibilityOutput& fvisOut = getForwardShading().getGpuVisibilityOutput();
+		if(fvisOut.m_dependency.isValid())
 		{
-			getRenderer().getForwardShading().getVisibleAabbsBuffer(indicesBuff, dep);
-			pass.newBufferDependency(dep, BufferUsageBit::kSrvGeometry);
+			pass.newBufferDependency(fvisOut.m_dependency, BufferUsageBit::kSrvGeometry);
 		}
 	}
 }

+ 3 - 5
AnKi/Renderer/ForwardShading.h

@@ -32,12 +32,10 @@ public:
 
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 
-	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
-	void getVisibleAabbsBuffer(BufferView& visibleAaabbIndicesBuffer, BufferHandle& dep) const
+	/// Returns the GPU visibility output stored in the run context. Some of it is used for debug drawing.
+	const GpuVisibilityOutput& getGpuVisibilityOutput() const
 	{
-		visibleAaabbIndicesBuffer = m_runCtx.m_visOut.m_visibleAaabbIndicesBuffer;
-		dep = m_runCtx.m_visOut.m_dependency;
-		ANKI_ASSERT(visibleAaabbIndicesBuffer.isValid() && dep.isValid());
+		return m_runCtx.m_visOut;
 	}
 
 private:

+ 6 - 4
AnKi/Renderer/GBuffer.cpp

@@ -94,7 +94,7 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
 	// Visibility
-	GpuVisibilityOutput visOut;
+	GpuVisibilityOutput& visOut = m_runCtx.m_visOut;
 	FrustumGpuVisibilityInput visIn;
 	{
 		const CommonMatrices& matrices = ctx.m_matrices;
@@ -112,9 +112,6 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 		visIn.m_twoPhaseOcclusionCulling = getRenderer().getMeshletRenderingType() != MeshletRenderingType::kNone;
 
 		getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
-
-		m_runCtx.m_visibleAaabbIndicesBuffer = visOut.m_visibleAaabbIndicesBuffer;
-		m_runCtx.m_visibleAaabbIndicesBufferDepedency = visOut.m_dependency;
 	}
 
 	// Create RTs
@@ -167,6 +164,11 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 		pass.setWork([&ctx, visOut, this](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(GBuffer);
 
+			if(!visOut.containsDrawcalls()) [[unlikely]]
+			{
+				return;
+			}
+
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			// Set some state, leave the rest to default

+ 4 - 6
AnKi/Renderer/GBuffer.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <AnKi/Renderer/RendererObject.h>
+#include <AnKi/Renderer/Utils/GpuVisibility.h>
 #include <AnKi/Gr.h>
 
 namespace anki {
@@ -60,11 +61,9 @@ public:
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override;
 
 	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
-	void getVisibleAabbsBuffer(BufferView& visibleAaabbIndicesBuffer, BufferHandle& dep) const
+	const GpuVisibilityOutput& getVisibilityOutput() const
 	{
-		visibleAaabbIndicesBuffer = m_runCtx.m_visibleAaabbIndicesBuffer;
-		dep = m_runCtx.m_visibleAaabbIndicesBufferDepedency;
-		ANKI_ASSERT(visibleAaabbIndicesBuffer.isValid() && dep.isValid());
+		return m_runCtx.m_visOut;
 	}
 
 private:
@@ -87,8 +86,7 @@ private:
 		RenderTargetHandle m_prevFrameDepthRt;
 		RenderTargetHandle m_hzbRt;
 
-		BufferView m_visibleAaabbIndicesBuffer; ///< Optional
-		BufferHandle m_visibleAaabbIndicesBufferDepedency;
+		GpuVisibilityOutput m_visOut;
 	} m_runCtx;
 };
 /// @}

+ 0 - 1
AnKi/Renderer/Reflections.cpp

@@ -333,7 +333,6 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			}
 			else
 			{
-
 				cmdb.dispatchRaysIndirect(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
 										  BufferView(m_indirectArgsBuffer.get()).setRange(sizeof(DispatchIndirectArgs)));
 			}

+ 31 - 12
AnKi/Renderer/ShadowMapping.cpp

@@ -542,13 +542,20 @@ BufferView ShadowMapping::createVetVisibilityPass(CString passName, const LightC
 
 	NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(passName);
 
-	// The shader doesn't actually write to the handle but have it as a write dependency for the drawer to correctly wait for this pass
-	pass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kUavCompute);
+	if(visOut.containsDrawcalls())
+	{
+		// The shader doesn't actually write to the handle, but declare it as a write dependency so that the drawer correctly waits for this pass
+		pass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kUavCompute);
+	}
 
-	pass.setWork([this, &lightc, hashBuff = visOut.m_visiblesHashBuffer, mdiBuff = visOut.m_legacy.m_mdiDrawCountsBuffer, clearTileIndirectArgs,
-				  dispatchMeshIndirectArgs = visOut.m_mesh.m_dispatchMeshIndirectArgsBuffer,
-				  drawIndirectArgs = visOut.m_mesh.m_drawIndirectArgs](RenderPassWorkContext& rpass) {
+	pass.setWork([this, &lightc, clearTileIndirectArgs, visOut](RenderPassWorkContext& rpass) {
 		ANKI_TRACE_SCOPED_EVENT(ShadowmappingVet);
+
+		if(!visOut.containsDrawcalls()) [[unlikely]]
+		{
+			return;
+		}
+
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 		cmdb.bindShaderProgram(m_vetVisibilityGrProg.get());
@@ -556,17 +563,21 @@ BufferView ShadowMapping::createVetVisibilityPass(CString passName, const LightC
 		const UVec4 lightIndex(lightc.getGpuSceneLightAllocation().getIndex());
 		cmdb.setFastConstants(&lightIndex, sizeof(lightIndex));
 
-		cmdb.bindSrv(0, 0, hashBuff);
-		cmdb.bindUav(0, 0, mdiBuff.isValid() ? mdiBuff : BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(U32)));
+		cmdb.bindSrv(0, 0, visOut.m_visiblesHashBuffer);
+		cmdb.bindUav(0, 0,
+					 visOut.m_legacy.m_mdiDrawCountsBuffer.isValid() ? visOut.m_legacy.m_mdiDrawCountsBuffer
+																	 : BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(U32)));
 		cmdb.bindUav(1, 0, GpuSceneArrays::Light::getSingleton().getBufferView());
 		cmdb.bindUav(2, 0, GpuSceneArrays::LightVisibleRenderablesHash::getSingleton().getBufferView());
 		cmdb.bindUav(3, 0, clearTileIndirectArgs);
 		cmdb.bindUav(4, 0,
-					 dispatchMeshIndirectArgs.isValid() ? dispatchMeshIndirectArgs
-														: BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(DispatchIndirectArgs)));
+					 visOut.m_mesh.m_dispatchMeshIndirectArgsBuffer.isValid()
+						 ? visOut.m_mesh.m_dispatchMeshIndirectArgsBuffer
+						 : BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(DispatchIndirectArgs)));
 		cmdb.bindUav(5, 0,
-					 drawIndirectArgs.isValid() ? drawIndirectArgs
-												: BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(DrawIndirectArgs)));
+					 visOut.m_mesh.m_drawIndirectArgs.isValid()
+						 ? visOut.m_mesh.m_drawIndirectArgs
+						 : BufferView(getDummyGpuResources().m_buffer.get()).setRange(sizeof(DrawIndirectArgs)));
 
 		ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(RenderingTechnique::kDepth) <= 64 && "TODO");
 		cmdb.dispatchCompute(1, 1, 1);
@@ -605,12 +616,20 @@ void ShadowMapping::createDrawShadowsPass(ConstWeakArray<ShadowSubpassInfo> subp
 	smRti.m_subresource.m_depthStencilAspect = DepthStencilAspectBit::kDepth;
 	pass.setRenderpassInfo({}, &smRti);
 
-	pass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kIndirectDraw);
+	if(visOut.containsDrawcalls())
+	{
+		pass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kIndirectDraw);
+	}
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kRtvDsvWrite);
 
 	pass.setWork([this, visOut, subpasses](RenderPassWorkContext& rgraphCtx) {
 		ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
 
+		if(!visOut.containsDrawcalls()) [[unlikely]]
+		{
+			return;
+		}
+
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 		for(U32 i = 0; i < subpasses.getSize(); ++i)

+ 2 - 2
AnKi/Renderer/Tonemapping.cpp

@@ -42,8 +42,8 @@ Error Tonemapping::init()
 
 		m_tonemapping.m_lut.reset(nullptr);
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("EngineAssets/DefaultLut.ankitex", m_tonemapping.m_lut));
-		ANKI_ASSERT(m_tonemapping.m_lut->getWidth() == m_tonemapping.m_lut->getHeight());
-		ANKI_ASSERT(m_tonemapping.m_lut->getWidth() == m_tonemapping.m_lut->getDepth());
+		ANKI_ASSERT(m_tonemapping.m_lut->getTexture().getWidth() == m_tonemapping.m_lut->getTexture().getHeight());
+		ANKI_ASSERT(m_tonemapping.m_lut->getTexture().getWidth() == m_tonemapping.m_lut->getTexture().getDepth());
 	}
 
 	return Error::kNone;

+ 26 - 2
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -185,7 +185,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
 	{
 		// Early exit
-		in = {};
+		out = {};
 		return;
 	}
 
@@ -828,6 +828,13 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 void GpuVisibility::populateRenderGraphStage3(FrustumGpuVisibilityInput& in, GpuVisibilityOutput& out)
 {
+	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
+	{
+		// Early exit
+		out = {};
+		return;
+	}
+
 	RenderGraphBuilder& rgraph = *in.m_rgraph;
 
 	const GpuVisLimits limits = computeLimits(in.m_technique);
@@ -1110,9 +1117,26 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 	m_lastFrameIdx = getRenderer().getFrameCount();
 #endif
 
-	// Allocate the transient buffers
 	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
 
+	if(aabbCount == 0) [[unlikely]]
+	{
+		out.m_instancesBuffer = {};
+
+		WeakArray<U32> arr2;
+		out.m_renderablesBuffer = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer<U32>(1, arr2);
+		arr2[0] = 0;
+
+		WeakArray<DispatchIndirectArgs> arr3;
+		out.m_buildSbtIndirectArgsBuffer = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(1, arr3);
+		zeroMemory(arr3[0]);
+
+		out.m_dependency = rgraph.importBuffer(out.m_renderablesBuffer, BufferUsageBit::kNone);
+
+		return;
+	}
+
+	// Allocate the transient buffers
 	out.m_instancesBuffer = allocateStructuredBuffer<AccelerationStructureInstance>(aabbCount);
 	out.m_dependency = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kNone);
 

+ 28 - 15
AnKi/Resource/AsyncLoader.cpp

@@ -7,6 +7,7 @@
 #include <AnKi/Core/StatsSet.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/Util/HighRezTimer.h>
 
 namespace anki {
 
@@ -22,15 +23,17 @@ AsyncLoader::~AsyncLoader()
 {
 	stop();
 
-	if(!m_taskQueue.isEmpty())
+	for(auto& queue : m_taskQueues)
 	{
-		ANKI_RESOURCE_LOGW("Stoping loading thread while there is work to do");
-
-		while(!m_taskQueue.isEmpty())
+		if(!queue.isEmpty())
 		{
-			AsyncLoaderTask* task = &m_taskQueue.getFront();
-			m_taskQueue.popFront();
-			deleteInstance(ResourceMemoryPool::getSingleton(), task);
+			ANKI_RESOURCE_LOGW("Stoping loading thread while there is work to do");
+
+			while(!queue.isEmpty())
+			{
+				AsyncLoaderTask* task = queue.popFront();
+				deleteInstance(ResourceMemoryPool::getSingleton(), task);
+			}
 		}
 	}
 }
@@ -59,25 +62,33 @@ Error AsyncLoader::threadWorker()
 	while(!err)
 	{
 		AsyncLoaderTask* task = nullptr;
+		AsyncLoaderPriority taskPriority = AsyncLoaderPriority::kCount;
 		Bool quit = false;
 
+		// Block until there is work to do in any of the queues or the loader is asked to quit
 		{
-			// Wait for something
 			LockGuard<Mutex> lock(m_mtx);
-			while(m_taskQueue.isEmpty() && !m_quit)
+			while(m_taskQueues[AsyncLoaderPriority::kHigh].isEmpty() && m_taskQueues[AsyncLoaderPriority::kMedium].isEmpty()
+				  && m_taskQueues[AsyncLoaderPriority::kLow].isEmpty() && !m_quit)
 			{
 				m_condVar.wait(m_mtx);
 			}
 
-			// Do some work
 			if(m_quit)
 			{
 				quit = true;
 			}
 			else
 			{
-				task = &m_taskQueue.getFront();
-				m_taskQueue.popFront();
+				for(AsyncLoaderPriority priority : EnumIterable<AsyncLoaderPriority>())
+				{
+					if(!m_taskQueues[priority].isEmpty())
+					{
+						task = m_taskQueues[priority].popFront();
+						taskPriority = priority;
+						break;
+					}
+				}
 			}
 		}
 
@@ -90,8 +101,10 @@ Error AsyncLoader::threadWorker()
 			// Exec the task
 			ANKI_ASSERT(task);
 			AsyncLoaderTaskContext ctx;
+			ctx.m_priority = taskPriority;
 
 			{
+				// HighRezTimer::sleep(250.0_ms);
 				ANKI_TRACE_SCOPED_EVENT(RsrcAsyncTask);
 				err = (*task)(ctx);
 				g_svarAsyncTasksInFlight.decrement(1u);
@@ -110,7 +123,7 @@ Error AsyncLoader::threadWorker()
 			if(ctx.m_resubmitTask)
 			{
 				LockGuard<Mutex> lock(m_mtx);
-				m_taskQueue.pushBack(task);
+				m_taskQueues[ctx.m_priority].pushBack(task);
 			}
 			else
 			{
@@ -123,7 +136,7 @@ Error AsyncLoader::threadWorker()
 	return err;
 }
 
-void AsyncLoader::submitTask(AsyncLoaderTask* task)
+void AsyncLoader::submitTask(AsyncLoaderTask* task, AsyncLoaderPriority priority)
 {
 	ANKI_ASSERT(task);
 
@@ -131,7 +144,7 @@ void AsyncLoader::submitTask(AsyncLoaderTask* task)
 	g_svarAsyncTasksInFlight.increment(1);
 
 	LockGuard<Mutex> lock(m_mtx);
-	m_taskQueue.pushBack(task);
+	m_taskQueues[priority].pushBack(task);
 	m_condVar.notifyOne();
 }
 

+ 19 - 13
AnKi/Resource/AsyncLoader.h

@@ -17,14 +17,28 @@ class AsyncLoader;
 /// @addtogroup resource
 /// @{
 
+/// @memberof AsyncLoader
+enum class AsyncLoaderPriority : U8
+{
+	kHigh,
+	kMedium,
+	kLow,
+
+	kCount,
+	kFirst = 0
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(AsyncLoaderPriority)
+
+/// @memberof AsyncLoader
 class AsyncLoaderTaskContext
 {
 public:
-	/// Resubmit the same task at the end of the queue.
-	Bool m_resubmitTask = false;
+	Bool m_resubmitTask = false; ///< Set by the task to have it re-queued at the back of the queue of m_priority once it returns.
+	AsyncLoaderPriority m_priority = AsyncLoaderPriority::kCount;
 };
 
 /// Interface for tasks for the AsyncLoader.
+/// @memberof AsyncLoader
 class AsyncLoaderTask : public IntrusiveListEnabled<AsyncLoaderTask>
 {
 public:
@@ -43,9 +57,6 @@ public:
 
 	~AsyncLoader();
 
-	/// Submit a task.
-	void submitTask(AsyncLoaderTask* task);
-
 	/// Create a new asynchronous loading task.
 	template<typename TTask, typename... TArgs>
 	TTask* newTask(TArgs&&... args)
@@ -53,12 +64,8 @@ public:
 		return newInstance<TTask>(ResourceMemoryPool::getSingleton(), std::forward<TArgs>(args)...);
 	}
 
-	/// Create and submit a new asynchronous loading task.
-	template<typename TTask, typename... TArgs>
-	void submitNewTask(TArgs&&... args)
-	{
-		submitTask(newTask<TTask>(std::forward<TArgs>(args)...));
-	}
+	/// Submit a task.
+	void submitTask(AsyncLoaderTask* task, AsyncLoaderPriority priority);
 
 	/// Get the total number of completed tasks.
 	U32 getTasksInFlightCount() const
@@ -68,11 +75,10 @@ public:
 
 private:
 	Thread m_thread;
-	Barrier m_barrier = {2};
 
 	Mutex m_mtx;
 	ConditionVariable m_condVar;
-	IntrusiveList<AsyncLoaderTask> m_taskQueue;
+	Array<IntrusiveList<AsyncLoaderTask>, U32(AsyncLoaderPriority::kCount)> m_taskQueues;
 	Bool m_quit = false;
 
 	Atomic<U32> m_tasksInFlightCount = {0};

+ 3 - 3
AnKi/Resource/ImageAtlasResource.cpp

@@ -29,8 +29,8 @@ Error ImageAtlasResource::load(const ResourceFilename& filename, Bool async)
 	ANKI_CHECK(el.getText(texFname));
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource<ImageResource>(texFname, m_image, async));
 
-	m_size[0] = m_image->getWidth();
-	m_size[1] = m_image->getHeight();
+	m_size[0] = m_image->getTexture().getWidth();
+	m_size[1] = m_image->getTexture().getHeight();
 
 	//
 	// <subImageMargin>
@@ -38,7 +38,7 @@ Error ImageAtlasResource::load(const ResourceFilename& filename, Bool async)
 	ANKI_CHECK(rootel.getChildElement("subImageMargin", el));
 	I64 margin = 0;
 	ANKI_CHECK(el.getNumber(margin));
-	if(margin >= I(m_image->getWidth()) || margin >= I(m_image->getHeight()) || margin < 0)
+	if(margin >= I(m_image->getTexture().getWidth()) || margin >= I(m_image->getTexture().getHeight()) || margin < 0)
 	{
 		ANKI_RESOURCE_LOGE("Too big margin %d", I32(margin));
 		return Error::kUserData;

+ 27 - 48
AnKi/Resource/ImageResource.cpp

@@ -16,10 +16,7 @@ class ImageResource::LoadingContext
 {
 public:
 	ImageLoader m_loader{&ResourceMemoryPool::getSingleton()};
-	U32 m_faces = 0;
-	U32 m_layerCount = 0;
-	TextureType m_texType;
-	TexturePtr m_tex;
+	ImageResourcePtr m_image;
 };
 
 /// Image upload async task.
@@ -30,7 +27,12 @@ public:
 
 	Error operator()([[maybe_unused]] AsyncLoaderTaskContext& ctx) final
 	{
-		return ImageResource::load(m_ctx);
+		return m_ctx.m_image->loadAsync(m_ctx);
+	}
+
+	static BaseMemoryPool& getMemoryPool()
+	{
+		return ResourceMemoryPool::getSingleton();
 	}
 };
 
@@ -40,18 +42,18 @@ ImageResource::~ImageResource()
 
 Error ImageResource::load(const ResourceFilename& filename, Bool async)
 {
-	TexUploadTask* task;
+	UniquePtr<TexUploadTask> task;
 	LoadingContext* ctx;
 	LoadingContext localCtx;
 
 	if(async)
 	{
-		task = AsyncLoader::getSingleton().newTask<TexUploadTask>();
+		task.reset(AsyncLoader::getSingleton().newTask<TexUploadTask>());
 		ctx = &task->m_ctx;
+		ctx->m_image.reset(this);
 	}
 	else
 	{
-		task = nullptr;
 		ctx = &localCtx;
 	}
 	ImageLoader& loader = ctx->m_loader;
@@ -196,48 +198,25 @@ Error ImageResource::load(const ResourceFilename& filename, Bool async)
 	// Create the texture
 	m_tex = GrManager::getSingleton().newTexture(init);
 
-	// Transition it. TODO remove this
-	{
-		const TextureView view(m_tex.get(), TextureSubresourceDesc::all());
-
-		CommandBufferInitInfo cmdbinit;
-		cmdbinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
-		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbinit);
-
-		const TextureBarrierInfo barrier = {view, TextureUsageBit::kNone, TextureUsageBit::kAllSrv};
-		cmdb->setPipelineBarrier({&barrier, 1}, {}, {});
-
-		FencePtr outFence;
-		cmdb->endRecording();
-		GrManager::getSingleton().submit(cmdb.get(), {}, &outFence);
-		outFence->clientWait(60.0_sec);
-	}
-
-	// Set the context
-	ctx->m_faces = faces;
-	ctx->m_layerCount = init.m_layerCount;
-	ctx->m_texType = init.m_type;
-	ctx->m_tex = m_tex;
-
 	// Upload the data
 	if(async)
 	{
-		AsyncLoader::getSingleton().submitTask(task);
+		TexUploadTask* pTask;
+		task.moveAndReset(pTask);
+		AsyncLoader::getSingleton().submitTask(pTask, AsyncLoaderPriority::kMedium);
 	}
 	else
 	{
-		ANKI_CHECK(load(*ctx));
+		ANKI_CHECK(loadAsync(*ctx));
 	}
 
-	m_size = UVec3(init.m_width, init.m_height, init.m_depth);
-	m_layerCount = init.m_layerCount;
-
 	return Error::kNone;
 }
 
-Error ImageResource::load(LoadingContext& ctx)
+Error ImageResource::loadAsync(LoadingContext& ctx) const
 {
-	const U32 copyCount = ctx.m_layerCount * ctx.m_faces * ctx.m_loader.getMipmapCount();
+	const U32 faceCount = textureTypeIsCube(m_tex->getTextureType()) ? 6 : 1;
+	const U32 copyCount = m_tex->getLayerCount() * faceCount * ctx.m_loader.getMipmapCount();
 
 	for(U32 b = 0; b < copyCount; b += kMaxCopiesBeforeFlush)
 	{
@@ -254,9 +233,9 @@ Error ImageResource::load(LoadingContext& ctx)
 		for(U32 i = begin; i < end; ++i)
 		{
 			U32 mip, layer, face;
-			unflatten3dArrayIndex(ctx.m_layerCount, ctx.m_faces, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
+			unflatten3dArrayIndex(m_tex->getLayerCount(), faceCount, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
 
-			barriers[barrierCount++] = {TextureView(ctx.m_tex.get(), TextureSubresourceDesc::surface(mip, face, layer)), TextureUsageBit::kAllSrv,
+			barriers[barrierCount++] = {TextureView(m_tex.get(), TextureSubresourceDesc::surface(mip, face, layer)), TextureUsageBit::kAllSrv,
 										TextureUsageBit::kCopyDestination};
 		}
 		cmdb->setPipelineBarrier({&barriers[0], barrierCount}, {}, {});
@@ -267,20 +246,19 @@ Error ImageResource::load(LoadingContext& ctx)
 		for(U32 i = begin; i < end; ++i)
 		{
 			U32 mip, layer, face;
-			unflatten3dArrayIndex(ctx.m_layerCount, ctx.m_faces, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
+			unflatten3dArrayIndex(m_tex->getLayerCount(), faceCount, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
 
 			PtrSize surfOrVolSize;
 			const void* surfOrVolData;
 			PtrSize allocationSize;
 
-			if(ctx.m_texType == TextureType::k3D)
+			if(m_tex->getTextureType() == TextureType::k3D)
 			{
 				const auto& vol = ctx.m_loader.getVolume(mip);
 				surfOrVolSize = vol.m_data.getSize();
 				surfOrVolData = &vol.m_data[0];
 
-				allocationSize = computeVolumeSize(ctx.m_tex->getWidth() >> mip, ctx.m_tex->getHeight() >> mip, ctx.m_tex->getDepth() >> mip,
-												   ctx.m_tex->getFormat());
+				allocationSize = computeVolumeSize(m_tex->getWidth() >> mip, m_tex->getHeight() >> mip, m_tex->getDepth() >> mip, m_tex->getFormat());
 			}
 			else
 			{
@@ -288,7 +266,7 @@ Error ImageResource::load(LoadingContext& ctx)
 				surfOrVolSize = surf.m_data.getSize();
 				surfOrVolData = &surf.m_data[0];
 
-				allocationSize = computeSurfaceSize(ctx.m_tex->getWidth() >> mip, ctx.m_tex->getHeight() >> mip, ctx.m_tex->getFormat());
+				allocationSize = computeSurfaceSize(m_tex->getWidth() >> mip, m_tex->getHeight() >> mip, m_tex->getFormat());
 			}
 
 			ANKI_ASSERT(allocationSize >= surfOrVolSize);
@@ -301,7 +279,7 @@ Error ImageResource::load(LoadingContext& ctx)
 
 			// Create temp tex view
 			const TextureSubresourceDesc subresource = TextureSubresourceDesc::surface(mip, face, layer);
-			cmdb->copyBufferToTexture(handle, TextureView(ctx.m_tex.get(), subresource));
+			cmdb->copyBufferToTexture(handle, TextureView(m_tex.get(), subresource));
 		}
 
 		// Set the barriers of the batch
@@ -309,9 +287,9 @@ Error ImageResource::load(LoadingContext& ctx)
 		for(U32 i = begin; i < end; ++i)
 		{
 			U32 mip, layer, face;
-			unflatten3dArrayIndex(ctx.m_layerCount, ctx.m_faces, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
+			unflatten3dArrayIndex(m_tex->getLayerCount(), faceCount, ctx.m_loader.getMipmapCount(), i, layer, face, mip);
 
-			barriers[barrierCount++] = {TextureView(ctx.m_tex.get(), TextureSubresourceDesc::surface(mip, face, layer)),
+			barriers[barrierCount++] = {TextureView(m_tex.get(), TextureSubresourceDesc::surface(mip, face, layer)),
 										TextureUsageBit::kCopyDestination, TextureUsageBit::kSrvPixel | TextureUsageBit::kSrvGeometry};
 		}
 		cmdb->setPipelineBarrier({&barriers[0], barrierCount}, {}, {});
@@ -328,6 +306,7 @@ Error ImageResource::load(LoadingContext& ctx)
 		cmdb.reset(nullptr);
 	}
 
+	m_loadedMipCount.store(m_tex->getMipmapCount());
 	return Error::kNone;
 }
 

+ 7 - 26
AnKi/Resource/ImageResource.h

@@ -36,33 +36,14 @@ public:
 		return *m_tex;
 	}
 
-	U32 getWidth() const
-	{
-		ANKI_ASSERT(m_size.x());
-		return m_size.x();
-	}
-
-	U32 getHeight() const
-	{
-		ANKI_ASSERT(m_size.y());
-		return m_size.y();
-	}
-
-	U32 getDepth() const
-	{
-		ANKI_ASSERT(m_size.z());
-		return m_size.z();
-	}
-
-	U32 getLayerCount() const
+	Vec4 getAverageColor() const
 	{
-		ANKI_ASSERT(m_layerCount);
-		return m_layerCount;
+		return m_avgColor;
 	}
 
-	Vec4 getAverageColor() const
+	Bool isLoaded() const // True when the loaded-mip counter equals the mip count of m_tex.
 	{
-		return m_avgColor;
+		return m_loadedMipCount.load() == m_tex->getMipmapCount(); // loadAsync() stores the mip count into the counter right before returning success.
 	}
 
 private:
@@ -72,12 +53,12 @@ private:
 	class LoadingContext;
 
 	TexturePtr m_tex;
-	UVec3 m_size = UVec3(0u);
-	U32 m_layerCount = 0;
 
 	Vec4 m_avgColor = Vec4(0.0f);
 
-	[[nodiscard]] static Error load(LoadingContext& ctx);
+	mutable Atomic<U32> m_loadedMipCount = {0};
+
+	Error loadAsync(LoadingContext& ctx) const;
 };
 /// @}
 

+ 14 - 0
AnKi/Resource/MaterialResource.cpp

@@ -652,4 +652,18 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 	return variant;
 }
 
+/// Tells whether every image referenced by the material variables reports itself as loaded.
+/// @return False at the first variable whose image is not loaded yet. True otherwise, including when no variable holds an image.
+Bool MaterialResource::isLoaded() const
+{
+	for(const MaterialVariable& variable : m_vars)
+	{
+		// Variables without an image have nothing to wait for
+		if(variable.m_image && !variable.m_image->isLoaded())
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
+
 } // end namespace anki

+ 2 - 0
AnKi/Resource/MaterialResource.h

@@ -227,6 +227,8 @@ public:
 		return ConstWeakArray<U8>(static_cast<const U8*>(m_prefilledLocalConstants), m_localConstantsSize);
 	}
 
+	Bool isLoaded() const; ///< True when all the images referenced by the material variables report they are loaded.
+
 private:
 	class PartialMutation
 	{

+ 3 - 37
AnKi/Resource/MeshResource.cpp

@@ -191,48 +191,12 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 		}
 	}
 
-	// Clear the buffers
-	if(async)
-	{
-		CommandBufferInitInfo cmdbinit("MeshResourceClear");
-		cmdbinit.m_flags = CommandBufferFlag::kSmallBatch | CommandBufferFlag::kGeneralWork;
-		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbinit);
-
-		for(const Lod& lod : m_lods)
-		{
-			cmdb->zeroBuffer(lod.m_indexBufferAllocationToken.getCompleteBufferView());
-
-			for(VertexStreamId stream : EnumIterable(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
-			{
-				if(header.m_vertexAttributes[stream].m_format != Format::kNone)
-				{
-					cmdb->zeroBuffer(lod.m_vertexBuffersAllocationToken[stream].getCompleteBufferView());
-				}
-			}
-
-			if(lod.m_meshletIndices.isValid())
-			{
-				cmdb->zeroBuffer(lod.m_meshletIndices);
-				cmdb->zeroBuffer(lod.m_meshletBoundingVolumes);
-				cmdb->zeroBuffer(lod.m_meshletGeometryDescriptors);
-			}
-		}
-
-		const BufferBarrierInfo barrier = {UnifiedGeometryBuffer::getSingleton().getBufferView(), BufferUsageBit::kCopyDestination,
-										   BufferUsageBit::kVertexOrIndex};
-
-		cmdb->setPipelineBarrier({}, {&barrier, 1}, {});
-
-		cmdb->endRecording();
-		GrManager::getSingleton().submit(cmdb.get());
-	}
-
 	// Submit the loading task
 	if(async)
 	{
-		AsyncLoader::getSingleton().submitTask(task.get());
 		LoadTask* pTask;
 		task.moveAndReset(pTask);
+		AsyncLoader::getSingleton().submitTask(pTask, AsyncLoaderPriority::kMedium);
 	}
 	else
 	{
@@ -442,6 +406,8 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 		transferAlloc.release(handles[i], fence);
 	}
 
+	m_loadedLodCount.store(m_lods.getSize());
+
 	return Error::kNone;
 }
 

+ 7 - 0
AnKi/Resource/MeshResource.h

@@ -112,6 +112,11 @@ public:
 
 	Error getOrCreateCollisionShape(Bool wantStatic, U32 lod, PhysicsCollisionShapePtr& out) const;
 
+	/// Compares the loaded-LOD counter against the number of LODs. loadAsync() stores the LOD count into that counter right before it
+	/// returns success.
+	Bool isLoaded() const
+	{
+		const Bool allLodsLoaded = (m_loadedLodCount.load() == m_lods.getSize());
+		return allLodsLoaded;
+	}
+
 private:
 	class LoadTask;
 	class LoadContext;
@@ -157,6 +162,8 @@ private:
 	F32 m_positionsScale = 0.0f;
 	Vec3 m_positionsTranslation = Vec3(0.0f);
 
+	mutable Atomic<U32> m_loadedLodCount = {0};
+
 	Bool m_isConvex = false;
 
 	Error loadAsync(MeshBinaryLoader& loader) const;

+ 11 - 9
AnKi/Scene/Components/MaterialComponent.cpp

@@ -117,13 +117,8 @@ void MaterialComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 	const Bool skinUpdated = m_skinDirty;
 	const Bool submeshUpdated = m_submeshIdxDirty;
 	const Bool hasSkin = m_skinComponent && m_skinComponent->isEnabled();
-	const Bool isValid = m_resource.isCreated() && m_meshComponent && m_meshComponent->isEnabled();
-	m_resourceDirty = false;
-	m_firstTimeUpdate = false;
-	m_meshComponentDirty = false;
-	m_movedLastFrame = moved;
-	m_skinDirty = false;
-	m_submeshIdxDirty = false;
+	const Bool isValid = m_resource.isCreated() && m_resource->isLoaded() && m_meshComponent && m_meshComponent->isEnabled()
+						 && m_meshComponent->getMeshResource().isLoaded();
 
 	updated = mtlUpdated || meshUpdated || moved || skinUpdated || submeshUpdated;
 
@@ -144,6 +139,13 @@ void MaterialComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 	// From now on the component is considered valid
 
+	m_resourceDirty = false;
+	m_firstTimeUpdate = false;
+	m_meshComponentDirty = false;
+	m_movedLastFrame = moved;
+	m_skinDirty = false;
+	m_submeshIdxDirty = false;
+
 	const MaterialResource& mtl = *m_resource;
 	const MeshResource& mesh = m_meshComponent->getMeshResource();
 	const U32 submeshIdx = min(mesh.getSubMeshCount() - 1, m_submeshIdx);
@@ -331,10 +333,10 @@ void MaterialComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 	// Scene bounds update
 	const Bool aabbUpdated = moved || meshUpdated || submeshUpdated || hasSkin;
-	if(aabbUpdated) [[unlikely]]
+	if(aabbUpdated || info.m_forceUpdateSceneBounds) [[unlikely]]
 	{
 		const Aabb aabbWorld = computeAabb(submeshIdx, *info.m_node);
-		SceneGraph::getSingleton().updateSceneBounds(aabbWorld.getMin().xyz(), aabbWorld.getMax().xyz());
+		info.updateSceneBounds(aabbWorld.getMin().xyz(), aabbWorld.getMax().xyz());
 	}
 
 	// Update the buckets

+ 16 - 1
AnKi/Scene/Components/SceneComponent.h

@@ -54,19 +54,34 @@ private:
 /// @memberof SceneComponent
 class SceneComponentUpdateInfo
 {
+	friend class SceneGraph; // SceneGraph::updateNodes() reads the private m_sceneMin/m_sceneMax once its nodes have been updated.
+
 public:
 	SceneNode* m_node = nullptr;
 	const Second m_previousTime;
 	const Second m_currentTime;
 	const Second m_dt;
+	const Bool m_forceUpdateSceneBounds; // When set, MaterialComponent::update() reports its AABB even if its own "AABB updated" test is false.
 	StackMemoryPool* m_framePool = nullptr;
 
-	SceneComponentUpdateInfo(Second prevTime, Second crntTime)
+	SceneComponentUpdateInfo(Second prevTime, Second crntTime, Bool forceUpdateSceneBounds)
 		: m_previousTime(prevTime)
 		, m_currentTime(crntTime)
 		, m_dt(crntTime - prevTime)
+		, m_forceUpdateSceneBounds(forceUpdateSceneBounds)
+	{
+	}
+
+	void updateSceneBounds(Vec3 aabbMin, Vec3 aabbMax) // Grows the bounds accumulated in this object so they also enclose the given AABB.
 	{
+		ANKI_ASSERT(aabbMin <= aabbMax);
+		m_sceneMin = m_sceneMin.min(aabbMin);
+		m_sceneMax = m_sceneMax.max(aabbMax);
 	}
+
+private:
+	Vec3 m_sceneMin = Vec3(kMaxF32); // Start value. SceneGraph::updateNodes() treats m_sceneMin == Vec3(kMaxF32) as "no bounds were reported".
+	Vec3 m_sceneMax = Vec3(kMinF32); // Start value for the max() accumulation done in updateSceneBounds().
 };
 
 /// Scene node component.

+ 35 - 7
AnKi/Scene/SceneGraph.cpp

@@ -54,6 +54,10 @@ public:
 
 	DynamicArray<SceneNode*, MemoryPoolPtrWrapper<StackMemoryPool>> m_nodesForDeletion;
 
+	Vec3 m_sceneMin = Vec3(kMaxF32);
+	Vec3 m_sceneMax = Vec3(kMinF32);
+	SpinLock m_sceneBoundsLock;
+
 	UpdateSceneNodesCtx()
 		: m_nodesForDeletion(&SceneGraph::getSingleton().m_framePool)
 	{
@@ -200,6 +204,21 @@ void SceneGraph::update(Second prevUpdateTime, Second crntTime)
 		}
 
 		CoreThreadJobManager::getSingleton().waitForAllTasksToFinish();
+
+		if(updateCtx.m_sceneMin != Vec3(kMaxF32))
+		{
+			const Bool forceUpdateSceneBounds = (m_frame % kForceSetSceneBoundsFrameCount) == 0;
+			if(forceUpdateSceneBounds)
+			{
+				m_sceneMin = updateCtx.m_sceneMin;
+				m_sceneMax = updateCtx.m_sceneMax;
+			}
+			else
+			{
+				m_sceneMin = m_sceneMin.min(updateCtx.m_sceneMin);
+				m_sceneMax = m_sceneMax.max(updateCtx.m_sceneMax);
+			}
+		}
 	}
 
 	// Cleanup
@@ -234,16 +253,14 @@ void SceneGraph::update(Second prevUpdateTime, Second crntTime)
 #include <AnKi/Scene/GpuSceneArrays.def.h>
 
 	g_svarSceneUpdateTime.set((HighRezTimer::getCurrentTime() - startUpdateTime) * 1000.0);
+	++m_frame;
 }
 
-void SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
+void SceneGraph::updateNode(SceneNode& node, SceneComponentUpdateInfo& componentUpdateInfo)
 {
 	ANKI_TRACE_INC_COUNTER(SceneNodeUpdated, 1);
 
 	// Components update
-	SceneComponentUpdateInfo componentUpdateInfo(prevTime, crntTime);
-	componentUpdateInfo.m_framePool = &m_framePool;
-
 	U32 sceneComponentUpdatedCount = 0;
 	node.iterateComponents([&](SceneComponent& comp) {
 		componentUpdateInfo.m_node = &node;
@@ -271,12 +288,12 @@ void SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
 			// No components or nothing updated, don't change the timestamp
 		}
 
-		node.frameUpdate(prevTime, crntTime);
+		node.frameUpdate(componentUpdateInfo.m_previousTime, componentUpdateInfo.m_currentTime);
 	}
 
 	// Update children
 	node.visitChildrenMaxDepth(0, [&](SceneNode& child) {
-		updateNode(prevTime, crntTime, child);
+		updateNode(child, componentUpdateInfo);
 		return true;
 	});
 }
@@ -287,6 +304,10 @@ void SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx)
 
 	IntrusiveList<SceneNode>::ConstIterator end = m_nodes.getEnd();
 
+	const Bool forceUpdateSceneBounds = (m_frame % kForceSetSceneBoundsFrameCount) == 0;
+	SceneComponentUpdateInfo componentUpdateInfo(ctx.m_prevUpdateTime, ctx.m_crntTime, forceUpdateSceneBounds);
+	componentUpdateInfo.m_framePool = &m_framePool;
+
 	Bool quit = false;
 	while(!quit)
 	{
@@ -332,9 +353,16 @@ void SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx)
 		// Process nodes
 		for(U i = 0; i < batchSize; ++i)
 		{
-			updateNode(ctx.m_prevUpdateTime, ctx.m_crntTime, *batch[i]);
+			updateNode(*batch[i], componentUpdateInfo);
 		}
 	}
+
+	if(componentUpdateInfo.m_sceneMin != Vec3(kMaxF32))
+	{
+		LockGuard lock(ctx.m_sceneBoundsLock);
+		ctx.m_sceneMin = ctx.m_sceneMin.min(componentUpdateInfo.m_sceneMin);
+		ctx.m_sceneMax = ctx.m_sceneMax.max(componentUpdateInfo.m_sceneMax);
+	}
 }
 
 LightComponent* SceneGraph::getDirectionalLight() const

+ 7 - 11
AnKi/Scene/SceneGraph.h

@@ -188,14 +188,6 @@ public:
 		return (m_skyboxes.getSize()) ? m_skyboxes[0] : nullptr;
 	}
 
-	/// @note It's thread-safe.
-	void updateSceneBounds(const Vec3& min, const Vec3& max)
-	{
-		LockGuard lock(m_sceneBoundsMtx);
-		m_sceneMin = m_sceneMin.min(min);
-		m_sceneMax = m_sceneMax.max(max);
-	}
-
 	/// @note It's thread-safe.
 	Array<Vec3, 2> getSceneBounds() const
 	{
@@ -216,6 +208,8 @@ private:
 		}
 	} m_initMemPoolDummy;
 
+	static constexpr U32 kForceSetSceneBoundsFrameCount = 60 * 2; ///< Every that many frames the scene bounds are overwritten instead of grown (about 2 seconds, assuming 60 FPS).
+
 	mutable StackMemoryPool m_framePool;
 
 	IntrusiveList<SceneNode> m_nodes;
@@ -227,8 +221,10 @@ private:
 
 	EventManager m_events;
 
-	Vec3 m_sceneMin = Vec3(kMaxF32);
-	Vec3 m_sceneMax = Vec3(kMinF32);
+	U64 m_frame = 0;
+
+	Vec3 m_sceneMin = Vec3(-0.1f);
+	Vec3 m_sceneMax = Vec3(+0.1f);
 	mutable SpinLock m_sceneBoundsMtx;
 
 	IntrusiveList<SceneNode> m_nodesForRegistration;
@@ -246,7 +242,7 @@ private:
 	~SceneGraph();
 
 	void updateNodes(UpdateSceneNodesCtx& ctx);
-	void updateNode(Second prevTime, Second crntTime, SceneNode& node);
+	void updateNode(SceneNode& node, SceneComponentUpdateInfo& compUpdate);
 };
 /// @}
 

+ 4 - 0
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -136,7 +136,11 @@ ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 	F32 rayT = 0.0;
 	Bool backfacing = false;
 #	if ANKI_COMPUTE_SHADER
+#		if ANKI_GR_BACKEND_VULKAN
 	const Bool hit = materialRayTraceInlineRt<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing);
+#		else
+	const Bool hit = false;
+#		endif
 #	else
 	const Bool hit = materialRayTrace<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
 #	endif

+ 4 - 0
AnKi/Shaders/Reflections.ankiprog

@@ -747,8 +747,12 @@ vector<T, 3> getDiffuseIndirect(Vec3 worldPos, Vec3 worldNormal)
 	F32 rayT;
 	Bool unused;
 #	if ANKI_COMPUTE_SHADER
+#		if ANKI_GR_BACKEND_VULKAN
 	const Bool hasHitSky =
 		!materialRayTraceInlineRt(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
+#		else
+	const Bool hasHitSky = true;
+#		endif
 #	else
 	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
 #	endif

+ 2 - 0
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -113,6 +113,7 @@ Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T texture
 	return !hasHitSky;
 }
 
+#	if ANKI_GR_BACKEND_VULKAN
 template<typename T>
 Bool materialRayTraceInlineRt(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T textureLod, out GBufferLight<T> gbuffer, out F32 rayT,
 							  out Bool backfacing)
@@ -158,6 +159,7 @@ Bool materialRayTraceInlineRt(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T
 
 	return hit;
 }
+#	endif
 
 Bool rayVisibility(Vec3 rayOrigin, Vec3 rayDir, F32 tMax, U32 traceFlags)
 {

+ 1 - 1
Samples/PhysicsPlayground/FpsCharacterNode.cpp

@@ -183,7 +183,7 @@ void FpsCharacter::frameUpdate([[maybe_unused]] Second prevUpdateTime, [[maybe_u
 	{
 		fireShotgun();
 
-		const Vec3 newPosition(0.0f, getRandomRange(-0.03f, -0.05f), 0.15f);
+		const Vec3 newPosition(0.0f, getRandomRange(-0.05f, -0.03f), 0.15f);
 		m_shotgunNode->setLocalOrigin(m_shotgunRestingPosition + newPosition);
 		const Euler newRotation(getRandomRange(0.0_degrees, 10.0_degrees), getRandomRange(-10.0_degrees, 1.0_degrees), 0.0f);
 		m_shotgunNode->setLocalRotation(Mat3(newRotation) * Mat3(m_shotgunRestingRotation));

+ 0 - 1
Tests/Gr/Gr.cpp

@@ -1,4 +1,3 @@
-
 // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
 // All rights reserved.
 // Code licensed under the BSD License.

+ 2 - 1
Tests/Gr/GrCommon.h

@@ -99,7 +99,8 @@ inline void commonInit(Bool validation = true)
 	g_cvarGrDebugMarkers = true;
 	if(validation)
 	{
-		[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 4>{"Validation", "1", "DebugMarkers", "1"});
+		g_cvarGrValidation = true;
+		g_cvarGrDebugMarkers = true;
 	}
 #if ANKI_TRACING_ENABLED
 	{

+ 42 - 0
Tests/Gr/GrRayTracing.cpp

@@ -0,0 +1,42 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <Tests/Framework/Framework.h>
+#include <Tests/Gr/GrCommon.h>
+#include <AnKi/Gr.h>
+#include <AnKi/Window/NativeWindow.h>
+#include <AnKi/Window/Input.h>
+#include <AnKi/Util/CVarSet.h>
+#include <AnKi/GpuMemory/RebarTransientMemoryPool.h>
+#include <AnKi/Util/HighRezTimer.h>
+#include <AnKi/Resource/TransferGpuAllocator.h>
+#include <AnKi/ShaderCompiler/ShaderParser.h>
+#include <AnKi/Collision/Aabb.h>
+#include <AnKi/Util/WeakArray.h>
+#include <ctime>
+
+using namespace anki;
+
+// Smoke test for a top-level acceleration structure whose init info has nothing but its type set: first ask for its memory requirement
+// and then create it.
+ANKI_TEST(Gr, EmptyAs)
+{
+	g_cvarGrRayTracing = true;
+	commonInit();
+
+	// Memory requirement query. The returned value is not inspected
+	{
+		AccelerationStructureInitInfo tlasInit;
+		tlasInit.m_type = AccelerationStructureType::kTopLevel;
+
+		GrManager::getSingleton().getAccelerationStructureMemoryRequirement(tlasInit);
+	}
+
+	// Creation. The scope ends before commonDestroy() so the pointer goes out of scope first
+	{
+		AccelerationStructureInitInfo tlasInit;
+		tlasInit.m_type = AccelerationStructureType::kTopLevel;
+
+		AccelerationStructurePtr tlas = GrManager::getSingleton().newAccelerationStructure(tlasInit);
+	}
+
+	commonDestroy();
+}