Explorar o código

Merge branch 'master' of https://github.com/godlikepanos/anki-3d-engine into DLSS_Integration_DONOTPUSH

# Conflicts:
#	AnKi/Gr/CMakeLists.txt
#	AnKi/Gr/Common.h
Sergio Alapont hai 3 anos
pai
achega
f05f223382
Modificáronse 51 ficheiros con 749 adicións e 404 borrados
  1. 2 1
      AnKi/Collision/CMakeLists.txt
  2. 18 1
      AnKi/Core/CMakeLists.txt
  3. 5 1
      AnKi/Core/NativeWindowSdl.cpp
  4. 112 48
      AnKi/Gr/CMakeLists.txt
  5. 40 35
      AnKi/Gr/CommandBuffer.h
  6. 4 1
      AnKi/Gr/Common.h
  7. 8 3
      AnKi/Gr/GrObject.h
  8. 87 82
      AnKi/Gr/Vulkan/CommandBuffer.cpp
  9. 16 6
      AnKi/Gr/Vulkan/CommandBufferFactory.h
  10. 8 9
      AnKi/Gr/Vulkan/CommandBufferImpl.cpp
  11. 81 69
      AnKi/Gr/Vulkan/CommandBufferImpl.h
  12. 52 49
      AnKi/Gr/Vulkan/CommandBufferImpl.inl.h
  13. 13 3
      AnKi/Gr/Vulkan/DeferredBarrierFactory.h
  14. 8 3
      AnKi/Gr/Vulkan/FenceFactory.h
  15. 41 8
      AnKi/Gr/Vulkan/GrManagerImpl.cpp
  16. 2 2
      AnKi/Gr/Vulkan/MicroObjectRecycler.inl.h
  17. 1 1
      AnKi/Gr/Vulkan/SamplerFactory.cpp
  18. 13 3
      AnKi/Gr/Vulkan/SamplerFactory.h
  19. 13 3
      AnKi/Gr/Vulkan/SemaphoreFactory.h
  20. 13 3
      AnKi/Gr/Vulkan/SwapchainFactory.h
  21. 2 1
      AnKi/Importer/CMakeLists.txt
  22. 3 1
      AnKi/Input/CMakeLists.txt
  23. 2 1
      AnKi/Math/CMakeLists.txt
  24. 2 1
      AnKi/Physics/CMakeLists.txt
  25. 8 3
      AnKi/Physics/PhysicsObject.h
  26. 2 1
      AnKi/Renderer/CMakeLists.txt
  27. 5 2
      AnKi/Renderer/ConfigVars.defs.h
  28. 21 3
      AnKi/Renderer/IndirectDiffuse.cpp
  29. 1 0
      AnKi/Renderer/IndirectSpecular.cpp
  30. 8 0
      AnKi/Renderer/VrsSriGeneration.cpp
  31. 1 1
      AnKi/Renderer/VrsSriGeneration.h
  32. 2 1
      AnKi/Resource/CMakeLists.txt
  33. 8 3
      AnKi/Resource/ResourceFilesystem.h
  34. 3 3
      AnKi/Resource/ResourceManager.cpp
  35. 10 5
      AnKi/Resource/ResourceObject.h
  36. 2 1
      AnKi/Scene/CMakeLists.txt
  37. 2 1
      AnKi/Script/CMakeLists.txt
  38. 2 1
      AnKi/ShaderCompiler/CMakeLists.txt
  39. 2 1
      AnKi/Shaders/Include/MiscRendererTypes.h
  40. 28 0
      AnKi/Shaders/IndirectDiffuseVrsSriGeneration.ankiprog
  41. 19 7
      AnKi/Shaders/IndirectSpecular.glsl
  42. 3 3
      AnKi/Shaders/LightShadingApplyIndirect.ankiprog
  43. 42 5
      AnKi/Shaders/VrsSriGeneration.glsl
  44. 2 1
      AnKi/Ui/CMakeLists.txt
  45. 8 3
      AnKi/Ui/UiObject.h
  46. 4 4
      AnKi/Util/Allocator.h
  47. 3 1
      AnKi/Util/CMakeLists.txt
  48. 8 4
      AnKi/Util/Memory.h
  49. 3 9
      AnKi/Util/Ptr.h
  50. 1 1
      CMakeLists.txt
  51. 5 5
      Tests/Resource/ResourceManager.cpp

+ 2 - 1
AnKi/Collision/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiCollision ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiCollision ${sources} ${headers})
 target_compile_definitions(AnKiCollision PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiCollision AnKiMath)

+ 18 - 1
AnKi/Core/CMakeLists.txt

@@ -7,17 +7,34 @@ set(sources
 	MaliHwCounters.cpp
 	StatsUi.cpp)
 
+set(headers
+	AllConfigVars.defs.h
+	App.h
+	Common.h
+	ConfigSet.h
+	ConfigVars.defs.h
+	CoreTracer.h
+	DeveloperConsole.h
+	GpuMemoryPools.h
+	MaliHwCounters.h
+	NativeWindow.h
+	StatsUi.h
+	StdinListener.h)
+
 if(ANKI_HEADLESS)
 	set(sources ${sources} NativeWindowHeadless.cpp)
+	set(headers ${headers} NativeWindowHeadless.h)
 elseif(SDL)
 	set(sources ${sources} NativeWindowSdl.cpp)
+	set(headers ${headers} NativeWindowSdl.h)
 elseif(ANDROID)
 	set(sources ${sources} NativeWindowAndroid.cpp)
+	set(headers ${headers} NativeWindowAndroid.h)
 else()
 	message(FATAL_ERROR "Not implemented")
 endif()
 
-add_library(AnKiCore ${sources})
+add_library(AnKiCore ${sources} ${headers})
 
 if(SDL)
 	if(LINUX)

+ 5 - 1
AnKi/Core/NativeWindowSdl.cpp

@@ -96,12 +96,16 @@ Error NativeWindowSdl::init(const NativeWindowInitInfo& init)
 	{
 #if ANKI_OS_WINDOWS
 		flags |= SDL_WINDOW_FULLSCREEN;
-#endif
 
 		if(init.m_exclusiveFullscreen)
 		{
 			flags |= SDL_WINDOW_FULLSCREEN_DESKTOP;
 		}
+#elif ANKI_OS_LINUX
+		flags |= SDL_WINDOW_FULLSCREEN_DESKTOP;
+#else
+#	error See file
+#endif
 
 		// Alter the window size
 		SDL_DisplayMode mode;

+ 112 - 48
AnKi/Gr/CMakeLists.txt

@@ -1,53 +1,117 @@
 set(common_sources
-	"Common.cpp"
-	"GrObject.cpp"
-	"RenderGraph.cpp"
-	"ShaderProgram.cpp"
-	"Enums.cpp"
-	"Utils/FrameGpuAllocator.cpp"
-	"Utils/Functions.cpp")
+	Common.cpp
+	GrObject.cpp
+	RenderGraph.cpp
+	ShaderProgram.cpp
+	Enums.cpp
+	Utils/FrameGpuAllocator.cpp
+	Utils/Functions.cpp)
+
+set(common_headers
+	AccelerationStructure.h
+	Buffer.h
+	CommandBuffer.h
+	Common.h
+	ConfigVars.defs.h
+	Enums.h
+	Fence.h
+	Format.defs.h
+	Framebuffer.h
+	GrManager.h
+	GrObject.h
+	OcclusionQuery.h
+	RenderGraph.h
+	RenderGraph.inl.h
+	Sampler.h
+	Shader.h
+	ShaderProgram.h
+	ShaderVariableDataType.defs.h
+	Texture.h
+	TextureView.h
+	TimestampQuery.h
+	Utils/FrameGpuAllocator.h
+	Utils/Functions.h
+	Utils/InstantiationMacros.h)
 
 if(VULKAN)
 	set(backend_sources
-		"Vulkan/AccelerationStructure.cpp"
-		"Vulkan/AccelerationStructureImpl.cpp"
-		"Vulkan/Buffer.cpp"
-		"Vulkan/BufferImpl.cpp"
-		"Vulkan/CommandBuffer.cpp"
-		"Vulkan/CommandBufferFactory.cpp"
-		"Vulkan/CommandBufferImpl.cpp"
-		"Vulkan/Common.cpp"
-		"Vulkan/DescriptorSet.cpp"
-		"Vulkan/Fence.cpp"
-		"Vulkan/FenceFactory.cpp"
-		"Vulkan/Framebuffer.cpp"
-		"Vulkan/FramebufferImpl.cpp"
-		"Vulkan/GpuMemoryManager.cpp"
-		"Vulkan/GrManager.cpp"
-		"Vulkan/GrManagerImpl.cpp"
-		"Vulkan/OcclusionQuery.cpp"
-		"Vulkan/OcclusionQueryImpl.cpp"
-		"Vulkan/PipelineCache.cpp"
-		"Vulkan/Pipeline.cpp"
-		"Vulkan/PipelineLayout.cpp"
-		"Vulkan/QueryFactory.cpp"
-		"Vulkan/Sampler.cpp"
-		"Vulkan/SamplerFactory.cpp"
-		"Vulkan/SamplerImpl.cpp"
-		"Vulkan/Shader.cpp"
-		"Vulkan/ShaderImpl.cpp"
-		"Vulkan/ShaderProgram.cpp"
-		"Vulkan/ShaderProgramImpl.cpp"
-		"Vulkan/SwapchainFactory.cpp"
-		"Vulkan/Texture.cpp"
-		"Vulkan/TextureImpl.cpp"
-		"Vulkan/TextureView.cpp"
-		"Vulkan/TextureViewImpl.cpp"
-		"Vulkan/TimestampQuery.cpp"
-		"Vulkan/TimestampQueryImpl.cpp"
-		"Vulkan/VulkanObject.cpp"
-		"Vulkan/FrameGarbageCollector.cpp"
-		"Vulkan/DLSSCtx.cpp")
+		Vulkan/AccelerationStructure.cpp
+		Vulkan/AccelerationStructureImpl.cpp
+		Vulkan/Buffer.cpp
+		Vulkan/BufferImpl.cpp
+		Vulkan/CommandBuffer.cpp
+		Vulkan/CommandBufferFactory.cpp
+		Vulkan/CommandBufferImpl.cpp
+		Vulkan/Common.cpp
+		Vulkan/DescriptorSet.cpp
+		Vulkan/Fence.cpp
+		Vulkan/FenceFactory.cpp
+		Vulkan/Framebuffer.cpp
+		Vulkan/FramebufferImpl.cpp
+		Vulkan/GpuMemoryManager.cpp
+		Vulkan/GrManager.cpp
+		Vulkan/GrManagerImpl.cpp
+		Vulkan/OcclusionQuery.cpp
+		Vulkan/OcclusionQueryImpl.cpp
+		Vulkan/PipelineCache.cpp
+		Vulkan/Pipeline.cpp
+		Vulkan/PipelineLayout.cpp
+		Vulkan/QueryFactory.cpp
+		Vulkan/Sampler.cpp
+		Vulkan/SamplerFactory.cpp
+		Vulkan/SamplerImpl.cpp
+		Vulkan/Shader.cpp
+		Vulkan/ShaderImpl.cpp
+		Vulkan/ShaderProgram.cpp
+		Vulkan/ShaderProgramImpl.cpp
+		Vulkan/SwapchainFactory.cpp
+		Vulkan/Texture.cpp
+		Vulkan/TextureImpl.cpp
+		Vulkan/TextureView.cpp
+		Vulkan/TextureViewImpl.cpp
+		Vulkan/TimestampQuery.cpp
+		Vulkan/TimestampQueryImpl.cpp
+		Vulkan/VulkanObject.cpp
+		Vulkan/FrameGarbageCollector.cpp
+		Vulkan/DLSSCtx.cpp)
+
+	set(backend_headers
+		Vulkan/AccelerationStructureImpl.h
+		Vulkan/BufferImpl.h
+		Vulkan/CommandBufferFactory.h
+		Vulkan/CommandBufferFactory.inl.h
+		Vulkan/CommandBufferImpl.h
+		Vulkan/CommandBufferImpl.inl.h
+		Vulkan/Common.h
+		Vulkan/DeferredBarrierFactory.h
+		Vulkan/DeferredBarrierFactory.inl.h
+		Vulkan/DescriptorSet.h
+		Vulkan/FenceFactory.h
+		Vulkan/FenceFactory.inl.h
+		Vulkan/FenceImpl.h
+		Vulkan/FramebufferImpl.h
+		Vulkan/FrameGarbageCollector.h
+		Vulkan/GpuMemoryManager.h
+		Vulkan/GrManagerImpl.h
+		Vulkan/MicroObjectRecycler.h
+		Vulkan/MicroObjectRecycler.inl.h
+		Vulkan/OcclusionQueryImpl.h
+		Vulkan/Pipeline.h
+		Vulkan/PipelineCache.h
+		Vulkan/PipelineLayout.h
+		Vulkan/QueryFactory.h
+		Vulkan/SamplerFactory.h
+		Vulkan/SamplerImpl.h
+		Vulkan/SemaphoreFactory.h
+		Vulkan/SemaphoreFactory.inl.h
+		Vulkan/ShaderImpl.h
+		Vulkan/ShaderProgramImpl.h
+		Vulkan/SwapchainFactory.h
+		Vulkan/TextureImpl.h
+		Vulkan/TextureViewImpl.h
+		Vulkan/TimestampQueryImpl.h
+		Vulkan/VulkanObject.h
+		Vulkan/DLSSCtxImpl.h)
 
 	if(DLSS_SUPPORT)
 		set(backend_sources ${backend_sources} "Vulkan/DLSSCtxImpl.cpp")
@@ -67,10 +131,10 @@ endif()
 # Have 2 libraries. The AnKiGrCommon is the bare minimum for the AnKiShaderCompiler to work. Don't have
 # AnKiShaderCompiler depend on what AnKiGr depends on
 
-add_library(AnKiGrCommon ${common_sources})
+add_library(AnKiGrCommon ${common_sources} ${common_headers})
 target_compile_definitions(AnKiGrCommon PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiGrCommon AnKiUtil) # Only depend on Util
 
-add_library(AnKiGr ${backend_sources})
+add_library(AnKiGr ${backend_sources} ${backend_headers})
 target_compile_definitions(AnKiGr PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiGr AnKiCore AnKiSpirvCross AnKiGrCommon)

+ 40 - 35
AnKi/Gr/CommandBuffer.h

@@ -134,14 +134,14 @@ public:
 	/// @{
 
 	/// Bind vertex buffer.
-	void bindVertexBuffer(U32 binding, BufferPtr buff, PtrSize offset, PtrSize stride,
+	void bindVertexBuffer(U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize stride,
 						  VertexStepRate stepRate = VertexStepRate::VERTEX);
 
 	/// Setup a vertex attribute.
 	void setVertexAttribute(U32 location, U32 buffBinding, Format fmt, PtrSize relativeOffset);
 
 	/// Bind index buffer.
-	void bindIndexBuffer(BufferPtr buff, PtrSize offset, IndexType type);
+	void bindIndexBuffer(const BufferPtr& buff, PtrSize offset, IndexType type);
 
 	/// Enable primitive restart.
 	void setPrimitiveRestart(Bool enable);
@@ -224,21 +224,22 @@ public:
 	/// @param texView The texture view to bind.
 	/// @param sampler The sampler to override the default sampler of the tex.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindTextureAndSampler(U32 set, U32 binding, TextureViewPtr texView, SamplerPtr sampler, U32 arrayIdx = 0);
+	void bindTextureAndSampler(U32 set, U32 binding, const TextureViewPtr& texView, const SamplerPtr& sampler,
+							   U32 arrayIdx = 0);
 
 	/// Bind sampler.
 	/// @param set The set to bind to.
 	/// @param binding The binding to bind to.
 	/// @param sampler The sampler to override the default sampler of the tex.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindSampler(U32 set, U32 binding, SamplerPtr sampler, U32 arrayIdx = 0);
+	void bindSampler(U32 set, U32 binding, const SamplerPtr& sampler, U32 arrayIdx = 0);
 
 	/// Bind a texture.
 	/// @param set The set to bind to.
 	/// @param binding The binding to bind to.
 	/// @param texView The texture view to bind.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindTexture(U32 set, U32 binding, TextureViewPtr texView, U32 arrayIdx = 0);
+	void bindTexture(U32 set, U32 binding, const TextureViewPtr& texView, U32 arrayIdx = 0);
 
 	/// Bind uniform buffer.
 	/// @param set The set to bind to.
@@ -248,7 +249,8 @@ public:
 	/// @param range The bytes to bind starting from the offset. If it's MAX_PTR_SIZE then map from offset to the end
 	///              of the buffer.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindUniformBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, U32 arrayIdx = 0);
+	void bindUniformBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+						   U32 arrayIdx = 0);
 
 	/// Bind storage buffer.
 	/// @param set The set to bind to.
@@ -258,14 +260,15 @@ public:
 	/// @param range The bytes to bind starting from the offset. If it's MAX_PTR_SIZE then map from offset to the end
 	///              of the buffer.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindStorageBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, U32 arrayIdx = 0);
+	void bindStorageBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+						   U32 arrayIdx = 0);
 
 	/// Bind load/store image.
 	/// @param set The set to bind to.
 	/// @param binding The binding to bind to.
 	/// @param img The view to bind.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindImage(U32 set, U32 binding, TextureViewPtr img, U32 arrayIdx = 0);
+	void bindImage(U32 set, U32 binding, const TextureViewPtr& img, U32 arrayIdx = 0);
 
 	/// Bind texture buffer.
 	/// @param set The set to bind to.
@@ -276,7 +279,7 @@ public:
 	///              of the buffer.
 	/// @param fmt The format of the buffer.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindTextureBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, Format fmt,
+	void bindTextureBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range, Format fmt,
 						   U32 arrayIdx = 0);
 
 	/// Bind an acceleration structure.
@@ -284,7 +287,7 @@ public:
 	/// @param binding The binding to bind to.
 	/// @param[in,out] as The AS to bind.
 	/// @param arrayIdx The array index if the binding is an array.
-	void bindAccelerationStructure(U32 set, U32 binding, AccelerationStructurePtr as, U32 arrayIdx = 0);
+	void bindAccelerationStructure(U32 set, U32 binding, const AccelerationStructurePtr& as, U32 arrayIdx = 0);
 
 	/// Bind the bindless descriptor set into a slot.
 	void bindAllBindless(U32 set);
@@ -293,12 +296,13 @@ public:
 	void setPushConstants(const void* data, U32 dataSize);
 
 	/// Bind a program.
-	void bindShaderProgram(ShaderProgramPtr prog);
+	void bindShaderProgram(const ShaderProgramPtr& prog);
 
 	/// Begin renderpass.
 	/// The minx, miny, width, height control the area that the load and store operations will happen. If the scissor is
 	/// bigger than the render area the results are undefined.
-	void beginRenderPass(FramebufferPtr fb, const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
+	void beginRenderPass(const FramebufferPtr& fb,
+						 const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
 						 TextureUsageBit depthStencilAttachmentUsage, U32 minx = 0, U32 miny = 0, U32 width = MAX_U32,
 						 U32 height = MAX_U32);
 
@@ -316,9 +320,9 @@ public:
 
 	void drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount = 1, U32 first = 0, U32 baseInstance = 0);
 
-	void drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr indirectBuff);
+	void drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, const BufferPtr& indirectBuff);
 
-	void drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr indirectBuff);
+	void drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, const BufferPtr& indirectBuff);
 
 	void dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ);
 
@@ -351,48 +355,48 @@ public:
 	/// @param width Width.
 	/// @param height Height.
 	/// @param depth Depth.
-	void traceRays(BufferPtr sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize, U32 hitGroupSbtRecordCount,
+	void traceRays(const BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize, U32 hitGroupSbtRecordCount,
 				   U32 rayTypeCount, U32 width, U32 height, U32 depth);
 
 	/// Generate mipmaps for non-3D textures. You have to transition all the mip levels of this face and layer to
 	/// TextureUsageBit::GENERATE_MIPMAPS before calling this method.
 	/// @param texView The texture view to generate mips. It should point to a subresource that contains the whole
 	///                mip chain and only one face and one layer.
-	void generateMipmaps2d(TextureViewPtr texView);
+	void generateMipmaps2d(const TextureViewPtr& texView);
 
 	/// Generate mipmaps only for 3D textures.
 	/// @param texView The texture view to generate mips.
-	void generateMipmaps3d(TextureViewPtr tex);
+	void generateMipmaps3d(const TextureViewPtr& tex);
 
 	/// Blit from surface to surface.
 	/// @param srcView The source view that points to a surface.
 	/// @param dstView The destination view that points to a surface.
-	void blitTextureViews(TextureViewPtr srcView, TextureViewPtr destView);
+	void blitTextureViews(const TextureViewPtr& srcView, const TextureViewPtr& destView);
 
 	/// Clear a single texture surface. Can be used for all textures except 3D.
 	/// @param[in,out] texView The texture view to clear.
 	/// @param[in] clearValue The value to clear it with.
-	void clearTextureView(TextureViewPtr texView, const ClearValue& clearValue);
+	void clearTextureView(const TextureViewPtr& texView, const ClearValue& clearValue);
 
 	/// Copy a buffer to a texture surface or volume.
 	/// @param buff The source buffer to copy from.
 	/// @param offset The offset in the buffer to start reading from.
 	/// @param range The size of the buffer to read.
 	/// @param texView The texture view that points to a surface or volume to write to.
-	void copyBufferToTextureView(BufferPtr buff, PtrSize offset, PtrSize range, TextureViewPtr texView);
+	void copyBufferToTextureView(const BufferPtr& buff, PtrSize offset, PtrSize range, const TextureViewPtr& texView);
 
 	/// Fill a buffer with some value.
 	/// @param[in,out] buff The buffer to fill.
 	/// @param offset From where to start filling. Must be multiple of 4.
 	/// @param size The bytes to fill. Must be multiple of 4 or MAX_PTR_SIZE to indicate the whole buffer.
 	/// @param value The value to fill the buffer with.
-	void fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value);
+	void fillBuffer(const BufferPtr& buff, PtrSize offset, PtrSize size, U32 value);
 
 	/// Write the occlusion result to buffer.
 	/// @param[in] query The query to get the result from.
 	/// @param offset The offset inside the buffer to write the result.
 	/// @param buff The buffer to update.
-	void writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset, BufferPtr buff);
+	void writeOcclusionQueryResultToBuffer(const OcclusionQueryPtr& query, PtrSize offset, const BufferPtr& buff);
 
 	/// Copy buffer to buffer.
 	/// @param[in] src Source buffer.
@@ -400,27 +404,28 @@ public:
 	/// @param[out] dst Destination buffer.
 	/// @param dstOffset Offset in the destination buffer.
 	/// @param range Size to copy.
-	void copyBufferToBuffer(BufferPtr src, PtrSize srcOffset, BufferPtr dst, PtrSize dstOffset, PtrSize range);
+	void copyBufferToBuffer(const BufferPtr& src, PtrSize srcOffset, const BufferPtr& dst, PtrSize dstOffset,
+							PtrSize range);
 
 	/// Build the acceleration structure.
-	void buildAccelerationStructure(AccelerationStructurePtr as);
+	void buildAccelerationStructure(const AccelerationStructurePtr& as);
 	/// @}
 
 	/// @name Sync
 	/// @{
-	void setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+	void setTextureBarrier(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
 						   const TextureSubresourceInfo& subresource);
 
-	void setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+	void setTextureSurfaceBarrier(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
 								  const TextureSurfaceInfo& surf);
 
-	void setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+	void setTextureVolumeBarrier(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
 								 const TextureVolumeInfo& vol);
 
-	void setBufferBarrier(BufferPtr buff, BufferUsageBit prevUsage, BufferUsageBit nextUsage, PtrSize offset,
+	void setBufferBarrier(const BufferPtr& buff, BufferUsageBit prevUsage, BufferUsageBit nextUsage, PtrSize offset,
 						  PtrSize size);
 
-	void setAccelerationStructureBarrier(AccelerationStructurePtr as, AccelerationStructureUsageBit prevUsage,
+	void setAccelerationStructureBarrier(const AccelerationStructurePtr& as, AccelerationStructureUsageBit prevUsage,
 										 AccelerationStructureUsageBit nextUsage);
 	/// @}
 
@@ -428,22 +433,22 @@ public:
 	/// @{
 
 	/// Reset query before beginOcclusionQuery.
-	void resetOcclusionQuery(OcclusionQueryPtr query);
+	void resetOcclusionQuery(const OcclusionQueryPtr& query);
 
 	/// Begin query.
-	void beginOcclusionQuery(OcclusionQueryPtr query);
+	void beginOcclusionQuery(const OcclusionQueryPtr& query);
 
 	/// End query.
-	void endOcclusionQuery(OcclusionQueryPtr query);
+	void endOcclusionQuery(const OcclusionQueryPtr& query);
 
 	/// Reset timestamp query before writeTimestamp.
-	void resetTimestampQuery(TimestampQueryPtr query);
+	void resetTimestampQuery(const TimestampQueryPtr& query);
 
 	/// Write a timestamp.
-	void writeTimestamp(TimestampQueryPtr query);
+	void writeTimestamp(const TimestampQueryPtr& query);
 
 	/// Append a second level command buffer.
-	void pushSecondLevelCommandBuffer(CommandBufferPtr cmdb);
+	void pushSecondLevelCommandBuffer(const CommandBufferPtr& cmdb);
 
 	Bool isEmpty() const;
 	/// @}

+ 4 - 1
AnKi/Gr/Common.h

@@ -154,6 +154,9 @@ public:
 	/// Max subgroup size of the GPU.
 	U32 m_maxSubgroupSize = 0;
 
+	/// Min size of a texel in the shading rate image.
+	U32 m_minShadingRateImageTexelSize = 0;
+
 	/// GPU vendor.
 	GpuVendor m_gpuVendor = GpuVendor::UNKNOWN;
 
@@ -185,7 +188,7 @@ public:
 	Bool m_dlssSupport = false;
 };
 ANKI_END_PACKED_STRUCT
-static_assert(sizeof(GpuDeviceCapabilities) == sizeof(PtrSize) * 5 + sizeof(U32) * 7 + sizeof(U8) * 3 + sizeof(Bool) * 7, "Should be packed");
+static_assert(sizeof(GpuDeviceCapabilities) == sizeof(PtrSize) * 5 + sizeof(U32) * 8 + sizeof(U8) * 3 + sizeof(Bool) * 7, "Should be packed");
 
 /// The type of the allocator for heap allocations
 template<typename T>

+ 8 - 3
AnKi/Gr/GrObject.h

@@ -64,9 +64,14 @@ public:
 
 	GrAllocator<U8> getAllocator() const;
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	/// A unique identifier for caching objects.
@@ -85,7 +90,7 @@ private:
 	GrManager* m_manager;
 	Char* m_name = nullptr;
 	U64 m_uuid;
-	Atomic<I32> m_refcount;
+	mutable Atomic<I32> m_refcount;
 	GrObjectType m_type;
 };
 /// @}

+ 87 - 82
AnKi/Gr/Vulkan/CommandBuffer.cpp

@@ -58,174 +58,176 @@ void CommandBuffer::flush(ConstWeakArray<FencePtr> waitFences, FencePtr* signalF
 	}
 }
 
-void CommandBuffer::bindVertexBuffer(U32 binding, BufferPtr buff, PtrSize offset, PtrSize stride,
+void CommandBuffer::bindVertexBuffer(U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize stride,
 									 VertexStepRate stepRate)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.bindVertexBuffer(binding, buff, offset, stride, stepRate);
+	self.bindVertexBufferInternal(binding, buff, offset, stride, stepRate);
 }
 
 void CommandBuffer::setVertexAttribute(U32 location, U32 buffBinding, Format fmt, PtrSize relativeOffset)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setVertexAttribute(location, buffBinding, fmt, relativeOffset);
+	self.setVertexAttributeInternal(location, buffBinding, fmt, relativeOffset);
 }
 
-void CommandBuffer::bindIndexBuffer(BufferPtr buff, PtrSize offset, IndexType type)
+void CommandBuffer::bindIndexBuffer(const BufferPtr& buff, PtrSize offset, IndexType type)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.bindIndexBuffer(buff, offset, type);
+	self.bindIndexBufferInternal(buff, offset, type);
 }
 
 void CommandBuffer::setPrimitiveRestart(Bool enable)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setPrimitiveRestart(enable);
+	self.setPrimitiveRestartInternal(enable);
 }
 
 void CommandBuffer::setViewport(U32 minx, U32 miny, U32 width, U32 height)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setViewport(minx, miny, width, height);
+	self.setViewportInternal(minx, miny, width, height);
 }
 
 void CommandBuffer::setScissor(U32 minx, U32 miny, U32 width, U32 height)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setScissor(minx, miny, width, height);
+	self.setScissorInternal(minx, miny, width, height);
 }
 
 void CommandBuffer::setFillMode(FillMode mode)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setFillMode(mode);
+	self.setFillModeInternal(mode);
 }
 
 void CommandBuffer::setCullMode(FaceSelectionBit mode)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setCullMode(mode);
+	self.setCullModeInternal(mode);
 }
 
 void CommandBuffer::setPolygonOffset(F32 factor, F32 units)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setPolygonOffset(factor, units);
+	self.setPolygonOffsetInternal(factor, units);
 }
 
 void CommandBuffer::setStencilOperations(FaceSelectionBit face, StencilOperation stencilFail,
 										 StencilOperation stencilPassDepthFail, StencilOperation stencilPassDepthPass)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setStencilOperations(face, stencilFail, stencilPassDepthFail, stencilPassDepthPass);
+	self.setStencilOperationsInternal(face, stencilFail, stencilPassDepthFail, stencilPassDepthPass);
 }
 
 void CommandBuffer::setStencilCompareOperation(FaceSelectionBit face, CompareOperation comp)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setStencilCompareOperation(face, comp);
+	self.setStencilCompareOperationInternal(face, comp);
 }
 
 void CommandBuffer::setStencilCompareMask(FaceSelectionBit face, U32 mask)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setStencilCompareMask(face, mask);
+	self.setStencilCompareMaskInternal(face, mask);
 }
 
 void CommandBuffer::setStencilWriteMask(FaceSelectionBit face, U32 mask)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setStencilWriteMask(face, mask);
+	self.setStencilWriteMaskInternal(face, mask);
 }
 
 void CommandBuffer::setStencilReference(FaceSelectionBit face, U32 ref)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setStencilReference(face, ref);
+	self.setStencilReferenceInternal(face, ref);
 }
 
 void CommandBuffer::setDepthWrite(Bool enable)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setDepthWrite(enable);
+	self.setDepthWriteInternal(enable);
 }
 
 void CommandBuffer::setDepthCompareOperation(CompareOperation op)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setDepthCompareOperation(op);
+	self.setDepthCompareOperationInternal(op);
 }
 
 void CommandBuffer::setAlphaToCoverage(Bool enable)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setAlphaToCoverage(enable);
+	self.setAlphaToCoverageInternal(enable);
 }
 
 void CommandBuffer::setColorChannelWriteMask(U32 attachment, ColorBit mask)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setColorChannelWriteMask(attachment, mask);
+	self.setColorChannelWriteMaskInternal(attachment, mask);
 }
 
 void CommandBuffer::setBlendFactors(U32 attachment, BlendFactor srcRgb, BlendFactor dstRgb, BlendFactor srcA,
 									BlendFactor dstA)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setBlendFactors(attachment, srcRgb, dstRgb, srcA, dstA);
+	self.setBlendFactorsInternal(attachment, srcRgb, dstRgb, srcA, dstA);
 }
 
 void CommandBuffer::setBlendOperation(U32 attachment, BlendOperation funcRgb, BlendOperation funcA)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setBlendOperation(attachment, funcRgb, funcA);
+	self.setBlendOperationInternal(attachment, funcRgb, funcA);
 }
 
-void CommandBuffer::bindTextureAndSampler(U32 set, U32 binding, TextureViewPtr texView, SamplerPtr sampler,
-										  U32 arrayIdx)
+void CommandBuffer::bindTextureAndSampler(U32 set, U32 binding, const TextureViewPtr& texView,
+										  const SamplerPtr& sampler, U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindTextureAndSamplerInternal(set, binding, texView, sampler, arrayIdx);
 }
 
-void CommandBuffer::bindTexture(U32 set, U32 binding, TextureViewPtr texView, U32 arrayIdx)
+void CommandBuffer::bindTexture(U32 set, U32 binding, const TextureViewPtr& texView, U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindTextureInternal(set, binding, texView, arrayIdx);
 }
 
-void CommandBuffer::bindSampler(U32 set, U32 binding, SamplerPtr sampler, U32 arrayIdx)
+void CommandBuffer::bindSampler(U32 set, U32 binding, const SamplerPtr& sampler, U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindSamplerInternal(set, binding, sampler, arrayIdx);
 }
 
-void CommandBuffer::bindUniformBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, U32 arrayIdx)
+void CommandBuffer::bindUniformBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+									  U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindUniformBufferInternal(set, binding, buff, offset, range, arrayIdx);
 }
 
-void CommandBuffer::bindStorageBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, U32 arrayIdx)
+void CommandBuffer::bindStorageBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+									  U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindStorageBufferInternal(set, binding, buff, offset, range, arrayIdx);
 }
 
-void CommandBuffer::bindImage(U32 set, U32 binding, TextureViewPtr img, U32 arrayIdx)
+void CommandBuffer::bindImage(U32 set, U32 binding, const TextureViewPtr& img, U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindImageInternal(set, binding, img, arrayIdx);
 }
 
-void CommandBuffer::bindAccelerationStructure(U32 set, U32 binding, AccelerationStructurePtr as, U32 arrayIdx)
+void CommandBuffer::bindAccelerationStructure(U32 set, U32 binding, const AccelerationStructurePtr& as, U32 arrayIdx)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.bindAccelerationStructureInternal(set, binding, as, arrayIdx);
 }
 
-void CommandBuffer::bindTextureBuffer(U32 set, U32 binding, BufferPtr buff, PtrSize offset, PtrSize range, Format fmt,
-									  U32 arrayIdx)
+void CommandBuffer::bindTextureBuffer(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+									  Format fmt, U32 arrayIdx)
 {
 	ANKI_ASSERT(!"TODO");
 }
@@ -236,25 +238,25 @@ void CommandBuffer::bindAllBindless(U32 set)
 	self.bindAllBindlessInternal(set);
 }
 
-void CommandBuffer::bindShaderProgram(ShaderProgramPtr prog)
+void CommandBuffer::bindShaderProgram(const ShaderProgramPtr& prog)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.bindShaderProgram(prog);
+	self.bindShaderProgramInternal(prog);
 }
 
-void CommandBuffer::beginRenderPass(FramebufferPtr fb,
+void CommandBuffer::beginRenderPass(const FramebufferPtr& fb,
 									const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
 									TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width,
 									U32 height)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.beginRenderPass(fb, colorAttachmentUsages, depthStencilAttachmentUsage, minx, miny, width, height);
+	self.beginRenderPassInternal(fb, colorAttachmentUsages, depthStencilAttachmentUsage, minx, miny, width, height);
 }
 
 void CommandBuffer::endRenderPass()
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.endRenderPass();
+	self.endRenderPassInternal();
 }
 
 void CommandBuffer::setVrsRate(VrsRate rate)
@@ -267,34 +269,35 @@ void CommandBuffer::drawElements(PrimitiveTopology topology, U32 count, U32 inst
 								 U32 baseVertex, U32 baseInstance)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.drawElements(topology, count, instanceCount, firstIndex, baseVertex, baseInstance);
+	self.drawElementsInternal(topology, count, instanceCount, firstIndex, baseVertex, baseInstance);
 }
 
 void CommandBuffer::drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.drawArrays(topology, count, instanceCount, first, baseInstance);
+	self.drawArraysInternal(topology, count, instanceCount, first, baseInstance);
 }
 
-void CommandBuffer::drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr buff)
+void CommandBuffer::drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, const BufferPtr& buff)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.drawArraysIndirect(topology, drawCount, offset, buff);
+	self.drawArraysIndirectInternal(topology, drawCount, offset, buff);
 }
 
-void CommandBuffer::drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr buff)
+void CommandBuffer::drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
+										 const BufferPtr& buff)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.drawElementsIndirect(topology, drawCount, offset, buff);
+	self.drawElementsIndirectInternal(topology, drawCount, offset, buff);
 }
 
 void CommandBuffer::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.dispatchCompute(groupCountX, groupCountY, groupCountZ);
+	self.dispatchComputeInternal(groupCountX, groupCountY, groupCountZ);
 }
 
-void CommandBuffer::traceRays(BufferPtr sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize,
+void CommandBuffer::traceRays(const BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize,
 							  U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height, U32 depth)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -302,88 +305,90 @@ void CommandBuffer::traceRays(BufferPtr sbtBuffer, PtrSize sbtBufferOffset, U32
 						   height, depth);
 }
 
-void CommandBuffer::generateMipmaps2d(TextureViewPtr texView)
+void CommandBuffer::generateMipmaps2d(const TextureViewPtr& texView)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.generateMipmaps2d(texView);
+	self.generateMipmaps2dInternal(texView);
 }
 
-void CommandBuffer::generateMipmaps3d(TextureViewPtr texView)
+void CommandBuffer::generateMipmaps3d(const TextureViewPtr& texView)
 {
 	ANKI_ASSERT(!"TODO");
 }
 
-void CommandBuffer::blitTextureViews(TextureViewPtr srcView, TextureViewPtr destView)
+void CommandBuffer::blitTextureViews(const TextureViewPtr& srcView, const TextureViewPtr& destView)
 {
 	ANKI_ASSERT(!"TODO");
 }
 
-void CommandBuffer::clearTextureView(TextureViewPtr texView, const ClearValue& clearValue)
+void CommandBuffer::clearTextureView(const TextureViewPtr& texView, const ClearValue& clearValue)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.clearTextureView(texView, clearValue);
+	self.clearTextureViewInternal(texView, clearValue);
 }
 
-void CommandBuffer::copyBufferToTextureView(BufferPtr buff, PtrSize offset, PtrSize range, TextureViewPtr texView)
+void CommandBuffer::copyBufferToTextureView(const BufferPtr& buff, PtrSize offset, PtrSize range,
+											const TextureViewPtr& texView)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.copyBufferToTextureViewInternal(buff, offset, range, texView);
 }
 
-void CommandBuffer::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
+void CommandBuffer::fillBuffer(const BufferPtr& buff, PtrSize offset, PtrSize size, U32 value)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.fillBuffer(buff, offset, size, value);
+	self.fillBufferInternal(buff, offset, size, value);
 }
 
-void CommandBuffer::writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset, BufferPtr buff)
+void CommandBuffer::writeOcclusionQueryResultToBuffer(const OcclusionQueryPtr& query, PtrSize offset,
+													  const BufferPtr& buff)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.writeOcclusionQueryResultToBuffer(query, offset, buff);
+	self.writeOcclusionQueryResultToBufferInternal(query, offset, buff);
 }
 
-void CommandBuffer::copyBufferToBuffer(BufferPtr src, PtrSize srcOffset, BufferPtr dst, PtrSize dstOffset,
+void CommandBuffer::copyBufferToBuffer(const BufferPtr& src, PtrSize srcOffset, const BufferPtr& dst, PtrSize dstOffset,
 									   PtrSize range)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.copyBufferToBuffer(src, srcOffset, dst, dstOffset, range);
+	self.copyBufferToBufferInternal(src, srcOffset, dst, dstOffset, range);
 }
 
-void CommandBuffer::buildAccelerationStructure(AccelerationStructurePtr as)
+void CommandBuffer::buildAccelerationStructure(const AccelerationStructurePtr& as)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.buildAccelerationStructureInternal(as);
 }
 
-void CommandBuffer::setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+void CommandBuffer::setTextureBarrier(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
 									  const TextureSubresourceInfo& subresource)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setTextureBarrier(tex, prevUsage, nextUsage, subresource);
+	self.setTextureBarrierInternal(tex, prevUsage, nextUsage, subresource);
 }
 
-void CommandBuffer::setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-											 const TextureSurfaceInfo& surf)
+void CommandBuffer::setTextureSurfaceBarrier(const TexturePtr& tex, TextureUsageBit prevUsage,
+											 TextureUsageBit nextUsage, const TextureSurfaceInfo& surf)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setTextureSurfaceBarrier(tex, prevUsage, nextUsage, surf);
+	self.setTextureSurfaceBarrierInternal(tex, prevUsage, nextUsage, surf);
 }
 
-void CommandBuffer::setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+void CommandBuffer::setTextureVolumeBarrier(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
 											const TextureVolumeInfo& vol)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setTextureVolumeBarrier(tex, prevUsage, nextUsage, vol);
+	self.setTextureVolumeBarrierInternal(tex, prevUsage, nextUsage, vol);
 }
 
-void CommandBuffer::setBufferBarrier(BufferPtr buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset,
+void CommandBuffer::setBufferBarrier(const BufferPtr& buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset,
 									 PtrSize size)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setBufferBarrier(buff, before, after, offset, size);
+	self.setBufferBarrierInternal(buff, before, after, offset, size);
 }
 
-void CommandBuffer::setAccelerationStructureBarrier(AccelerationStructurePtr as,
+void CommandBuffer::setAccelerationStructureBarrier(const AccelerationStructurePtr& as,
 													AccelerationStructureUsageBit prevUsage,
 													AccelerationStructureUsageBit nextUsage)
 {
@@ -391,37 +396,37 @@ void CommandBuffer::setAccelerationStructureBarrier(AccelerationStructurePtr as,
 	self.setAccelerationStructureBarrierInternal(as, prevUsage, nextUsage);
 }
 
-void CommandBuffer::resetOcclusionQuery(OcclusionQueryPtr query)
+void CommandBuffer::resetOcclusionQuery(const OcclusionQueryPtr& query)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.resetOcclusionQuery(query);
+	self.resetOcclusionQueryInternal(query);
 }
 
-void CommandBuffer::beginOcclusionQuery(OcclusionQueryPtr query)
+void CommandBuffer::beginOcclusionQuery(const OcclusionQueryPtr& query)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.beginOcclusionQuery(query);
+	self.beginOcclusionQueryInternal(query);
 }
 
-void CommandBuffer::endOcclusionQuery(OcclusionQueryPtr query)
+void CommandBuffer::endOcclusionQuery(const OcclusionQueryPtr& query)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.endOcclusionQuery(query);
+	self.endOcclusionQueryInternal(query);
 }
 
-void CommandBuffer::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
+void CommandBuffer::pushSecondLevelCommandBuffer(const CommandBufferPtr& cmdb)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.pushSecondLevelCommandBuffer(cmdb);
+	self.pushSecondLevelCommandBufferInternal(cmdb);
 }
 
-void CommandBuffer::resetTimestampQuery(TimestampQueryPtr query)
+void CommandBuffer::resetTimestampQuery(const TimestampQueryPtr& query)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.resetTimestampQueryInternal(query);
 }
 
-void CommandBuffer::writeTimestamp(TimestampQueryPtr query)
+void CommandBuffer::writeTimestamp(const TimestampQueryPtr& query)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.writeTimestampInternal(query);
@@ -436,19 +441,19 @@ Bool CommandBuffer::isEmpty() const
 void CommandBuffer::setPushConstants(const void* data, U32 dataSize)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setPushConstants(data, dataSize);
+	self.setPushConstantsInternal(data, dataSize);
 }
 
 void CommandBuffer::setRasterizationOrder(RasterizationOrder order)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setRasterizationOrder(order);
+	self.setRasterizationOrderInternal(order);
 }
 
 void CommandBuffer::setLineWidth(F32 width)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
-	self.setLineWidth(width);
+	self.setLineWidthInternal(width);
 }
 
 } // end namespace anki

+ 16 - 6
AnKi/Gr/Vulkan/CommandBufferFactory.h

@@ -33,9 +33,19 @@ public:
 
 	~MicroCommandBuffer();
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 	void setFence(MicroFencePtr& fence)
@@ -70,7 +80,7 @@ public:
 	}
 
 	template<typename T>
-	void pushObjectRef(GrObjectPtrT<T>& x)
+	void pushObjectRef(const GrObjectPtrT<T>& x)
 	{
 		pushToArray(m_objectRefs[T::CLASS_TYPE], x.get());
 	}
@@ -98,7 +108,7 @@ private:
 	// Cacheline boundary
 
 	CommandBufferThreadAllocator* m_threadAlloc;
-	Atomic<I32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	CommandBufferFlag m_flags = CommandBufferFlag::NONE;
 	VulkanQueueType m_queue = VulkanQueueType::COUNT;
 
@@ -121,12 +131,12 @@ private:
 		}
 
 		// Not found in the temp cache, add it
-		arr.emplaceBack(m_fastAlloc, GrObjectPtr(grobj));
+		arr.emplaceBack(m_fastAlloc, grobj);
 	}
 };
 
 template<>
-inline void MicroCommandBuffer::pushObjectRef<GrObject>(GrObjectPtr& x)
+inline void MicroCommandBuffer::pushObjectRef<GrObject>(const GrObjectPtr& x)
 {
 	pushToArray(m_objectRefs[x->getType()], x.get());
 }

+ 8 - 9
AnKi/Gr/Vulkan/CommandBufferImpl.cpp

@@ -116,10 +116,9 @@ void CommandBufferImpl::beginRecording()
 	}
 }
 
-void CommandBufferImpl::beginRenderPass(FramebufferPtr fb,
-										const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
-										TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width,
-										U32 height)
+void CommandBufferImpl::beginRenderPassInternal(
+	const FramebufferPtr& fb, const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
+	TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width, U32 height)
 {
 	commandCommon();
 	ANKI_ASSERT(!insideRenderPass());
@@ -219,7 +218,7 @@ void CommandBufferImpl::beginRenderPassInternal()
 	}
 }
 
-void CommandBufferImpl::endRenderPass()
+void CommandBufferImpl::endRenderPassInternal()
 {
 	commandCommon();
 	ANKI_ASSERT(insideRenderPass());
@@ -291,7 +290,7 @@ void CommandBufferImpl::endRecording()
 #endif
 }
 
-void CommandBufferImpl::generateMipmaps2d(TextureViewPtr texView)
+void CommandBufferImpl::generateMipmaps2dInternal(const TextureViewPtr& texView)
 {
 	commandCommon();
 
@@ -636,8 +635,8 @@ void CommandBufferImpl::flushWriteQueryResults()
 	m_writeQueryAtoms.resize(m_alloc, 0);
 }
 
-void CommandBufferImpl::copyBufferToTextureViewInternal(BufferPtr buff, PtrSize offset, PtrSize range,
-														TextureViewPtr texView)
+void CommandBufferImpl::copyBufferToTextureViewInternal(const BufferPtr& buff, PtrSize offset, PtrSize range,
+														const TextureViewPtr& texView)
 {
 	commandCommon();
 
@@ -745,7 +744,7 @@ void CommandBufferImpl::rebindDynamicState()
 	}
 }
 
-void CommandBufferImpl::buildAccelerationStructureInternal(AccelerationStructurePtr& as)
+void CommandBufferImpl::buildAccelerationStructureInternal(const AccelerationStructurePtr& as)
 {
 	commandCommon();
 

+ 81 - 69
AnKi/Gr/Vulkan/CommandBufferImpl.h

@@ -91,22 +91,23 @@ public:
 		return !!(m_flags & CommandBufferFlag::SECOND_LEVEL);
 	}
 
-	void bindVertexBuffer(U32 binding, BufferPtr buff, PtrSize offset, PtrSize stride, VertexStepRate stepRate)
+	void bindVertexBufferInternal(U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize stride,
+								  VertexStepRate stepRate)
 	{
 		commandCommon();
 		m_state.bindVertexBuffer(binding, stride, stepRate);
-		VkBuffer vkbuff = static_cast<const BufferImpl&>(*buff).getHandle();
+		const VkBuffer vkbuff = static_cast<const BufferImpl&>(*buff).getHandle();
 		ANKI_CMD(vkCmdBindVertexBuffers(m_handle, binding, 1, &vkbuff, &offset), ANY_OTHER_COMMAND);
 		m_microCmdb->pushObjectRef(buff);
 	}
 
-	void setVertexAttribute(U32 location, U32 buffBinding, const Format fmt, PtrSize relativeOffset)
+	void setVertexAttributeInternal(U32 location, U32 buffBinding, const Format fmt, PtrSize relativeOffset)
 	{
 		commandCommon();
 		m_state.setVertexAttribute(location, buffBinding, fmt, relativeOffset);
 	}
 
-	void bindIndexBuffer(BufferPtr buff, PtrSize offset, IndexType type)
+	void bindIndexBufferInternal(const BufferPtr& buff, PtrSize offset, IndexType type)
 	{
 		commandCommon();
 		ANKI_CMD(vkCmdBindIndexBuffer(m_handle, static_cast<const BufferImpl&>(*buff).getHandle(), offset,
@@ -115,25 +116,25 @@ public:
 		m_microCmdb->pushObjectRef(buff);
 	}
 
-	void setPrimitiveRestart(Bool enable)
+	void setPrimitiveRestartInternal(Bool enable)
 	{
 		commandCommon();
 		m_state.setPrimitiveRestart(enable);
 	}
 
-	void setFillMode(FillMode mode)
+	void setFillModeInternal(FillMode mode)
 	{
 		commandCommon();
 		m_state.setFillMode(mode);
 	}
 
-	void setCullMode(FaceSelectionBit mode)
+	void setCullModeInternal(FaceSelectionBit mode)
 	{
 		commandCommon();
 		m_state.setCullMode(mode);
 	}
 
-	void setViewport(U32 minx, U32 miny, U32 width, U32 height)
+	void setViewportInternal(U32 minx, U32 miny, U32 width, U32 height)
 	{
 		ANKI_ASSERT(width > 0 && height > 0);
 		commandCommon();
@@ -149,7 +150,7 @@ public:
 		}
 	}
 
-	void setScissor(U32 minx, U32 miny, U32 width, U32 height)
+	void setScissorInternal(U32 minx, U32 miny, U32 width, U32 height)
 	{
 		ANKI_ASSERT(width > 0 && height > 0);
 		commandCommon();
@@ -165,68 +166,70 @@ public:
 		}
 	}
 
-	void setPolygonOffset(F32 factor, F32 units)
+	void setPolygonOffsetInternal(F32 factor, F32 units)
 	{
 		commandCommon();
 		m_state.setPolygonOffset(factor, units);
 	}
 
-	void setStencilOperations(FaceSelectionBit face, StencilOperation stencilFail,
-							  StencilOperation stencilPassDepthFail, StencilOperation stencilPassDepthPass)
+	void setStencilOperationsInternal(FaceSelectionBit face, StencilOperation stencilFail,
+									  StencilOperation stencilPassDepthFail, StencilOperation stencilPassDepthPass)
 	{
 		commandCommon();
 		m_state.setStencilOperations(face, stencilFail, stencilPassDepthFail, stencilPassDepthPass);
 	}
 
-	void setStencilCompareOperation(FaceSelectionBit face, CompareOperation comp)
+	void setStencilCompareOperationInternal(FaceSelectionBit face, CompareOperation comp)
 	{
 		commandCommon();
 		m_state.setStencilCompareOperation(face, comp);
 	}
 
-	void setStencilCompareMask(FaceSelectionBit face, U32 mask);
+	void setStencilCompareMaskInternal(FaceSelectionBit face, U32 mask);
 
-	void setStencilWriteMask(FaceSelectionBit face, U32 mask);
+	void setStencilWriteMaskInternal(FaceSelectionBit face, U32 mask);
 
-	void setStencilReference(FaceSelectionBit face, U32 ref);
+	void setStencilReferenceInternal(FaceSelectionBit face, U32 ref);
 
-	void setDepthWrite(Bool enable)
+	void setDepthWriteInternal(Bool enable)
 	{
 		commandCommon();
 		m_state.setDepthWrite(enable);
 	}
 
-	void setDepthCompareOperation(CompareOperation op)
+	void setDepthCompareOperationInternal(CompareOperation op)
 	{
 		commandCommon();
 		m_state.setDepthCompareOperation(op);
 	}
 
-	void setAlphaToCoverage(Bool enable)
+	void setAlphaToCoverageInternal(Bool enable)
 	{
 		commandCommon();
 		m_state.setAlphaToCoverage(enable);
 	}
 
-	void setColorChannelWriteMask(U32 attachment, ColorBit mask)
+	void setColorChannelWriteMaskInternal(U32 attachment, ColorBit mask)
 	{
 		commandCommon();
 		m_state.setColorChannelWriteMask(attachment, mask);
 	}
 
-	void setBlendFactors(U32 attachment, BlendFactor srcRgb, BlendFactor dstRgb, BlendFactor srcA, BlendFactor dstA)
+	void setBlendFactorsInternal(U32 attachment, BlendFactor srcRgb, BlendFactor dstRgb, BlendFactor srcA,
+								 BlendFactor dstA)
 	{
 		commandCommon();
 		m_state.setBlendFactors(attachment, srcRgb, dstRgb, srcA, dstA);
 	}
 
-	void setBlendOperation(U32 attachment, BlendOperation funcRgb, BlendOperation funcA)
+	void setBlendOperationInternal(U32 attachment, BlendOperation funcRgb, BlendOperation funcA)
 	{
 		commandCommon();
 		m_state.setBlendOperation(attachment, funcRgb, funcA);
 	}
 
-	void bindTextureAndSamplerInternal(U32 set, U32 binding, TextureViewPtr& texView, SamplerPtr sampler, U32 arrayIdx)
+	void bindTextureAndSamplerInternal(U32 set, U32 binding, const TextureViewPtr& texView, const SamplerPtr& sampler,
+									   U32 arrayIdx)
 	{
 		commandCommon();
 		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
@@ -240,7 +243,7 @@ public:
 		m_microCmdb->pushObjectRef(sampler);
 	}
 
-	void bindTextureInternal(U32 set, U32 binding, TextureViewPtr& texView, U32 arrayIdx)
+	void bindTextureInternal(U32 set, U32 binding, const TextureViewPtr& texView, U32 arrayIdx)
 	{
 		commandCommon();
 		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
@@ -253,14 +256,14 @@ public:
 		m_microCmdb->pushObjectRef(texView);
 	}
 
-	void bindSamplerInternal(U32 set, U32 binding, SamplerPtr& sampler, U32 arrayIdx)
+	void bindSamplerInternal(U32 set, U32 binding, const SamplerPtr& sampler, U32 arrayIdx)
 	{
 		commandCommon();
 		m_dsetState[set].bindSampler(binding, arrayIdx, sampler.get());
 		m_microCmdb->pushObjectRef(sampler);
 	}
 
-	void bindImageInternal(U32 set, U32 binding, TextureViewPtr& img, U32 arrayIdx)
+	void bindImageInternal(U32 set, U32 binding, const TextureViewPtr& img, U32 arrayIdx)
 	{
 		commandCommon();
 		m_dsetState[set].bindImage(binding, arrayIdx, img.get());
@@ -275,7 +278,7 @@ public:
 		m_microCmdb->pushObjectRef(img);
 	}
 
-	void bindAccelerationStructureInternal(U32 set, U32 binding, AccelerationStructurePtr& as, U32 arrayIdx)
+	void bindAccelerationStructureInternal(U32 set, U32 binding, const AccelerationStructurePtr& as, U32 arrayIdx)
 	{
 		commandCommon();
 		m_dsetState[set].bindAccelerationStructure(binding, arrayIdx, as.get());
@@ -288,99 +291,108 @@ public:
 		m_dsetState[set].bindBindlessDescriptorSet();
 	}
 
-	void beginRenderPass(FramebufferPtr fb, const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
-						 TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width, U32 height);
+	void beginRenderPassInternal(const FramebufferPtr& fb,
+								 const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
+								 TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width,
+								 U32 height);
 
-	void endRenderPass();
+	void endRenderPassInternal();
 
 	void setVrsRateInternal(VrsRate rate);
 
-	void drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance);
+	void drawArraysInternal(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance);
 
-	void drawElements(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex, U32 baseVertex,
-					  U32 baseInstance);
+	void drawElementsInternal(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex, U32 baseVertex,
+							  U32 baseInstance);
 
-	void drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr& buff);
+	void drawArraysIndirectInternal(PrimitiveTopology topology, U32 drawCount, PtrSize offset, const BufferPtr& buff);
 
-	void drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset, BufferPtr& buff);
+	void drawElementsIndirectInternal(PrimitiveTopology topology, U32 drawCount, PtrSize offset, const BufferPtr& buff);
 
-	void dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ);
+	void dispatchComputeInternal(U32 groupCountX, U32 groupCountY, U32 groupCountZ);
 
-	void traceRaysInternal(BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize, U32 hitGroupSbtRecordCount,
-						   U32 rayTypeCount, U32 width, U32 height, U32 depth);
+	void traceRaysInternal(const BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize,
+						   U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height, U32 depth);
 
-	void resetOcclusionQuery(OcclusionQueryPtr query);
+	void resetOcclusionQueryInternal(const OcclusionQueryPtr& query);
 
-	void beginOcclusionQuery(OcclusionQueryPtr query);
+	void beginOcclusionQueryInternal(const OcclusionQueryPtr& query);
 
-	void endOcclusionQuery(OcclusionQueryPtr query);
+	void endOcclusionQueryInternal(const OcclusionQueryPtr& query);
 
-	void resetTimestampQueryInternal(TimestampQueryPtr& query);
+	void resetTimestampQueryInternal(const TimestampQueryPtr& query);
 
-	void writeTimestampInternal(TimestampQueryPtr& query);
+	void writeTimestampInternal(const TimestampQueryPtr& query);
 
-	void generateMipmaps2d(TextureViewPtr texView);
+	void generateMipmaps2dInternal(const TextureViewPtr& texView);
 
-	void clearTextureView(TextureViewPtr texView, const ClearValue& clearValue);
+	void clearTextureViewInternal(const TextureViewPtr& texView, const ClearValue& clearValue);
 
-	void pushSecondLevelCommandBuffer(CommandBufferPtr cmdb);
+	void pushSecondLevelCommandBufferInternal(const CommandBufferPtr& cmdb);
 
 	// To enable using Anki's commandbuffers for external workloads
 	void beginRecordingExt();
 
 	void endRecording();
 
-	void setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-						   const TextureSubresourceInfo& subresource);
+	void setTextureBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+								   const TextureSubresourceInfo& subresource);
 
-	void setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-								  const TextureSurfaceInfo& surf);
+	void setTextureSurfaceBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+										  const TextureSurfaceInfo& surf);
 
-	void setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-								 const TextureVolumeInfo& vol);
+	void setTextureVolumeBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+										 const TextureVolumeInfo& vol);
 
-	void setTextureBarrierRange(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-								const VkImageSubresourceRange& range);
+	void setTextureBarrierRangeInternal(const TexturePtr& tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
+										const VkImageSubresourceRange& range);
 
-	void setBufferBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess, VkPipelineStageFlags dstStage,
-						  VkAccessFlags dstAccess, PtrSize offset, PtrSize size, VkBuffer buff);
+	void setBufferBarrierInternal(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess, VkPipelineStageFlags dstStage,
+								  VkAccessFlags dstAccess, PtrSize offset, PtrSize size, VkBuffer buff);
 
-	void setBufferBarrier(BufferPtr& buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset, PtrSize size);
+	void setBufferBarrierInternal(const BufferPtr& buff, BufferUsageBit before, BufferUsageBit after, PtrSize offset,
+								  PtrSize size);
 
-	void setAccelerationStructureBarrierInternal(AccelerationStructurePtr& as, AccelerationStructureUsageBit prevUsage,
+	void setAccelerationStructureBarrierInternal(const AccelerationStructurePtr& as,
+												 AccelerationStructureUsageBit prevUsage,
 												 AccelerationStructureUsageBit nextUsage);
 
-	void fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value);
+	void fillBufferInternal(const BufferPtr& buff, PtrSize offset, PtrSize size, U32 value);
 
-	void writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset, BufferPtr buff);
+	void writeOcclusionQueryResultToBufferInternal(const OcclusionQueryPtr& query, PtrSize offset,
+												   const BufferPtr& buff);
 
-	void bindShaderProgram(ShaderProgramPtr& prog);
+	void bindShaderProgramInternal(const ShaderProgramPtr& prog);
 
-	void bindUniformBufferInternal(U32 set, U32 binding, BufferPtr& buff, PtrSize offset, PtrSize range, U32 arrayIdx)
+	void bindUniformBufferInternal(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+								   U32 arrayIdx)
 	{
 		commandCommon();
 		m_dsetState[set].bindUniformBuffer(binding, arrayIdx, buff.get(), offset, range);
 		m_microCmdb->pushObjectRef(buff);
 	}
 
-	void bindStorageBufferInternal(U32 set, U32 binding, BufferPtr& buff, PtrSize offset, PtrSize range, U32 arrayIdx)
+	void bindStorageBufferInternal(U32 set, U32 binding, const BufferPtr& buff, PtrSize offset, PtrSize range,
+								   U32 arrayIdx)
 	{
 		commandCommon();
 		m_dsetState[set].bindStorageBuffer(binding, arrayIdx, buff.get(), offset, range);
 		m_microCmdb->pushObjectRef(buff);
 	}
 
-	void copyBufferToTextureViewInternal(BufferPtr buff, PtrSize offset, PtrSize range, TextureViewPtr texView);
+	void copyBufferToTextureViewInternal(const BufferPtr& buff, PtrSize offset, PtrSize range,
+										 const TextureViewPtr& texView);
 
-	void copyBufferToBuffer(BufferPtr& src, PtrSize srcOffset, BufferPtr& dst, PtrSize dstOffset, PtrSize range);
+	void copyBufferToBufferInternal(const BufferPtr& src, PtrSize srcOffset, const BufferPtr& dst, PtrSize dstOffset,
+									PtrSize range);
 
-	void buildAccelerationStructureInternal(AccelerationStructurePtr& as);
+	void buildAccelerationStructureInternal(const AccelerationStructurePtr& as);
 
-	void setPushConstants(const void* data, U32 dataSize);
+	void setPushConstantsInternal(const void* data, U32 dataSize);
 
-	void setRasterizationOrder(RasterizationOrder order);
+	void setRasterizationOrderInternal(RasterizationOrder order);
 
-	void setLineWidth(F32 width);
+	void setLineWidthInternal(F32 width);
 
 private:
 	StackAllocator<U8> m_alloc;

+ 52 - 49
AnKi/Gr/Vulkan/CommandBufferImpl.inl.h

@@ -13,7 +13,7 @@
 
 namespace anki {
 
-inline void CommandBufferImpl::setStencilCompareMask(FaceSelectionBit face, U32 mask)
+inline void CommandBufferImpl::setStencilCompareMaskInternal(FaceSelectionBit face, U32 mask)
 {
 	commandCommon();
 
@@ -37,7 +37,7 @@ inline void CommandBufferImpl::setStencilCompareMask(FaceSelectionBit face, U32
 	}
 }
 
-inline void CommandBufferImpl::setStencilWriteMask(FaceSelectionBit face, U32 mask)
+inline void CommandBufferImpl::setStencilWriteMaskInternal(FaceSelectionBit face, U32 mask)
 {
 	commandCommon();
 
@@ -61,7 +61,7 @@ inline void CommandBufferImpl::setStencilWriteMask(FaceSelectionBit face, U32 ma
 	}
 }
 
-inline void CommandBufferImpl::setStencilReference(FaceSelectionBit face, U32 ref)
+inline void CommandBufferImpl::setStencilReferenceInternal(FaceSelectionBit face, U32 ref)
 {
 	commandCommon();
 
@@ -122,8 +122,9 @@ inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage, Vk
 #endif
 }
 
-inline void CommandBufferImpl::setTextureBarrierRange(TexturePtr tex, TextureUsageBit prevUsage,
-													  TextureUsageBit nextUsage, const VkImageSubresourceRange& range)
+inline void CommandBufferImpl::setTextureBarrierRangeInternal(const TexturePtr& tex, TextureUsageBit prevUsage,
+															  TextureUsageBit nextUsage,
+															  const VkImageSubresourceRange& range)
 {
 	const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
 	ANKI_ASSERT(impl.usageValid(prevUsage));
@@ -147,8 +148,9 @@ inline void CommandBufferImpl::setTextureBarrierRange(TexturePtr tex, TextureUsa
 	m_microCmdb->pushObjectRef(tex);
 }
 
-inline void CommandBufferImpl::setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
-												 const TextureSubresourceInfo& subresource_)
+inline void CommandBufferImpl::setTextureBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage,
+														 TextureUsageBit nextUsage,
+														 const TextureSubresourceInfo& subresource_)
 {
 	TextureSubresourceInfo subresource = subresource_;
 	const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
@@ -167,11 +169,12 @@ inline void CommandBufferImpl::setTextureBarrier(TexturePtr tex, TextureUsageBit
 
 	VkImageSubresourceRange range;
 	impl.computeVkImageSubresourceRange(subresource, range);
-	setTextureBarrierRange(tex, prevUsage, nextUsage, range);
+	setTextureBarrierRangeInternal(tex, prevUsage, nextUsage, range);
 }
 
-inline void CommandBufferImpl::setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage,
-														TextureUsageBit nextUsage, const TextureSurfaceInfo& surf)
+inline void CommandBufferImpl::setTextureSurfaceBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage,
+																TextureUsageBit nextUsage,
+																const TextureSurfaceInfo& surf)
 {
 	if(ANKI_UNLIKELY(surf.m_level > 0 && nextUsage == TextureUsageBit::GENERATE_MIPMAPS))
 	{
@@ -183,11 +186,11 @@ inline void CommandBufferImpl::setTextureSurfaceBarrier(TexturePtr tex, TextureU
 
 	VkImageSubresourceRange range;
 	impl.computeVkImageSubresourceRange(TextureSubresourceInfo(surf, impl.getDepthStencilAspect()), range);
-	setTextureBarrierRange(tex, prevUsage, nextUsage, range);
+	setTextureBarrierRangeInternal(tex, prevUsage, nextUsage, range);
 }
 
-inline void CommandBufferImpl::setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage,
-													   TextureUsageBit nextUsage, const TextureVolumeInfo& vol)
+inline void CommandBufferImpl::setTextureVolumeBarrierInternal(const TexturePtr& tex, TextureUsageBit prevUsage,
+															   TextureUsageBit nextUsage, const TextureVolumeInfo& vol)
 {
 	if(vol.m_level > 0)
 	{
@@ -199,12 +202,12 @@ inline void CommandBufferImpl::setTextureVolumeBarrier(TexturePtr tex, TextureUs
 
 	VkImageSubresourceRange range;
 	impl.computeVkImageSubresourceRange(TextureSubresourceInfo(vol, impl.getDepthStencilAspect()), range);
-	setTextureBarrierRange(tex, prevUsage, nextUsage, range);
+	setTextureBarrierRangeInternal(tex, prevUsage, nextUsage, range);
 }
 
-inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
-												VkPipelineStageFlags dstStage, VkAccessFlags dstAccess, PtrSize offset,
-												PtrSize size, VkBuffer buff)
+inline void CommandBufferImpl::setBufferBarrierInternal(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
+														VkPipelineStageFlags dstStage, VkAccessFlags dstAccess,
+														PtrSize offset, PtrSize size, VkBuffer buff)
 {
 	ANKI_ASSERT(buff);
 	commandCommon();
@@ -237,8 +240,8 @@ inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage, V
 #endif
 }
 
-inline void CommandBufferImpl::setBufferBarrier(BufferPtr& buff, BufferUsageBit before, BufferUsageBit after,
-												PtrSize offset, PtrSize size)
+inline void CommandBufferImpl::setBufferBarrierInternal(const BufferPtr& buff, BufferUsageBit before,
+														BufferUsageBit after, PtrSize offset, PtrSize size)
 {
 	const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
 
@@ -248,12 +251,12 @@ inline void CommandBufferImpl::setBufferBarrier(BufferPtr& buff, BufferUsageBit
 	VkAccessFlags dstAccess;
 	impl.computeBarrierInfo(before, after, srcStage, srcAccess, dstStage, dstAccess);
 
-	setBufferBarrier(srcStage, srcAccess, dstStage, dstAccess, offset, size, impl.getHandle());
+	setBufferBarrierInternal(srcStage, srcAccess, dstStage, dstAccess, offset, size, impl.getHandle());
 
 	m_microCmdb->pushObjectRef(buff);
 }
 
-inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(AccelerationStructurePtr& as,
+inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(const AccelerationStructurePtr& as,
 																	   AccelerationStructureUsageBit prevUsage,
 																	   AccelerationStructureUsageBit nextUsage)
 {
@@ -287,24 +290,24 @@ inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(Accelerat
 #endif
 }
 
-inline void CommandBufferImpl::drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first,
-										  U32 baseInstance)
+inline void CommandBufferImpl::drawArraysInternal(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first,
+												  U32 baseInstance)
 {
 	m_state.setPrimitiveTopology(topology);
 	drawcallCommon();
 	ANKI_CMD(vkCmdDraw(m_handle, count, instanceCount, first, baseInstance), ANY_OTHER_COMMAND);
 }
 
-inline void CommandBufferImpl::drawElements(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex,
-											U32 baseVertex, U32 baseInstance)
+inline void CommandBufferImpl::drawElementsInternal(PrimitiveTopology topology, U32 count, U32 instanceCount,
+													U32 firstIndex, U32 baseVertex, U32 baseInstance)
 {
 	m_state.setPrimitiveTopology(topology);
 	drawcallCommon();
 	ANKI_CMD(vkCmdDrawIndexed(m_handle, count, instanceCount, firstIndex, baseVertex, baseInstance), ANY_OTHER_COMMAND);
 }
 
-inline void CommandBufferImpl::drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
-												  BufferPtr& buff)
+inline void CommandBufferImpl::drawArraysIndirectInternal(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
+														  const BufferPtr& buff)
 {
 	m_state.setPrimitiveTopology(topology);
 	drawcallCommon();
@@ -317,8 +320,8 @@ inline void CommandBufferImpl::drawArraysIndirect(PrimitiveTopology topology, U3
 			 ANY_OTHER_COMMAND);
 }
 
-inline void CommandBufferImpl::drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
-													BufferPtr& buff)
+inline void CommandBufferImpl::drawElementsIndirectInternal(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
+															const BufferPtr& buff)
 {
 	m_state.setPrimitiveTopology(topology);
 	drawcallCommon();
@@ -331,7 +334,7 @@ inline void CommandBufferImpl::drawElementsIndirect(PrimitiveTopology topology,
 			 ANY_OTHER_COMMAND);
 }
 
-inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
+inline void CommandBufferImpl::dispatchComputeInternal(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
 {
 	ANKI_ASSERT(m_computeProg);
 	ANKI_ASSERT(m_computeProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
@@ -380,9 +383,9 @@ inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY,
 	getGrManagerImpl().endMarker(m_handle);
 }
 
-inline void CommandBufferImpl::traceRaysInternal(BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize32,
-												 U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height,
-												 U32 depth)
+inline void CommandBufferImpl::traceRaysInternal(const BufferPtr& sbtBuffer, PtrSize sbtBufferOffset,
+												 U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
+												 U32 width, U32 height, U32 depth)
 {
 	const PtrSize sbtRecordSize = sbtRecordSize32;
 	ANKI_ASSERT(hitGroupSbtRecordCount > 0);
@@ -466,7 +469,7 @@ inline void CommandBufferImpl::traceRaysInternal(BufferPtr& sbtBuffer, PtrSize s
 	getGrManagerImpl().endMarker(m_handle);
 }
 
-inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
+inline void CommandBufferImpl::resetOcclusionQueryInternal(const OcclusionQueryPtr& query)
 {
 	commandCommon();
 
@@ -488,7 +491,7 @@ inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
 	m_microCmdb->pushObjectRef(query);
 }
 
-inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
+inline void CommandBufferImpl::beginOcclusionQueryInternal(const OcclusionQueryPtr& query)
 {
 	commandCommon();
 
@@ -501,7 +504,7 @@ inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
 	m_microCmdb->pushObjectRef(query);
 }
 
-inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
+inline void CommandBufferImpl::endOcclusionQueryInternal(const OcclusionQueryPtr& query)
 {
 	commandCommon();
 
@@ -514,7 +517,7 @@ inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
 	m_microCmdb->pushObjectRef(query);
 }
 
-inline void CommandBufferImpl::resetTimestampQueryInternal(TimestampQueryPtr& query)
+inline void CommandBufferImpl::resetTimestampQueryInternal(const TimestampQueryPtr& query)
 {
 	commandCommon();
 
@@ -536,7 +539,7 @@ inline void CommandBufferImpl::resetTimestampQueryInternal(TimestampQueryPtr& qu
 	m_microCmdb->pushObjectRef(query);
 }
 
-inline void CommandBufferImpl::writeTimestampInternal(TimestampQueryPtr& query)
+inline void CommandBufferImpl::writeTimestampInternal(const TimestampQueryPtr& query)
 {
 	commandCommon();
 
@@ -548,7 +551,7 @@ inline void CommandBufferImpl::writeTimestampInternal(TimestampQueryPtr& query)
 	m_microCmdb->pushObjectRef(query);
 }
 
-inline void CommandBufferImpl::clearTextureView(TextureViewPtr texView, const ClearValue& clearValue)
+inline void CommandBufferImpl::clearTextureViewInternal(const TextureViewPtr& texView, const ClearValue& clearValue)
 {
 	commandCommon();
 
@@ -574,7 +577,7 @@ inline void CommandBufferImpl::clearTextureView(TextureViewPtr texView, const Cl
 	m_microCmdb->pushObjectRef(texView);
 }
 
-inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
+inline void CommandBufferImpl::pushSecondLevelCommandBufferInternal(const CommandBufferPtr& cmdb)
 {
 	commandCommon();
 	ANKI_ASSERT(insideRenderPass());
@@ -792,7 +795,7 @@ inline void CommandBufferImpl::flushBatches(CommandBufferCommandType type)
 	}
 }
 
-inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
+inline void CommandBufferImpl::fillBufferInternal(const BufferPtr& buff, PtrSize offset, PtrSize size, U32 value)
 {
 	commandCommon();
 	ANKI_ASSERT(!insideRenderPass());
@@ -812,8 +815,8 @@ inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSiz
 	m_microCmdb->pushObjectRef(buff);
 }
 
-inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset,
-																 BufferPtr buff)
+inline void CommandBufferImpl::writeOcclusionQueryResultToBufferInternal(const OcclusionQueryPtr& query, PtrSize offset,
+																		 const BufferPtr& buff)
 {
 	commandCommon();
 	ANKI_ASSERT(!insideRenderPass());
@@ -845,7 +848,7 @@ inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(OcclusionQueryP
 	m_microCmdb->pushObjectRef(buff);
 }
 
-inline void CommandBufferImpl::bindShaderProgram(ShaderProgramPtr& prog)
+inline void CommandBufferImpl::bindShaderProgramInternal(const ShaderProgramPtr& prog)
 {
 	commandCommon();
 
@@ -902,8 +905,8 @@ inline void CommandBufferImpl::bindShaderProgram(ShaderProgramPtr& prog)
 #endif
 }
 
-inline void CommandBufferImpl::copyBufferToBuffer(BufferPtr& src, PtrSize srcOffset, BufferPtr& dst, PtrSize dstOffset,
-												  PtrSize range)
+inline void CommandBufferImpl::copyBufferToBufferInternal(const BufferPtr& src, PtrSize srcOffset, const BufferPtr& dst,
+														  PtrSize dstOffset, PtrSize range)
 {
 	ANKI_ASSERT(static_cast<const BufferImpl&>(*src).usageValid(BufferUsageBit::TRANSFER_SOURCE));
 	ANKI_ASSERT(static_cast<const BufferImpl&>(*dst).usageValid(BufferUsageBit::TRANSFER_DESTINATION));
@@ -930,7 +933,7 @@ inline Bool CommandBufferImpl::flipViewport() const
 	return static_cast<const FramebufferImpl&>(*m_activeFb).hasPresentableTexture();
 }
 
-inline void CommandBufferImpl::setPushConstants(const void* data, U32 dataSize)
+inline void CommandBufferImpl::setPushConstantsInternal(const void* data, U32 dataSize)
 {
 	ANKI_ASSERT(data && dataSize && dataSize % 16 == 0);
 	const ShaderProgramImpl& prog = getBoundProgram();
@@ -947,7 +950,7 @@ inline void CommandBufferImpl::setPushConstants(const void* data, U32 dataSize)
 #endif
 }
 
-inline void CommandBufferImpl::setRasterizationOrder(RasterizationOrder order)
+inline void CommandBufferImpl::setRasterizationOrderInternal(RasterizationOrder order)
 {
 	commandCommon();
 
@@ -957,7 +960,7 @@ inline void CommandBufferImpl::setRasterizationOrder(RasterizationOrder order)
 	}
 }
 
-inline void CommandBufferImpl::setLineWidth(F32 width)
+inline void CommandBufferImpl::setLineWidthInternal(F32 width)
 {
 	commandCommon();
 	vkCmdSetLineWidth(m_handle, width);

+ 13 - 3
AnKi/Gr/Vulkan/DeferredBarrierFactory.h

@@ -33,9 +33,19 @@ public:
 		return m_handle;
 	}
 
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 	GrAllocator<U8> getAllocator() const;
@@ -58,7 +68,7 @@ public:
 
 private:
 	VkEvent m_handle = VK_NULL_HANDLE;
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	DeferredBarrierFactory* m_factory = nullptr;
 
 	/// Fence to find out when it's safe to reuse this barrier.

+ 8 - 3
AnKi/Gr/Vulkan/FenceFactory.h

@@ -36,9 +36,14 @@ public:
 		return m_handle;
 	}
 
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	GrAllocator<U8> getAllocator() const;
@@ -58,7 +63,7 @@ public:
 
 private:
 	VkFence m_handle = VK_NULL_HANDLE;
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	FenceFactory* m_factory = nullptr;
 };
 

+ 41 - 8
AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -159,7 +159,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	m_occlusionQueryFactory.init(getAllocator(), m_device, VK_QUERY_TYPE_OCCLUSION);
 	m_timestampQueryFactory.init(getAllocator(), m_device, VK_QUERY_TYPE_TIMESTAMP);
 
-	// See if analigned formats are supported
+	// See if unaligned formats are supported
 	{
 		m_capabilities.m_unalignedBbpTextureFormats = true;
 
@@ -1037,16 +1037,49 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		if(!m_fragmentShadingRateFeatures.attachmentFragmentShadingRate
 		   || !m_fragmentShadingRateFeatures.pipelineFragmentShadingRate)
 		{
-			ANKI_VK_LOGE(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME
-						 " doesn't support attachment and/or pipeline rates");
-			return Error::FUNCTION_FAILED;
+			ANKI_VK_LOGW(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME
+						 " doesn't support attachment and/or pipeline rates. Will disable VRS");
+			m_capabilities.m_vrs = false;
+		}
+		else
+		{
+			// Disable some things
+			m_fragmentShadingRateFeatures.primitiveFragmentShadingRate = false;
 		}
 
-		// Disable some things
-		m_fragmentShadingRateFeatures.primitiveFragmentShadingRate = false;
+		if(m_capabilities.m_vrs)
+		{
+			VkPhysicalDeviceFragmentShadingRatePropertiesKHR fragmentShadingRateProperties = {};
+			fragmentShadingRateProperties.sType =
+				VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
+
+			VkPhysicalDeviceProperties2 properties = {};
+			properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+			properties.pNext = &fragmentShadingRateProperties;
+			vkGetPhysicalDeviceProperties2(m_physicalDevice, &properties);
+
+			if(fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize.width > 16
+			   || fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize.height > 16
+			   || fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize.width < 8
+			   || fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize.height < 8)
+			{
+				ANKI_VK_LOGW(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME
+							 " doesn't support 8x8 or 16x16 shading rate attachment texel size. Will disable VRS");
+				m_capabilities.m_vrs = false;
+			}
+			else
+			{
+				m_capabilities.m_minShadingRateImageTexelSize =
+					max(fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize.width,
+						fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize.height);
+			}
+		}
 
-		m_fragmentShadingRateFeatures.pNext = const_cast<void*>(ci.pNext);
-		ci.pNext = &m_fragmentShadingRateFeatures;
+		if(m_capabilities.m_vrs)
+		{
+			m_fragmentShadingRateFeatures.pNext = const_cast<void*>(ci.pNext);
+			ci.pNext = &m_fragmentShadingRateFeatures;
+		}
 	}
 
 	ANKI_VK_CHECK(vkCreateDevice(m_physicalDevice, &ci, nullptr, &m_device));

+ 2 - 2
AnKi/Gr/Vulkan/MicroObjectRecycler.inl.h

@@ -55,7 +55,7 @@ inline T* MicroObjectRecycler<T>::findToReuse()
 		}
 	}
 
-	ANKI_ASSERT(out == nullptr || out->getRefcount().getNonAtomically() == 0);
+	ANKI_ASSERT(out == nullptr || out->getRefcount() == 0);
 
 	m_cacheMisses += (out == nullptr);
 
@@ -73,7 +73,7 @@ template<typename T>
 void MicroObjectRecycler<T>::recycle(T* mobj)
 {
 	ANKI_ASSERT(mobj);
-	ANKI_ASSERT(mobj->getRefcount().getNonAtomically() == 0);
+	ANKI_ASSERT(mobj->getRefcount() == 0);
 
 	LockGuard<Mutex> lock(m_mtx);
 

+ 1 - 1
AnKi/Gr/Vulkan/SamplerFactory.cpp

@@ -130,7 +130,7 @@ void SamplerFactory::destroy()
 	for(auto it : m_map)
 	{
 		MicroSampler* const sampler = it;
-		ANKI_ASSERT(sampler->getRefcount().load() == 0 && "Someone still holds a reference to a sampler");
+		ANKI_ASSERT(sampler->getRefcount() == 0 && "Someone still holds a reference to a sampler");
 		alloc.deleteInstance(sampler);
 	}
 

+ 13 - 3
AnKi/Gr/Vulkan/SamplerFactory.h

@@ -31,14 +31,24 @@ public:
 		return m_handle;
 	}
 
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 private:
 	VkSampler m_handle = VK_NULL_HANDLE;
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	SamplerFactory* m_factory = nullptr;
 
 	MicroSampler(SamplerFactory* f)

+ 13 - 3
AnKi/Gr/Vulkan/SemaphoreFactory.h

@@ -37,9 +37,19 @@ public:
 
 	GrAllocator<U8> getAllocator() const;
 
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 	MicroFencePtr& getFence()
@@ -83,7 +93,7 @@ public:
 
 private:
 	VkSemaphore m_handle = VK_NULL_HANDLE;
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	SemaphoreFactory* m_factory = nullptr;
 
 	/// Fence to find out when it's safe to reuse this semaphore.

+ 13 - 3
AnKi/Gr/Vulkan/SwapchainFactory.h

@@ -32,9 +32,19 @@ public:
 
 	~MicroSwapchain();
 
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 	GrAllocator<U8> getAllocator() const;
@@ -62,7 +72,7 @@ public:
 	}
 
 private:
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	SwapchainFactory* m_factory = nullptr;
 
 	enum

+ 2 - 1
AnKi/Importer/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiImporter ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiImporter ${sources} ${headers})
 target_compile_definitions(AnKiImporter PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiImporter AnKiResource)

+ 3 - 1
AnKi/Input/CMakeLists.txt

@@ -1,3 +1,5 @@
+file(GLOB_RECURSE headers *.h)
+
 if(ANKI_HEADLESS)
 	set(sources InputDummy.cpp)
 elseif(SDL)
@@ -6,6 +8,6 @@ elseif(ANDROID)
 	set(sources InputAndroid.cpp)
 endif()
 
-add_library(AnKiInput ${sources})
+add_library(AnKiInput ${sources} ${headers})
 target_compile_definitions(AnKiInput PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiInput AnKiCore AnKiImGui)

+ 2 - 1
AnKi/Math/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiMath ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiMath ${sources} ${headers})
 target_compile_definitions(AnKiMath PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiMath AnKiUtil)

+ 2 - 1
AnKi/Physics/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiPhysics ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiPhysics ${sources} ${headers})
 target_compile_definitions(AnKiPhysics PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiPhysics AnKiUtil BulletSoftBody BulletDynamics BulletCollision LinearMath)

+ 8 - 3
AnKi/Physics/PhysicsObject.h

@@ -89,9 +89,14 @@ protected:
 		return *m_world;
 	}
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	HeapAllocator<U8> getAllocator() const;
@@ -104,7 +109,7 @@ private:
 	virtual void unregisterFromWorld() = 0;
 
 private:
-	Atomic<I32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 	PhysicsObjectType m_type;
 	void* m_userData = nullptr;
 };

+ 2 - 1
AnKi/Renderer/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiRenderer ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiRenderer ${sources} ${headers})
 target_compile_definitions(AnKiRenderer PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiRenderer AnKiGr AnKiResource AnKiUi)

+ 5 - 2
AnKi/Renderer/ConfigVars.defs.h

@@ -31,9 +31,12 @@ ANKI_CONFIG_VAR_U32(RSsrFirstStepPixels, 32, 1, 256, "The 1st step in ray marchi
 ANKI_CONFIG_VAR_U32(RSsrDepthLod, 2, 0, 1000, "Texture LOD of the depth texture that will be raymarched")
 ANKI_CONFIG_VAR_U32(RSsrMaxSteps, 64, 1, 256, "Max SSR raymarching steps")
 ANKI_CONFIG_VAR_BOOL(RSsrStochastic, false, "Stochastic reflections")
+// Roughness threshold above which SSR is skipped in favour of probe reflections
+ANKI_CONFIG_VAR_F32(RSsrRoughnessCutoff, ((ANKI_PLATFORM_MOBILE) ? 0.7f : 0.9f), 0.0f, 1.0f,
+					"Materials with roughness higher than this value will fall back to probe reflections")
 
 // GI probes
-ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeTileResolution, (ANKI_PLATFORM_MOBILE) ? 16 : 32, 8, 32, "GI tile resolution")
+ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeTileResolution, ((ANKI_PLATFORM_MOBILE) ? 16 : 32), 8, 32,
+					"GI tile resolution")
 ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeShadowMapResolution, 128, 4, 2048, "GI shadowmap resolution")
 ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeMaxCachedProbes, 16, 4, 2048, "Max cached probes")
 ANKI_CONFIG_VAR_U32(RIndirectDiffuseProbeMaxVisibleProbes, 8, 1, 256, "Max visible GI probes")
@@ -48,7 +51,7 @@ ANKI_CONFIG_VAR_F32(RIndirectDiffuseVrsDistanceThreshold, 0.01f, 0.00001f, 10.0f
 					"The meters that control the VRS SRI generation")
 
 // Shadows
-ANKI_CONFIG_VAR_U32(RShadowMappingTileResolution, (ANKI_PLATFORM_MOBILE) ? 128 : 512, 16, 2048,
+ANKI_CONFIG_VAR_U32(RShadowMappingTileResolution, ((ANKI_PLATFORM_MOBILE) ? 128 : 512), 16, 2048,
 					"Shadowmapping tile resolution")
 ANKI_CONFIG_VAR_U32(RShadowMappingTileCountPerRowOrColumn, 16, 1, 256,
 					"Shadowmapping atlas will have this number squared number of tiles")

+ 21 - 3
AnKi/Renderer/IndirectDiffuse.cpp

@@ -47,10 +47,18 @@ Error IndirectDiffuse::initInternal()
 	texInit.setName("IndirectDiffuse #2");
 	m_rts[1] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
 
-	// Init VRS SRI generation
+	if(!preferCompute)
 	{
 		m_main.m_fbDescr.m_colorAttachmentCount = 1;
 		m_main.m_fbDescr.bake();
+	}
+
+	// Init VRS SRI generation
+	const Bool enableVrs = getGrManager().getDeviceCapabilities().m_vrs && getConfig().getRVrs() && !preferCompute;
+	if(enableVrs)
+	{
+		m_vrs.m_sriTexelDimension = getGrManager().getDeviceCapabilities().m_minShadingRateImageTexelSize;
+		ANKI_ASSERT(m_vrs.m_sriTexelDimension == 8 || m_vrs.m_sriTexelDimension == 16);
 
 		const UVec2 rez = (size + m_vrs.m_sriTexelDimension - 1) / m_vrs.m_sriTexelDimension;
 		m_vrs.m_rtHandle =
@@ -69,6 +77,12 @@ Error IndirectDiffuse::initInternal()
 			// need for shared mem
 			variantInit.addMutation("SHARED_MEMORY", 0);
 		}
+		else if(m_vrs.m_sriTexelDimension == 8 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 16)
+		{
+			// Algorithm's workgroup size is 16, GPU's subgroup size is min 16 -> each workgroup has 1 subgroup -> No
+			// need for shared mem
+			variantInit.addMutation("SHARED_MEMORY", 0);
+		}
 		else
 		{
 			variantInit.addMutation("SHARED_MEMORY", 1);
@@ -239,7 +253,7 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 		prpass->newDependency(RenderPassDependency(m_r->getMotionVectors().getHistoryLengthRt(), readUsage));
 		prpass->newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[READ], readUsage));
 
-		prpass->setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		prpass->setWork([this, &ctx, enableVrs](RenderPassWorkContext& rgraphCtx) {
 			CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 			cmdb->bindShaderProgram(m_main.m_grProg);
 
@@ -285,7 +299,11 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 			else
 			{
 				cmdb->setViewport(0, 0, unis.m_viewportSize.x(), unis.m_viewportSize.y());
-				cmdb->setVrsRate(VrsRate::_1x1);
+
+				if(enableVrs)
+				{
+					cmdb->setVrsRate(VrsRate::_1x1);
+				}
 
 				cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3);
 			}

+ 1 - 0
AnKi/Renderer/IndirectSpecular.cpp

@@ -126,6 +126,7 @@ void IndirectSpecular::run(const RenderingContext& ctx, RenderPassWorkContext& r
 	unis->m_projMat = ctx.m_matrices.m_projectionJitter;
 	unis->m_invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
 	unis->m_normalMat = Mat3x4(Vec3(0.0f), ctx.m_matrices.m_view.getRotationPart());
+	unis->m_roughnessCutoff = getConfig().getRSsrRoughnessCutoff();
 
 	// Bind all
 	cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);

+ 8 - 0
AnKi/Renderer/VrsSriGeneration.cpp

@@ -37,6 +37,8 @@ Error VrsSriGeneration::initInternal()
 		return Error::NONE;
 	}
 
+	m_sriTexelDimension = getGrManager().getDeviceCapabilities().m_minShadingRateImageTexelSize;
+	ANKI_ASSERT(m_sriTexelDimension == 8 || m_sriTexelDimension == 16);
 	const UVec2 rez = (m_r->getInternalResolution() + m_sriTexelDimension - 1) / m_sriTexelDimension;
 
 	ANKI_R_LOGV("Intializing VRS SRI generation. SRI resolution %ux%u", rez.x(), rez.y());
@@ -63,6 +65,12 @@ Error VrsSriGeneration::initInternal()
 		// for shared mem
 		variantInit.addMutation("SHARED_MEMORY", 0);
 	}
+	else if(m_sriTexelDimension == 8 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 16)
+	{
+		// Algorithm's workgroup size is 16, GPU's subgroup size is min 16 -> each workgroup has 1 subgroup -> No need
+		// for shared mem
+		variantInit.addMutation("SHARED_MEMORY", 0);
+	}
 	else
 	{
 		variantInit.addMutation("SHARED_MEMORY", 1);

+ 1 - 1
AnKi/Renderer/VrsSriGeneration.h

@@ -47,7 +47,7 @@ public:
 	Bool m_sriTexImportedOnce = false;
 	FramebufferDescription m_fbDescr;
 
-	static constexpr U32 m_sriTexelDimension = 16;
+	U32 m_sriTexelDimension = 16;
 
 	class
 	{

+ 2 - 1
AnKi/Resource/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiResource ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiResource ${sources} ${headers})
 target_compile_definitions(AnKiResource PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiResource AnKiCore AnKiGr AnKiPhysics AnKiZLib AnKiShaderCompiler)

+ 8 - 3
AnKi/Resource/ResourceFilesystem.h

@@ -56,9 +56,14 @@ public:
 	/// Get the size of the file.
 	virtual PtrSize getSize() const = 0;
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	GenericMemoryPoolAllocator<U8> getAllocator() const
@@ -68,7 +73,7 @@ public:
 
 private:
 	GenericMemoryPoolAllocator<U8> m_alloc;
-	Atomic<I32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 };
 
 /// Resource file smart pointer.

+ 3 - 3
AnKi/Resource/ResourceManager.cpp

@@ -96,10 +96,10 @@ Error ResourceManager::loadResource(const CString& filename, ResourcePtr<T>& out
 	{
 		// Allocate ptr
 		T* ptr = m_alloc.newInstance<T>(this);
-		ANKI_ASSERT(ptr->getRefcount().load() == 0);
+		ANKI_ASSERT(ptr->getRefcount() == 0);
 
 		// Increment the refcount to cover the case where async jobs increment and decrement it within the scope of a load()
-		ptr->getRefcount().fetchAdd(1);
+		ptr->retain();
 
 		// Populate the ptr. Use a block to cleanup temp_pool allocations
 		auto& pool = m_tmpAlloc.getMemoryPool();
@@ -134,7 +134,7 @@ Error ResourceManager::loadResource(const CString& filename, ResourcePtr<T>& out
 		out.reset(ptr);
 
 		// Decrement to undo the increment that happened a few lines above
-		ptr->getRefcount().fetchSub(1);
+		ptr->release();
 	}
 
 	return err;

+ 10 - 5
AnKi/Resource/ResourceObject.h

@@ -37,14 +37,19 @@ public:
 	ResourceAllocator<U8> getAllocator() const;
 	TempResourceAllocator<U8> getTempAllocator() const;
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
 	}
 
-	const Atomic<I32>& getRefcount() const
+	I32 release() const
 	{
-		return m_refcount;
+		return m_refcount.fetchSub(1);
+	}
+
+	I32 getRefcount() const
+	{
+		return m_refcount.load();
 	}
 
 	CString getFilename() const
@@ -84,7 +89,7 @@ public:
 
 private:
 	ResourceManager* m_manager;
-	Atomic<I32> m_refcount;
+	mutable Atomic<I32> m_refcount;
 	String m_fname; ///< Unique resource name.
 	U64 m_uuid = 0;
 };

+ 2 - 1
AnKi/Scene/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiScene ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiScene ${sources} ${headers})
 target_compile_definitions(AnKiScene PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiScene AnKiResource AnKiScript AnKiCollision)

+ 2 - 1
AnKi/Script/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiScript ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiScript ${sources} ${headers})
 target_compile_definitions(AnKiScript PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiScript AnKiRenderer AnKiScene AnKiMath AnKiLua)

+ 2 - 1
AnKi/ShaderCompiler/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiShaderCompiler ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiShaderCompiler ${sources} ${headers})
 target_compile_definitions(AnKiShaderCompiler PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiShaderCompiler AnKiGrCommon AnKiSpirvCross glslang SPIRV OGLCompiler OSDependent)

+ 2 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -116,7 +116,8 @@ struct SsrUniforms
 	U32 m_maxSteps;
 	U32 m_lightBufferMipCount;
 
-	UVec3 m_padding0;
+	UVec2 m_padding0;
+	F32 m_roughnessCutoff;
 	U32 m_firstStepPixels;
 
 	Mat4 m_prevViewProjMatMulInvViewProjMat;

+ 28 - 0
AnKi/Shaders/IndirectDiffuseVrsSriGeneration.ankiprog

@@ -13,7 +13,11 @@
 layout(set = 0, binding = 0) uniform texture2D u_inputTex;
 layout(set = 0, binding = 1) uniform sampler u_nearestClampSampler;
 
+#if SRI_TEXEL_DIMENSION == 8
+const UVec2 REGION_SIZE = UVec2(2u, 2u);
+#else
 const UVec2 REGION_SIZE = UVec2(2u, 4u);
+#endif
 
 const UVec2 WORKGROUP_SIZE = UVec2(SRI_TEXEL_DIMENSION) / REGION_SIZE;
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
@@ -51,6 +55,28 @@ void main()
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) * Vec2(REGION_SIZE) + 0.5) * u_oneOverViewportSize;
 	const Vec2 ndc = UV_TO_NDC(uv);
 
+#if SRI_TEXEL_DIMENSION == 8
+	// Get positions
+	// l0.z  l0.w
+	// l0.x  l0.y
+	Vec4 l0;
+	l0.x = sampleViewPositionZ(uv, 0, 0);
+	l0.y = sampleViewPositionZ(uv, 1, 0);
+	l0.z = sampleViewPositionZ(uv, 0, 1);
+	l0.w = sampleViewPositionZ(uv, 1, 1);
+
+	// Calculate derivatives.
+	Vec2 a = Vec2(l0.y, l0.z);
+	Vec2 b = Vec2(l0.x, l0.w);
+	const Vec2 dx = abs(a - b);
+
+	a = Vec2(l0.z, l0.w);
+	b = Vec2(l0.x, l0.y);
+	const Vec2 dy = abs(a - b);
+
+	F32 maxDerivativeX = max(dx.x, dx.y);
+	F32 maxDerivativeY = max(dy.x, dy.y);
+#else
 	// Get positions
 	// l1.z  l1.w
 	// l1.x  l1.y
@@ -79,6 +105,8 @@ void main()
 
 	F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
 	F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
+#endif
+
 	maxDerivativeX = subgroupMax(maxDerivativeX);
 	maxDerivativeY = subgroupMax(maxDerivativeY);
 

+ 19 - 7
AnKi/Shaders/IndirectSpecular.glsl

@@ -80,16 +80,28 @@ void main()
 	const Vec3 reflDir = reflect(-viewDir, viewNormal);
 #endif
 
+	// Is the surface smooth enough to deserve SSR? The factor fades to zero as roughness approaches the cutoff
+	const F32 ssrFactor = saturate(1.0f - pow(roughness / u_unis.m_roughnessCutoff, 16.0f));
+
 	// Do the heavy work
 	Vec3 hitPoint;
 	F32 hitAttenuation;
-	const U32 lod = 8u; // Use the max LOD for ray marching
-	const U32 step = u_unis.m_firstStepPixels;
-	const F32 stepf = F32(step);
-	const F32 minStepf = stepf / 4.0;
-	raymarchGroundTruth(viewPos, reflDir, uv, depth, u_unis.m_projMat, u_unis.m_maxSteps, u_depthRt,
-						u_trilinearClampSampler, F32(lod), u_unis.m_depthBufferSize, step,
-						U32((stepf - minStepf) * noise.x + minStepf), hitPoint, hitAttenuation);
+	if(ssrFactor > EPSILON)
+	{
+		const U32 lod = 8u; // Use the max LOD for ray marching
+		const U32 step = u_unis.m_firstStepPixels;
+		const F32 stepf = F32(step);
+		const F32 minStepf = stepf / 4.0;
+		raymarchGroundTruth(viewPos, reflDir, uv, depth, u_unis.m_projMat, u_unis.m_maxSteps, u_depthRt,
+							u_trilinearClampSampler, F32(lod), u_unis.m_depthBufferSize, step,
+							U32((stepf - minStepf) * noise.x + minStepf), hitPoint, hitAttenuation);
+
+		hitAttenuation *= ssrFactor;
+	}
+	else
+	{
+		hitAttenuation = 0.0f;
+	}
 
 #if EXTRA_REJECTION
 	// Reject backfacing

+ 3 - 3
AnKi/Shaders/LightShadingApplyIndirect.ankiprog

@@ -114,9 +114,9 @@ void main()
 
 	// Do specular
 	const Vec2 ndc = UV_TO_NDC(in_uv);
-	const Vec4 viewPos4 = u_clusteredShading.m_matrices.m_invertedProjectionJitter * Vec4(ndc, depthCenter, 1.0);
-	const Vec3 viewPos = viewPos4.xyz / viewPos4.w;
-	const ANKI_RP Vec3 viewDir = normalize(-viewPos);
+	const Vec4 worldPos4 = u_clusteredShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depthCenter, 1.0);
+	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+	const ANKI_RP Vec3 viewDir = normalize(u_clusteredShading.m_cameraPosition - worldPos);
 	const F32 NoV = max(0.0, dot(gbuffer.m_normal, viewDir));
 	const Vec3 env = specularDFG(gbuffer.m_f0, gbuffer.m_roughness, u_integrationLut, u_linearAnyClampSampler, NoV);
 	specular *= env;

+ 42 - 5
AnKi/Shaders/VrsSriGeneration.glsl

@@ -10,12 +10,16 @@
 #include <AnKi/Shaders/TonemappingFunctions.glsl>
 
 // Find the maximum luma derivative in x and y, relative to the average luma of the block.
-// Each thread handles a 2x4 region.
+// Each thread handles a 2x2 region when using 8x8 VRS tiles and a 2x4 region when using 16x16 VRS tiles.
 
 layout(set = 0, binding = 0) uniform ANKI_RP texture2D u_inputTex;
 layout(set = 0, binding = 1) uniform sampler u_nearestClampSampler;
 
+#if SRI_TEXEL_DIMENSION == 8
+const UVec2 REGION_SIZE = UVec2(2u, 2u);
+#else
 const UVec2 REGION_SIZE = UVec2(2u, 4u);
+#endif
 
 const UVec2 WORKGROUP_SIZE = UVec2(SRI_TEXEL_DIMENSION) / REGION_SIZE;
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
@@ -44,8 +48,38 @@ shared Vec2 s_maxDerivative[SHARED_MEMORY_ENTRIES];
 
 void main()
 {
-	const Vec2 uv = Vec2(gl_GlobalInvocationID.xy) * Vec2(REGION_SIZE) * u_oneOverViewportSize;
+	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) * Vec2(REGION_SIZE) + 0.5) * u_oneOverViewportSize;
 
+#if SRI_TEXEL_DIMENSION == 8
+	// Get luminance.
+	//       l1.y
+	// l0.z  l0.w  l1.x
+	// l0.x  l0.y
+	Vec4 l0;
+	l0.x = sampleLuma(0, 0);
+	l0.y = sampleLuma(1, 0);
+	l0.z = sampleLuma(0, 1);
+	l0.w = sampleLuma(1, 1);
+
+	Vec2 l1;
+	l1.x = sampleLuma(2, 1);
+	l1.y = sampleLuma(1, 2);
+
+	// Calculate derivatives.
+	Vec2 a = Vec2(l0.y, l1.x);
+	Vec2 b = Vec2(l0.x, l0.w);
+	const Vec2 dx = abs(a - b);
+
+	a = Vec2(l0.z, l1.y);
+	b = Vec2(l0.x, l0.w);
+	const Vec2 dy = abs(a - b);
+
+	F32 maxDerivativeX = max(dx.x, dx.y);
+	F32 maxDerivativeY = max(dy.x, dy.y);
+
+	// Calculate average luma.
+	F32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
+#else
 	// Get luminance.
 	//       l2.z
 	// l1.z  l1.w  l2.y
@@ -80,12 +114,15 @@ void main()
 
 	F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
 	F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
-	maxDerivativeX = subgroupMax(maxDerivativeX);
-	maxDerivativeY = subgroupMax(maxDerivativeY);
 
-	// Calculate average luma in block.
+	// Calculate average luma.
 	const Vec4 sumL0L1 = l0 + l1;
 	F32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
+#endif
+
+	// Share values in subgroup.
+	maxDerivativeX = subgroupMax(maxDerivativeX);
+	maxDerivativeY = subgroupMax(maxDerivativeY);
 	averageLuma = subgroupAdd(averageLuma);
 
 #if SHARED_MEMORY

+ 2 - 1
AnKi/Ui/CMakeLists.txt

@@ -1,4 +1,5 @@
 file(GLOB_RECURSE sources *.cpp)
-add_library(AnKiUi ${sources})
+file(GLOB_RECURSE headers *.h)
+add_library(AnKiUi ${sources} ${headers})
 target_compile_definitions(AnKiUi PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiUi AnKiGr AnKiResource AnKiImGui)

+ 8 - 3
AnKi/Ui/UiObject.h

@@ -26,9 +26,14 @@ public:
 
 	UiAllocator getAllocator() const;
 
-	Atomic<I32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	/// Set the global IMGUI allocator.
@@ -60,7 +65,7 @@ public:
 
 protected:
 	UiManager* m_manager;
-	Atomic<I32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 };
 /// @}
 

+ 4 - 4
AnKi/Util/Allocator.h

@@ -79,7 +79,7 @@ public:
 		m_pool = balloc.m_pool;
 		if(m_pool)
 		{
-			m_pool->getRefcount().fetchAdd(1);
+			m_pool->retain();
 		}
 	}
 
@@ -95,7 +95,7 @@ public:
 
 		::new(m_pool) TPool(allocCb, allocCbUserData, std::forward<TArgs>(args)...);
 
-		m_pool->getRefcount().store(1);
+		m_pool->retain();
 	}
 
 	/// Destructor
@@ -355,7 +355,7 @@ private:
 		if(b.m_pool)
 		{
 			m_pool = b.m_pool;
-			m_pool->getRefcount().fetchAdd(1);
+			m_pool->retain();
 		}
 	}
 
@@ -363,7 +363,7 @@ private:
 	{
 		if(m_pool)
 		{
-			auto count = m_pool->getRefcount().fetchSub(1);
+			auto count = m_pool->release();
 			if(count == 1)
 			{
 				auto allocCb = m_pool->getAllocationCallback();

+ 3 - 1
AnKi/Util/CMakeLists.txt

@@ -38,6 +38,8 @@ elseif(WINDOWS)
 	set(sources ${sources} INotifyWindows.cpp)
 endif()
 
-add_library(AnKiUtil ${sources})
+file(GLOB_RECURSE headers *.h)
+
+add_library(AnKiUtil ${sources} ${headers})
 target_compile_definitions(AnKiUtil PRIVATE -DANKI_SOURCE_FILE)
 target_link_libraries(AnKiUtil AnKiTinyXml2)

+ 8 - 4
AnKi/Util/Memory.h

@@ -65,10 +65,14 @@ public:
 	/// @param[in, out] ptr Memory block to deallocate
 	void free(void* ptr);
 
-	/// Get refcount.
-	Atomic<U32>& getRefcount()
+	void retain() const
 	{
-		return m_refcount;
+		m_refcount.fetchAdd(1);
+	}
+
+	I32 release() const
+	{
+		return m_refcount.fetchSub(1);
 	}
 
 	/// Get number of users.
@@ -125,7 +129,7 @@ protected:
 
 private:
 	/// Refcount.
-	Atomic<U32> m_refcount = {0};
+	mutable Atomic<I32> m_refcount = {0};
 
 	/// Optional name.
 	char* m_name = nullptr;

+ 3 - 9
AnKi/Util/Ptr.h

@@ -34,13 +34,7 @@ public:
 		return m_ptr;
 	}
 
-	T* get()
-	{
-		ANKI_ASSERT(m_ptr);
-		return m_ptr;
-	}
-
-	const T* get() const
+	T* get() const
 	{
 		ANKI_ASSERT(m_ptr);
 		return m_ptr;
@@ -448,7 +442,7 @@ public:
 		destroy();
 		if(ptr)
 		{
-			ptr->getRefcount().fetchAdd(1);
+			ptr->retain();
 			m_ptr = ptr;
 		}
 	}
@@ -458,7 +452,7 @@ private:
 	{
 		if(m_ptr)
 		{
-			auto count = m_ptr->getRefcount().fetchSub(1);
+			auto count = m_ptr->release();
 			if(ANKI_UNLIKELY(count == 1))
 			{
 				TDeleter deleter;

+ 1 - 1
CMakeLists.txt

@@ -216,7 +216,7 @@ if(NOT MSVC)
 	set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LINKER_FLAGS}")
 else()
 	#ProcessorCount(PC)
-	#add_definitions("/MP${PC}")
+	add_compile_options("/MP")
 
 	if(${CMAKE_BUILD_TYPE} STREQUAL "Release" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
 		#add_definitions("/Ox")

+ 5 - 5
Tests/Resource/ResourceManager.cpp

@@ -48,24 +48,24 @@ ANKI_TEST(Resource, ResourceManager)
 	{
 		DummyResourcePtr a;
 		ANKI_TEST_EXPECT_NO_ERR(resources->loadResource("blah", a));
-		auto refcount = a->getRefcount().load();
+		auto refcount = a->getRefcount();
 
 		DummyResourcePtr b;
 		ANKI_TEST_EXPECT_NO_ERR(resources->loadResource("blah", b));
-		ANKI_TEST_EXPECT_EQ(b->getRefcount().load(), a->getRefcount().load());
-		ANKI_TEST_EXPECT_EQ(a->getRefcount().load(), refcount + 1);
+		ANKI_TEST_EXPECT_EQ(b->getRefcount(), a->getRefcount());
+		ANKI_TEST_EXPECT_EQ(a->getRefcount(), refcount + 1);
 
 		ANKI_TEST_EXPECT_EQ(b.get(), a.get());
 
 		// Again
 		DummyResourcePtr c;
 		ANKI_TEST_EXPECT_NO_ERR(resources->loadResource("blah", c));
-		ANKI_TEST_EXPECT_EQ(a->getRefcount().load(), refcount + 2);
+		ANKI_TEST_EXPECT_EQ(a->getRefcount(), refcount + 2);
 
 		// Load something else
 		DummyResourcePtr d;
 		ANKI_TEST_EXPECT_NO_ERR(resources->loadResource("blih", d));
-		ANKI_TEST_EXPECT_EQ(a->getRefcount().load(), refcount + 2);
+		ANKI_TEST_EXPECT_EQ(a->getRefcount(), refcount + 2);
 	}
 
 	// Error