
Add flushing and invalidation support to GPU buffers

Panagiotis Christopoulos Charitos, 4 years ago
commit ddae6246dc

+ 3 - 0
AnKi/Config.h.cmake

@@ -89,6 +89,9 @@
 #	error Unknown OS
 #endif
 
+// Mobile or not
+#define ANKI_PLATFORM_MOBILE (ANKI_OS_ANDROID || ANKI_OS_IOS)
+
 // POSIX system or not
 #if ANKI_OS_LINUX || ANKI_OS_ANDROID || ANKI_OS_MACOS || ANKI_OS_IOS
 #	define ANKI_POSIX 1

+ 10 - 0
AnKi/Gr/Buffer.h

@@ -76,6 +76,16 @@ public:
 	/// @param access The access to the buffer.
 	void* map(PtrSize offset, PtrSize range, BufferMapAccessBit access);
 
+	/// Flush the buffer from the CPU caches. Call it to make CPU writes visible to the GPU.
+	/// @param offset The starting offset.
+	/// @param range The range to flush or MAX_PTR_SIZE to flush until the end of the buffer.
+	void flush(PtrSize offset, PtrSize range) const;
+
+	/// Invalidate the buffer from the CPU caches. Call it to make GPU writes visible to the CPU.
+	/// @param offset The starting offset.
+	/// @param range The range to invalidate or MAX_PTR_SIZE to invalidate until the end of the buffer.
+	void invalidate(PtrSize offset, PtrSize range) const;
+
 	/// Convenience map method.
 	/// @param offset The starting offset.
 	/// @param elementCount The number of T elements to map.
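
A minimal usage sketch of the two new calls (not part of the commit, mirroring the pattern the commit applies in Font.cpp below); the buffer size, data pointers and the GrManager pointer `gr` are hypothetical. On coherent memory both calls are no-ops, so calling them unconditionally is the portable pattern:

	// Upload path: map, write, flush, then unmap. flush() makes the CPU
	// writes visible to the GPU on non-coherent memory.
	BufferPtr buff = gr->newBuffer(
		BufferInitInfo(256, BufferUsageBit::TRANSFER_SOURCE, BufferMapAccessBit::WRITE, "Example"));
	void* mapped = buff->map(0, 256, BufferMapAccessBit::WRITE);
	memcpy(mapped, srcData, 256);
	buff->flush(0, MAX_PTR_SIZE); // MAX_PTR_SIZE = until the end of the buffer
	buff->unmap();

	// Readback path: invalidate() before reading data the GPU has written.
	void* read = readBuff->map(0, 256, BufferMapAccessBit::READ);
	readBuff->invalidate(0, MAX_PTR_SIZE);
	// ... read the contents through `read` ...
	readBuff->unmap();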

+ 1 - 0
AnKi/Gr/Vulkan/AccelerationStructureImpl.cpp

@@ -120,6 +120,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 			m_topLevelInfo.m_blas.emplaceBack(getAllocator(), inf.m_topLevel.m_instances[i].m_bottomLevel);
 		}
 
+		m_topLevelInfo.m_instancesBuffer->flush(0, MAX_PTR_SIZE);
 		m_topLevelInfo.m_instancesBuffer->unmap();
 
 		// Geom

+ 12 - 0
AnKi/Gr/Vulkan/Buffer.cpp

@@ -34,4 +34,16 @@ void Buffer::unmap()
 	self.unmap();
 }
 
+void Buffer::flush(PtrSize offset, PtrSize range) const
+{
+	ANKI_VK_SELF_CONST(BufferImpl);
+	self.flush(offset, range);
+}
+
+void Buffer::invalidate(PtrSize offset, PtrSize range) const
+{
+	ANKI_VK_SELF_CONST(BufferImpl);
+	self.invalidate(offset, range);
+}
+
 } // end namespace anki

+ 30 - 20
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -22,6 +22,18 @@ BufferImpl::~BufferImpl()
 	{
 		getGrManagerImpl().getGpuMemoryManager().freeMemory(m_memHandle);
 	}
+
+#if ANKI_EXTRA_CHECKS
+	if(m_needsFlush && m_flushCount.load() == 0)
+	{
+		ANKI_VK_LOGW("Buffer needed flushing but you never flushed");
+	}
+
+	if(m_needsInvalidate && m_invalidateCount.load() == 0)
+	{
+		ANKI_VK_LOGW("Buffer needed invalidation but you never invalidated");
+	}
+#endif
 }
 
 Error BufferImpl::init(const BufferInitInfo& inf)
@@ -64,8 +76,9 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	{
 		// Only write, probably for uploads
 
-		VkMemoryPropertyFlags preferDeviceLocal;
-		VkMemoryPropertyFlags avoidDeviceLocal;
+		VkMemoryPropertyFlags preferDeviceLocal = 0;
+		VkMemoryPropertyFlags avoidDeviceLocal = 0;
+#if !ANKI_PLATFORM_MOBILE
 		if((usage & (~BufferUsageBit::ALL_TRANSFER)) != BufferUsageBit::NONE)
 		{
 			// Will be used for something other than transfer, try to put it in the device
@@ -78,6 +91,7 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 			preferDeviceLocal = 0;
 			avoidDeviceLocal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 		}
+#endif
 
 		// Device & host & coherent but not cached
 		memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
@@ -88,19 +102,13 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 		// Fallback: host & coherent and not cached
 		if(memIdx == MAX_U32)
 		{
+#if !ANKI_PLATFORM_MOBILE
 			ANKI_VK_LOGW("Using a fallback mode for write-only buffer");
+#endif
 			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
 				req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 				VK_MEMORY_PROPERTY_HOST_CACHED_BIT | avoidDeviceLocal);
 		}
-
-		// Fallback: just host
-		if(memIdx == MAX_U32)
-		{
-			ANKI_VK_LOGW("Using a fallback mode for write-only buffer");
-			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
-																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
-		}
 	}
 	else if(!!(access & BufferMapAccessBit::READ))
 	{
@@ -116,18 +124,12 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 		// Fallback: Just cached
 		if(memIdx == MAX_U32)
 		{
+#if !ANKI_PLATFORM_MOBILE
 			ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
+#endif
 			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
 				req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 0);
 		}
-
-		// Fallback: Just host
-		if(memIdx == MAX_U32)
-		{
-			ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
-			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
-																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
-		}
 	}
 	else
 	{
@@ -152,6 +154,16 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	const VkPhysicalDeviceMemoryProperties& props = getGrManagerImpl().getMemoryProperties();
 	m_memoryFlags = props.memoryTypes[memIdx].propertyFlags;
 
+	if(!!(m_access & BufferMapAccessBit::READ) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+	{
+		m_needsInvalidate = true;
+	}
+
+	if(!!(m_access & BufferMapAccessBit::WRITE) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+	{
+		m_needsFlush = true;
+	}
+
 	// Allocate
 	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, U32(req.alignment), true, m_memHandle);
 
@@ -203,8 +215,6 @@ void* BufferImpl::map(PtrSize offset, PtrSize range, BufferMapAccessBit access)
 	m_mapped = true;
 #endif
 
-	// TODO Flush or invalidate caches
-
 	return static_cast<void*>(static_cast<U8*>(ptr) + offset);
 }
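
For context on the two flags set above (not part of the commit): VK_MEMORY_PROPERTY_HOST_COHERENT_BIT means the driver keeps the CPU and GPU views of the memory in sync automatically, so explicit flushes and invalidations are only needed when the selected memory type lacks that bit. That is typical for HOST_CACHED readback memory, and on some mobile GPUs for host-visible memory in general. The rule as a standalone sketch:

	// Sketch mirroring the logic in BufferImpl::init() above.
	inline bool needsManualFlush(VkMemoryPropertyFlags flags, bool cpuWrites)
	{
		return cpuWrites && !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
	}

	inline bool needsManualInvalidate(VkMemoryPropertyFlags flags, bool cpuReads)
	{
		return cpuReads && !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
	}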
 

+ 46 - 1
AnKi/Gr/Vulkan/BufferImpl.h

@@ -21,6 +21,8 @@ class BufferImpl final : public Buffer, public VulkanObject<Buffer, BufferImpl>
 public:
 	BufferImpl(GrManager* manager, CString name)
 		: Buffer(manager, name)
+		, m_needsFlush(false)
+		, m_needsInvalidate(false)
 	{
 	}
 
@@ -38,7 +40,6 @@ public:
 #if ANKI_EXTRA_CHECKS
 		m_mapped = false;
 #endif
-		// TODO Flush or invalidate caches
 	}
 
 	VkBuffer getHandle() const
@@ -62,14 +63,58 @@ public:
 							VkAccessFlags& srcAccesses, VkPipelineStageFlags& dstStages,
 							VkAccessFlags& dstAccesses) const;
 
+	ANKI_FORCE_INLINE void flush(PtrSize offset, PtrSize range) const
+	{
+		ANKI_ASSERT(!!(m_access & BufferMapAccessBit::WRITE) && "No need to flush when the CPU doesn't write");
+		ANKI_ASSERT(offset < m_size);
+		range = (range == MAX_PTR_SIZE) ? m_size - offset : range;
+		ANKI_ASSERT(offset + range <= m_size);
+		if(m_needsFlush)
+		{
+			VkMappedMemoryRange vkrange = {};
+			vkrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+			vkrange.memory = m_memHandle.m_memory;
+			vkrange.offset = offset;
+			vkrange.size = range;
+			ANKI_VK_CHECKF(vkFlushMappedMemoryRanges(getDevice(), 1, &vkrange));
+#if ANKI_EXTRA_CHECKS
+			m_flushCount.fetchAdd(1);
+#endif
+		}
+	}
+
+	ANKI_FORCE_INLINE void invalidate(PtrSize offset, PtrSize range) const
+	{
+		ANKI_ASSERT(!!(m_access & BufferMapAccessBit::READ) && "No need to invalidate when the CPU doesn't read");
+		ANKI_ASSERT(offset < m_size);
+		range = (range == MAX_PTR_SIZE) ? m_size - offset : range;
+		ANKI_ASSERT(offset + range <= m_size);
+		if(m_needsInvalidate)
+		{
+			VkMappedMemoryRange vkrange = {};
+			vkrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+			vkrange.memory = m_memHandle.m_memory;
+			vkrange.offset = offset;
+			vkrange.size = range;
+			ANKI_VK_CHECKF(vkInvalidateMappedMemoryRanges(getDevice(), 1, &vkrange));
+#if ANKI_EXTRA_CHECKS
+			m_invalidateCount.fetchAdd(1);
+#endif
+		}
+	}
+
 private:
 	VkBuffer m_handle = VK_NULL_HANDLE;
 	GpuMemoryHandle m_memHandle;
 	VkMemoryPropertyFlags m_memoryFlags = 0;
 	PtrSize m_actualSize = 0;
+	Bool m_needsFlush : 1;
+	Bool m_needsInvalidate : 1;
 
 #if ANKI_EXTRA_CHECKS
 	Bool m_mapped = false;
+	mutable Atomic<U32> m_flushCount = {0};
+	mutable Atomic<U32> m_invalidateCount = {0};
 #endif
 
 	Bool isCreated() const
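
One caveat the commit leaves implicit: for non-coherent memory, the Vulkan spec requires VkMappedMemoryRange::offset to be a multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize, and size to be a multiple of it as well or to reach the end of the allocation. A hedged sketch of the rounding (not part of the commit; atomSize is assumed to come from vkGetPhysicalDeviceProperties()):

	// Round [offset, offset + size) outward to nonCoherentAtomSize boundaries,
	// clamping the end to the allocation size (also valid per the spec).
	inline void alignMappedRange(VkDeviceSize& offset, VkDeviceSize& size, VkDeviceSize atomSize,
								 VkDeviceSize allocationSize)
	{
		const VkDeviceSize begin = offset / atomSize * atomSize; // Round down
		VkDeviceSize end = offset + size;
		end = (end + atomSize - 1) / atomSize * atomSize; // Round up
		end = (end > allocationSize) ? allocationSize : end;
		offset = begin;
		size = end - begin;
	}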

+ 1 - 0
AnKi/Renderer/DepthDownscale.h

@@ -50,6 +50,7 @@ public:
 		width = m_copyToBuff.m_lastMipWidth;
 		height = m_copyToBuff.m_lastMipHeight;
 		ANKI_ASSERT(m_copyToBuff.m_buffAddr);
+		m_copyToBuff.m_buff->invalidate(0, MAX_PTR_SIZE);
 		depthValues = static_cast<F32*>(m_copyToBuff.m_buffAddr);
 	}
 

+ 3 - 0
AnKi/Scene/Components/GpuParticleEmitterComponent.cpp

@@ -71,6 +71,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 		props->m_maxStartingPosition = inProps.m_particle.m_maxStartingPosition;
 		props->m_particleCount = inProps.m_maxNumOfParticles;
 
+		m_propsBuff->flush(0, MAX_PTR_SIZE);
 		m_propsBuff->unmap();
 	}
 
@@ -90,6 +91,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 			particle->m_life = -1.0f; // Force GPU to init the particle
 		}
 
+		m_particlesBuff->flush(0, MAX_PTR_SIZE);
 		m_particlesBuff->unmap();
 	}
 
@@ -112,6 +114,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 			*randFactors = getRandomRange(0.0f, 1.0f);
 		}
 
+		m_randFactorsBuff->flush(0, MAX_PTR_SIZE);
 		m_randFactorsBuff->unmap();
 	}
 

+ 2 - 0
AnKi/Scene/DebugDrawer.cpp

@@ -89,6 +89,7 @@ Error DebugDrawer2::init(ResourceManager* rsrcManager)
 		verts[6] = Vec3(-size, -size, -size); // back bottom left
 		verts[7] = Vec3(size, -size, -size); // back bottom right
 
+		m_cubePositionsBuffer->flush(0, MAX_PTR_SIZE);
 		m_cubePositionsBuffer->unmap();
 	}
 
@@ -131,6 +132,7 @@ Error DebugDrawer2::init(ResourceManager* rsrcManager)
 		indices[indexCount++] = 3;
 		indices[indexCount++] = 7;
 
+		m_cubeIndicesBuffer->flush(0, MAX_PTR_SIZE);
 		m_cubeIndicesBuffer->unmap();
 	}
 

+ 1 - 0
AnKi/Ui/Font.cpp

@@ -76,6 +76,7 @@ void Font::createTexture(const void* data, U32 width, U32 height)
 		BufferInitInfo(buffSize, BufferUsageBit::TRANSFER_SOURCE, BufferMapAccessBit::WRITE, "UI"));
 	void* mapped = buff->map(0, buffSize, BufferMapAccessBit::WRITE);
 	memcpy(mapped, data, buffSize);
+	buff->flush(0, buffSize);
 	buff->unmap();
 
 	// Create the texture