Преглед на файлове

Add support for VK_KHR_buffer_device_address

Panagiotis Christopoulos Charitos преди 5 години
родител
ревизия
f165a2577a

+ 9 - 0
src/anki/gr/Buffer.h

@@ -20,6 +20,7 @@ public:
 	PtrSize m_size = 0;
 	BufferUsageBit m_usage = BufferUsageBit::NONE;
 	BufferMapAccessBit m_access = BufferMapAccessBit::NONE;
+	Bool m_exposeGpuAddress = false; ///< Expose the buffer's GPU address.
 
 	BufferInitInfo(CString name = {})
 		: GrBaseInitInfo(name)
@@ -77,10 +78,18 @@ public:
 	/// Unmap the buffer.
 	void unmap();
 
+	/// Get the GPU address of the buffer.
+	U64 getGpuAddress() const
+	{
+		ANKI_ASSERT(m_gpuAddress);
+		return m_gpuAddress;
+	}
+
 protected:
 	PtrSize m_size = 0;
 	BufferUsageBit m_usage = BufferUsageBit::NONE;
 	BufferMapAccessBit m_access = BufferMapAccessBit::NONE;
+	U64 m_gpuAddress = 0;
 
 	/// Construct.
 	Buffer(GrManager* manager, CString name)

+ 22 - 2
src/anki/gr/vulkan/BufferImpl.cpp

@@ -43,9 +43,13 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
 	ci.size = size;
 	ci.usage = convertBufferUsageBit(usage);
+	if(inf.m_exposeGpuAddress)
+	{
+		ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+	}
 	ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
 	ci.queueFamilyIndexCount = 1;
-	U32 queueIdx = getGrManagerImpl().getGraphicsQueueFamily();
+	const U32 queueIdx = getGrManagerImpl().getGraphicsQueueFamily();
 	ci.pQueueFamilyIndices = &queueIdx;
 	ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
 	getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, m_handle);
@@ -144,7 +148,8 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	m_memoryFlags = props.memoryTypes[memIdx].propertyFlags;
 
 	// Allocate
-	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, U32(req.alignment), true, m_memHandle);
+	getGrManagerImpl().getGpuMemoryManager().allocateMemory(
+		memIdx, req.size, U32(req.alignment), true, inf.m_exposeGpuAddress, m_memHandle);
 
 	// Bind mem to buffer
 	{
@@ -152,6 +157,21 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 		ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
 	}
 
+	// Get GPU buffer address
+	if(inf.m_exposeGpuAddress)
+	{
+		VkBufferDeviceAddressInfo info = {};
+		info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
+		info.buffer = m_handle;
+		m_gpuAddress = vkGetBufferDeviceAddress(getDevice(), &info);
+
+		if(m_gpuAddress == 0)
+		{
+			ANKI_VK_LOGE("vkGetBufferDeviceAddress() failed");
+			return Error::FUNCTION_FAILED;
+		}
+	}
+
 	m_access = access;
 	m_size = inf.m_size;
 	m_actualSize = size;

+ 89 - 31
src/anki/gr/vulkan/GpuMemoryManager.cpp

@@ -18,7 +18,7 @@ public:
 	PtrSize m_chunkSize;
 };
 
-static const Array<ClassInf, CLASS_COUNT> CLASSES = {{{256_B, 16_KB},
+static const Array<ClassInf, CLASS_COUNT> CLASSES{{{256_B, 16_KB},
 	{4_KB, 256_KB},
 	{128_KB, 8_MB},
 	{1_MB, 64_MB},
@@ -46,6 +46,7 @@ public:
 	Mutex m_mtx;
 	VkDevice m_dev = VK_NULL_HANDLE;
 	U8 m_memTypeIdx = MAX_U8;
+	Bool m_exposesBufferGpuAddress = false;
 
 	Error allocate(U32 classIdx, ClassGpuAllocatorMemory*& cmem) override
 	{
@@ -68,6 +69,15 @@ public:
 			ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
 			ci.allocationSize = CLASSES[classIdx].m_chunkSize;
 			ci.memoryTypeIndex = m_memTypeIdx;
+
+			VkMemoryAllocateFlagsInfo flags = {};
+			flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
+			flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
+			if(m_exposesBufferGpuAddress)
+			{
+				ci.pNext = &flags;
+			}
+
 			ANKI_VK_CHECKF(vkAllocateMemory(m_dev, &ci, nullptr, &mem->m_handle));
 
 			mem->m_classIdx = U8(classIdx);
@@ -134,7 +144,7 @@ public:
 		}
 	}
 
-	// Mapp memory
+	// Map memory
 	void* mapMemory(ClassGpuAllocatorMemory* cmem)
 	{
 		ANKI_ASSERT(cmem);
@@ -169,9 +179,12 @@ GpuMemoryManager::~GpuMemoryManager()
 
 void GpuMemoryManager::destroy()
 {
-	for(Interface& iface : m_ifaces)
+	for(U32 i = 0; i < m_ifaces.getSize(); ++i)
 	{
-		iface.collectGarbage();
+		for(U32 j = 0; j < 2; j++)
+		{
+			m_ifaces[i][j].collectGarbage();
+		}
 	}
 
 	m_ifaces.destroy(m_alloc);
@@ -198,46 +211,88 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>
 	m_alloc = alloc;
 
 	m_ifaces.create(alloc, m_memoryProperties.memoryTypeCount);
-	for(U32 i = 0; i < m_ifaces.getSize(); ++i)
+	for(U32 memTypeIdx = 0; memTypeIdx < m_ifaces.getSize(); ++memTypeIdx)
 	{
-		Interface& iface = m_ifaces[i];
-
-		iface.m_alloc = alloc;
-		iface.m_dev = dev;
-		iface.m_memTypeIdx = U8(i);
+		for(U32 type = 0; type < 2; ++type)
+		{
+			m_ifaces[memTypeIdx][type].m_alloc = alloc;
+			m_ifaces[memTypeIdx][type].m_dev = dev;
+			m_ifaces[memTypeIdx][type].m_memTypeIdx = U8(memTypeIdx);
+			m_ifaces[memTypeIdx][type].m_exposesBufferGpuAddress = (type == 1);
+		}
 	}
 
 	// One allocator per type per linear/non-linear resources
-	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount * 2);
-	for(U32 i = 0; i < m_callocs.getSize(); ++i)
+	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount);
+	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
 	{
-		m_callocs[i].init(m_alloc, &m_ifaces[i / 2]);
-
-		const U32 memTypeIdx = i / 2;
-		const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
-		m_callocs[i].m_isDeviceMemory =
-			!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
+		for(U32 type = 0; type < 3; ++type)
+		{
+			const Bool exposesBufferGpuAddress = (type == 2);
+			ANKI_ASSERT(
+				m_ifaces[memTypeIdx][exposesBufferGpuAddress].m_exposesBufferGpuAddress == exposesBufferGpuAddress);
+			m_callocs[memTypeIdx][type].init(m_alloc, &m_ifaces[memTypeIdx][exposesBufferGpuAddress]);
+
+			const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
+			m_callocs[memTypeIdx][type].m_isDeviceMemory =
+				!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
+		}
 	}
 }
 
-void GpuMemoryManager::allocateMemory(
-	U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource, GpuMemoryHandle& handle)
+void GpuMemoryManager::allocateMemory(U32 memTypeIdx,
+	PtrSize size,
+	U32 alignment,
+	Bool linearResource,
+	Bool exposesBufferGpuAddress,
+	GpuMemoryHandle& handle)
 {
-	ClassGpuAllocator& calloc = m_callocs[memTypeIdx * 2 + ((linearResource) ? 0 : 1)];
-	Error err = calloc.allocate(size, alignment, handle.m_classHandle);
+	U32 type;
+	if(!linearResource)
+	{
+		type = 0;
+	}
+	else if(!exposesBufferGpuAddress)
+	{
+		type = 1;
+	}
+	else
+	{
+		ANKI_ASSERT(linearResource);
+		type = 2;
+	}
+
+	ClassGpuAllocator& calloc = m_callocs[memTypeIdx][type];
+	const Error err = calloc.allocate(size, alignment, handle.m_classHandle);
 	(void)err;
 
 	handle.m_memory = static_cast<Memory*>(handle.m_classHandle.m_memory)->m_handle;
 	handle.m_offset = handle.m_classHandle.m_offset;
 	handle.m_linear = linearResource;
 	handle.m_memTypeIdx = U8(memTypeIdx);
+	handle.m_exposesBufferGpuAddress = exposesBufferGpuAddress;
 }
 
 void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
 {
 	ANKI_ASSERT(handle);
 
-	ClassGpuAllocator& calloc = m_callocs[handle.m_memTypeIdx * 2 + ((handle.m_linear) ? 0 : 1)];
+	U32 type;
+	if(handle.m_exposesBufferGpuAddress)
+	{
+		type = 2;
+	}
+	else if(handle.m_linear)
+	{
+		type = 1;
+	}
+	else
+	{
+		ANKI_ASSERT(!handle.m_exposesBufferGpuAddress);
+		type = 0;
+	}
+
+	ClassGpuAllocator& calloc = m_callocs[handle.m_memTypeIdx][type];
 	calloc.free(handle.m_classHandle);
 
 	handle = {};
@@ -247,7 +302,7 @@ void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
 {
 	ANKI_ASSERT(handle);
 
-	Interface& iface = m_ifaces[handle.m_memTypeIdx];
+	Interface& iface = m_ifaces[handle.m_memTypeIdx][handle.m_exposesBufferGpuAddress];
 	U8* out = static_cast<U8*>(iface.mapMemory(handle.m_classHandle.m_memory));
 	return static_cast<void*>(out + handle.m_offset);
 }
@@ -299,15 +354,18 @@ void GpuMemoryManager::getAllocatedMemory(PtrSize& gpuMemory, PtrSize& cpuMemory
 	gpuMemory = 0;
 	cpuMemory = 0;
 
-	for(const ClassAllocator& alloc : m_callocs)
+	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
 	{
-		if(alloc.m_isDeviceMemory)
-		{
-			gpuMemory += alloc.getAllocatedMemory();
-		}
-		else
+		for(U32 type = 0; type < 3; ++type)
 		{
-			cpuMemory += alloc.getAllocatedMemory();
+			if(m_callocs[memTypeIdx][type].m_isDeviceMemory)
+			{
+				gpuMemory += m_callocs[memTypeIdx][type].getAllocatedMemory();
+			}
+			else
+			{
+				cpuMemory += m_callocs[memTypeIdx][type].getAllocatedMemory();
+			}
 		}
 	}
 }

+ 9 - 3
src/anki/gr/vulkan/GpuMemoryManager.h

@@ -32,6 +32,7 @@ private:
 	ClassGpuAllocatorHandle m_classHandle;
 	U8 m_memTypeIdx = MAX_U8;
 	Bool m_linear = false;
+	Bool m_exposesBufferGpuAddress = false;
 };
 
 /// Dynamic GPU memory allocator for all types.
@@ -47,7 +48,12 @@ public:
 	void destroy();
 
 	/// Allocate memory.
-	void allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource, GpuMemoryHandle& handle);
+	void allocateMemory(U32 memTypeIdx,
+		PtrSize size,
+		U32 alignment,
+		Bool linearResource,
+		Bool exposesBufferGpuAddress,
+		GpuMemoryHandle& handle);
 
 	/// Free memory.
 	void freeMemory(GpuMemoryHandle& handle);
@@ -68,8 +74,8 @@ private:
 	class ClassAllocator;
 
 	GrAllocator<U8> m_alloc;
-	DynamicArray<Interface> m_ifaces;
-	DynamicArray<ClassAllocator> m_callocs;
+	DynamicArray<Array<Interface, 2>> m_ifaces;
+	DynamicArray<Array<ClassAllocator, 3>> m_callocs;
 	VkPhysicalDeviceMemoryProperties m_memoryProperties;
 };
 /// @}

+ 22 - 1
src/anki/gr/vulkan/GrManagerImpl.cpp

@@ -535,7 +535,7 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		// Enable the bindless features required
 		{
 			m_descriptorIndexingFeatures = {};
-			m_descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
+			m_descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES;
 
 			VkPhysicalDeviceFeatures2 features = {};
 			features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -566,6 +566,27 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 			ci.pNext = &m_descriptorIndexingFeatures;
 		}
 
+		// Enable the buffer address features required
+		{
+			m_bufferDeviceAddressFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES;
+
+			VkPhysicalDeviceFeatures2 features = {};
+			features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+			features.pNext = &m_bufferDeviceAddressFeatures;
+			vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+			if(!m_bufferDeviceAddressFeatures.bufferDeviceAddress)
+			{
+				ANKI_VK_LOGE("Buffer device address is required and not supported");
+				return Error::FUNCTION_FAILED;
+			}
+
+			m_bufferDeviceAddressFeatures.bufferDeviceAddressCaptureReplay = false;
+			m_bufferDeviceAddressFeatures.bufferDeviceAddressMultiDevice = false;
+
+			m_descriptorIndexingFeatures.pNext = &m_bufferDeviceAddressFeatures;
+		}
+
 		ANKI_VK_LOGI("Will enable the following device extensions:");
 		for(U32 i = 0; i < extensionsToEnableCount; ++i)
 		{

+ 2 - 1
src/anki/gr/vulkan/GrManagerImpl.h

@@ -248,7 +248,8 @@ private:
 
 	VkPhysicalDeviceProperties m_devProps = {};
 	VkPhysicalDeviceFeatures m_devFeatures = {};
-	VkPhysicalDeviceDescriptorIndexingFeaturesEXT m_descriptorIndexingFeatures = {};
+	VkPhysicalDeviceDescriptorIndexingFeatures m_descriptorIndexingFeatures = {};
+	VkPhysicalDeviceBufferDeviceAddressFeatures m_bufferDeviceAddressFeatures = {};
 
 	PFN_vkDebugMarkerSetObjectNameEXT m_pfnDebugMarkerSetObjectNameEXT = nullptr;
 	PFN_vkCmdDebugMarkerBeginEXT m_pfnCmdDebugMarkerBeginEXT = nullptr;

+ 1 - 0
src/anki/gr/vulkan/TextureImpl.cpp

@@ -355,6 +355,7 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 			requirements.memoryRequirements.size,
 			U32(requirements.memoryRequirements.alignment),
 			false,
+			false,
 			m_memHandle);
 
 		// Bind mem to image

+ 11 - 0
src/anki/shader_compiler/ShaderProgramParser.cpp

@@ -42,6 +42,9 @@ static const char* SHADER_HEADER = R"(#version 450 core
 #extension GL_EXT_shader_image_load_formatted : require
 #extension GL_EXT_nonuniform_qualifier : enable
 
+#extension GL_EXT_buffer_reference : enable
+#extension GL_ARB_gpu_shader_int64 : enable
+
 #define ANKI_MAX_BINDLESS_TEXTURES %u
 #define ANKI_MAX_BINDLESS_IMAGES %u
 
@@ -74,6 +77,8 @@ static const char* SHADER_HEADER = R"(#version 450 core
 
 #define Bool bool
 
+#define U64 uint64_t
+
 #define _ANKI_CONCATENATE(a, b) a##b
 #define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
 
@@ -126,6 +131,12 @@ static const char* SHADER_HEADER = R"(#version 450 core
 #define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id, defltVal) _ANKI_SCONST_X2(Vec2, F32, n, id, defltVal,)
 #define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id, defltVal) _ANKI_SCONST_X3(Vec3, F32, n, id, defltVal,)
 #define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id, defltVal) _ANKI_SCONST_X4(Vec4, F32, n, id, defltVal,)
+
+#define ANKI_REF(type) \
+	layout(buffer_reference, std430) buffer type##Ref \
+	{ \
+		type m_value; \
+	}
 )";
 
 static const U64 SHADER_HEADER_HASH = computeHash(SHADER_HEADER, sizeof(SHADER_HEADER));

+ 79 - 0
tests/gr/Gr.cpp

@@ -2380,4 +2380,83 @@ void main()
 	COMMON_END()
 }
 
+ANKI_TEST(Gr, BufferAddress)
+{
+	COMMON_BEGIN()
+
+	// Create program
+	static const char* PROG_SRC = R"(
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+ANKI_REF(Vec4);
+
+layout(push_constant) uniform u_
+{
+	U64 u_bufferAddress;
+	U64 u_padding0;
+};
+
+layout(set = 0, binding = 0) writeonly buffer ss_
+{
+	Vec4 u_result;
+};
+
+void main()
+{
+	u_result = Vec4Ref(u_bufferAddress).m_value + Vec4Ref(u_bufferAddress + 16u).m_value;
+})";
+
+	ShaderPtr shader = createShader(PROG_SRC, ShaderType::COMPUTE, *gr);
+	ShaderProgramInitInfo sprogInit;
+	sprogInit.m_shaders[ShaderType::COMPUTE] = shader;
+	ShaderProgramPtr prog = gr->newShaderProgram(sprogInit);
+
+	// Create buffers
+	BufferInitInfo info;
+	info.m_size = sizeof(Vec4) * 2;
+	info.m_usage = BufferUsageBit::ALL_COMPUTE;
+	info.m_access = BufferMapAccessBit::WRITE;
+	info.m_exposeGpuAddress = true;
+	BufferPtr ptrBuff = gr->newBuffer(info);
+
+	Vec4* mapped = static_cast<Vec4*>(ptrBuff->map(0, MAX_PTR_SIZE, BufferMapAccessBit::WRITE));
+	const Vec4 VEC(123.456f, -1.1f, 100.0f, -666.0f);
+	*mapped = VEC;
+	++mapped;
+	*mapped = VEC * 10.0f;
+	ptrBuff->unmap();
+
+	BufferPtr resBuff =
+		gr->newBuffer(BufferInitInfo(sizeof(Vec4), BufferUsageBit::ALL_COMPUTE, BufferMapAccessBit::READ));
+
+	// Run
+	CommandBufferInitInfo cinit;
+	cinit.m_flags = CommandBufferFlag::COMPUTE_WORK | CommandBufferFlag::SMALL_BATCH;
+	CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
+
+	cmdb->bindShaderProgram(prog);
+
+	struct Address
+	{
+		PtrSize m_address;
+		PtrSize m_padding;
+	} address;
+	address.m_address = ptrBuff->getGpuAddress();
+	cmdb->setPushConstants(&address, sizeof(address));
+
+	cmdb->bindStorageBuffer(0, 0, resBuff, 0, MAX_PTR_SIZE);
+
+	cmdb->dispatchCompute(1, 1, 1);
+
+	cmdb->flush();
+	gr->finish();
+
+	// Check
+	mapped = static_cast<Vec4*>(resBuff->map(0, MAX_PTR_SIZE, BufferMapAccessBit::READ));
+	ANKI_TEST_EXPECT_EQ(*mapped, VEC + VEC * 10.0f);
+	resBuff->unmap();
+
+	COMMON_END();
+}
+
 } // end namespace anki

+ 1 - 1
thirdparty

@@ -1 +1 @@
-Subproject commit 0f99da7dae3f33ee3caa74bbf7a1a828442f6f2f
+Subproject commit 21057dacf51dddfb7417d5651eda05094a630776