Przeglądaj źródła

vulkan: reduce memory usage when many shaders are being used with many draw calls.

Descriptor pools are now shared among shaders that have the same descriptor counts, instead of being unique per shader.
Sasha Szpakowski 7 miesięcy temu
rodzic
commit
92e79ab609

+ 48 - 1
src/modules/graphics/vulkan/Graphics.cpp

@@ -612,6 +612,7 @@ void Graphics::present(void *screenshotCallbackdata)
 	updateTemporaryResources();
 
 	currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
+	realFrameIndex++;
 
 	beginFrame();
 }
@@ -1413,7 +1414,7 @@ void Graphics::beginFrame()
 	Vulkan::resetShaderSwitches();
 
 	for (const auto &shader : usedShadersInFrame)
-		shader->newFrame();
+		shader->newFrame(realFrameIndex);
 	usedShadersInFrame.clear();
 
 	localUniformBuffer->nextFrame();
@@ -2852,6 +2853,52 @@ VkSampler Graphics::getCachedSampler(const SamplerState &samplerState)
 	}
 }
 
+// Packs the five per-set descriptor counts into a single 64-bit map key.
+// Each count occupies its own 12-bit field (5 * 12 = 60 bits), so counts up to
+// 4095 always produce distinct keys. With 8-bit fields, counts that differ by
+// 256 (for example 0 and 256) produced the same key.
+// Counts of 4096 or more are still masked and can alias.
+static uint64 getDescriptorPoolsKey(int dynamicUniformBuffers, int sampledTextures, int storageTextures, int texelBuffers, int storageBuffers)
+{
+	const uint64 fieldMask = 0xFFF;
+	const int fieldBits = 12;
+
+	// Unsigned casts keep the shifts and the returned value in the same type.
+	return (((uint64)dynamicUniformBuffers & fieldMask) << (fieldBits * 0))
+		| (((uint64)sampledTextures & fieldMask) << (fieldBits * 1))
+		| (((uint64)storageTextures & fieldMask) << (fieldBits * 2))
+		| (((uint64)texelBuffers    & fieldMask) << (fieldBits * 3))
+		| (((uint64)storageBuffers  & fieldMask) << (fieldBits * 4));
+}
+
+// Returns the descriptor pools registered for this exact combination of
+// descriptor counts, creating and registering them on the first request.
+// Every call adds one reference to the returned object; releaseDescriptorPools()
+// is the matching call that removes it.
+SharedDescriptorPools *Graphics::acquireDescriptorPools(int dynamicUniformBuffers, int sampledTextures, int storageTextures, int texelBuffers, int storageBuffers)
+{
+	const uint64 key = getDescriptorPoolsKey(dynamicUniformBuffers, sampledTextures, storageTextures, texelBuffers, storageBuffers);
+
+	auto existing = sharedDescriptorPools.find(key);
+	if (existing == sharedDescriptorPools.end())
+	{
+		// First user of this combination: create the pools with a single reference.
+		SharedDescriptorPoolsRef created{};
+		created.pools = new SharedDescriptorPools(device, dynamicUniformBuffers, sampledTextures, storageTextures, texelBuffers, storageBuffers);
+		created.referenceCount = 1;
+		sharedDescriptorPools[key] = created;
+
+		return created.pools;
+	}
+
+	// Already registered: hand out the same object and count the new user.
+	existing->second.referenceCount++;
+	return existing->second.pools;
+}
+
+// Gives back one reference obtained from acquireDescriptorPools(). When the
+// last reference is released the pools object is deleted and removed from the
+// registry. Passing nullptr, or pools that are not registered, is a no-op.
+void Graphics::releaseDescriptorPools(SharedDescriptorPools *p)
+{
+	// Shader keeps its pools pointer as nullptr until it has acquired pools,
+	// and passes that pointer here unconditionally when unloading.
+	if (p == nullptr)
+		return;
+
+	uint64 key = getDescriptorPoolsKey(p->dynamicUniformBuffers, p->sampledTextures, p->storageTextures, p->texelBuffers, p->storageBuffers);
+
+	auto it = sharedDescriptorPools.find(key);
+	if (it == sharedDescriptorPools.end())
+		return;
+
+	it->second.referenceCount--;
+	if (it->second.referenceCount <= 0)
+	{
+		delete it->second.pools;
+		// Erase through the iterator already in hand rather than looking the key up again.
+		sharedDescriptorPools.erase(it);
+	}
+}
+
 VkPipeline Graphics::createGraphicsPipeline(Shader *shader, const GraphicsPipelineConfigurationCore &configuration, const GraphicsPipelineConfigurationNoDynamicState *noDynamicStateConfiguration)
 {
 	VkGraphicsPipelineCreateInfo pipelineInfo{};

+ 13 - 0
src/modules/graphics/vulkan/Graphics.h

@@ -273,6 +273,8 @@ public:
 	void addReadbackCallback(std::function<void()> callback);
 	void submitGpuCommands(SubmitMode, void *screenshotCallbackData = nullptr);
 	VkSampler getCachedSampler(const SamplerState &sampler);
+	SharedDescriptorPools *acquireDescriptorPools(int dynamicUniformBuffers, int sampledTextures, int storageTextures, int texelBuffers, int storageBuffers);
+	void releaseDescriptorPools(SharedDescriptorPools *pools);
 	graphics::Shader::BuiltinUniformData getCurrentBuiltinUniformData();
 	const OptionalDeviceExtensions &getEnabledOptionalDeviceExtensions() const;
 	const OptionalInstanceExtensions &getEnabledOptionalInstanceExtensions() const;
@@ -287,6 +289,8 @@ public:
 
 	uint32 getDeviceApiVersion() const { return deviceApiVersion; }
 
+	uint64 getRealFrameIndex() const { return realFrameIndex; }
+
 protected:
 	graphics::ShaderStage *newShaderStageInternal(ShaderStageType stage, const std::string &cachekey, const std::string &source, bool gles) override;
 	graphics::Shader *newShaderInternal(StrongRef<love::graphics::ShaderStage> stages[SHADERSTAGE_MAX_ENUM], const Shader::CompileOptions &options) override;
@@ -298,6 +302,13 @@ protected:
 	void setRenderTargetsInternal(const RenderTargets &rts, int pixelw, int pixelh, bool hasSRGBtexture) override;
 
 private:
+
+	struct SharedDescriptorPoolsRef
+	{
+		SharedDescriptorPools *pools = nullptr;
+		int referenceCount = 0;
+	};
+
 	bool checkValidationSupport();
 	void pickPhysicalDevice();
 	int rateDeviceSuitability(VkPhysicalDevice device, bool querySwapChain);
@@ -381,6 +392,7 @@ private:
 	std::unordered_map<FramebufferConfiguration, VkFramebuffer, FramebufferConfigurationHasher> framebuffers;
 	std::unordered_map<VkFramebuffer, bool> framebufferUsages;
 	std::unordered_map<uint64, VkSampler> samplers;
+	std::unordered_map<uint64, SharedDescriptorPoolsRef> sharedDescriptorPools;
 	VkCommandPool commandPool = VK_NULL_HANDLE;
 	std::vector<VkCommandBuffer> commandBuffers;
 	std::vector<VkSemaphore> imageAvailableSemaphores;
@@ -392,6 +404,7 @@ private:
 	bool imageRequested = false;
 	size_t currentFrame = 0;
 	uint32_t imageIndex = 0;
+	uint64 realFrameIndex = 0;
 	bool swapChainRecreationRequested = false;
 	bool transitionColorDepthLayouts = false;
 	VmaAllocator vmaAllocator = VK_NULL_HANDLE;

+ 149 - 84
src/modules/graphics/vulkan/Shader.cpp

@@ -38,6 +38,129 @@ namespace vulkan
 
 static const uint32_t DESCRIPTOR_POOL_SIZE = 1000;
 
+// Stores the per-type descriptor counts and builds the VkDescriptorPoolSize
+// list that createDescriptorPool() later passes to Vulkan. No Vulkan pool is
+// created here; only one (initially empty) pool list per frame in flight is set up.
+// NOTE(review): the counts are stored as given while createDescriptorPool()
+// requests DESCRIPTOR_POOL_SIZE sets per pool - confirm whether the counts are
+// meant to be scaled by the number of sets.
+SharedDescriptorPools::SharedDescriptorPools(VkDevice device, int dynamicUniformBuffers, int sampledTextures, int storageTextures, int texelBuffers, int storageBuffers)
+	: device(device)
+	, dynamicUniformBuffers(dynamicUniformBuffers)
+	, sampledTextures(sampledTextures)
+	, storageTextures(storageTextures)
+	, texelBuffers(texelBuffers)
+	, storageBuffers(storageBuffers)
+{
+	// Appends one entry for a descriptor type, skipping types that are not used at all.
+	auto addPoolSize = [this](VkDescriptorType type, int count)
+	{
+		if (count <= 0)
+			return;
+
+		VkDescriptorPoolSize poolSize{};
+		poolSize.type = type;
+		poolSize.descriptorCount = count;
+		descriptorPoolSizes.push_back(poolSize);
+	};
+
+	addPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, dynamicUniformBuffers);
+	addPoolSize(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, sampledTextures);
+	addPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, storageTextures);
+	addPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, texelBuffers);
+	addPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, storageBuffers);
+
+	pools.resize(MAX_FRAMES_IN_FLIGHT);
+}
+
+// Hands every descriptor pool created so far to the graphics module's clean-up
+// queue instead of destroying it immediately. If no graphics module instance
+// can be found, nothing is queued and the handles are left untouched.
+SharedDescriptorPools::~SharedDescriptorPools()
+{
+	auto vgfx = (Graphics *)Module::getInstance<Graphics>(Module::M_GRAPHICS);
+	if (vgfx != nullptr)
+	{
+		// The handles are captured by value because this object is gone by the time the callback runs.
+		vgfx->queueCleanUp([device = device, perFramePools = pools]()
+		{
+			for (const auto &framePools : perFramePools)
+			{
+				for (const auto pool : framePools)
+					vkDestroyDescriptorPool(device, pool, nullptr);
+			}
+		});
+	}
+}
+
+// Switches to the pool list belonging to frameIndex and resets every pool in
+// it. Repeated calls with the same frameIndex are ignored, so the pools are
+// reset once per frame index however many times this is called.
+// NOTE(review): nothing here runs unless a caller invokes it - confirm that
+// callers always do so for a new frame before allocating from these pools.
+void SharedDescriptorPools::newFrame(uint64 frameIndex)
+{
+	const bool alreadyHandled = lastFrameIndex.hasValue && lastFrameIndex.value == frameIndex;
+	if (alreadyHandled)
+		return;
+
+	lastFrameIndex.set(frameIndex);
+	currentFrame = (size_t)(frameIndex % MAX_FRAMES_IN_FLIGHT);
+	currentPool = 0;
+
+	for (VkDescriptorPool framePool : pools[currentFrame])
+		vkResetDescriptorPool(device, framePool, 0);
+}
+
+// Creates one more Vulkan descriptor pool, sized from descriptorPoolSizes and
+// limited to DESCRIPTOR_POOL_SIZE sets, and appends it to the current frame's
+// pool list. Throws love::Exception if Vulkan reports a failure.
+void SharedDescriptorPools::createDescriptorPool()
+{
+	VkDescriptorPoolCreateInfo poolInfo{};
+	poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+	poolInfo.maxSets = DESCRIPTOR_POOL_SIZE;
+	poolInfo.pPoolSizes = descriptorPoolSizes.data();
+	poolInfo.poolSizeCount = static_cast<uint32_t>(descriptorPoolSizes.size());
+
+	VkDescriptorPool newPool;
+	const VkResult status = vkCreateDescriptorPool(device, &poolInfo, nullptr, &newPool);
+	if (status == VK_SUCCESS)
+	{
+		pools[currentFrame].push_back(newPool);
+		return;
+	}
+
+	throw love::Exception("Failed to create Vulkan descriptor pool: %s", Vulkan::getErrorString(status));
+}
+
+// Allocates one descriptor set with the given layout from the current frame's
+// pools. Starts at the pool selected by currentPool and moves on to the next
+// pool, creating it if needed, whenever the current one cannot satisfy the
+// request.
+// Throws love::Exception if Vulkan reports any other error, or if a pool that
+// was only just created cannot hold even a single set.
+VkDescriptorSet SharedDescriptorPools::allocateDescriptorSet(const VkDescriptorSetLayout &descriptorSetLayout)
+{
+	// True while the pool at currentPool was created during this call and has
+	// not yet served any allocation.
+	bool poolIsFresh = false;
+
+	if (pools[currentFrame].empty())
+	{
+		createDescriptorPool();
+		poolIsFresh = true;
+	}
+
+	while (true)
+	{
+		VkDescriptorSetAllocateInfo allocInfo{};
+		allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+		allocInfo.descriptorPool = pools[currentFrame][currentPool];
+		allocInfo.descriptorSetCount = 1;
+		allocInfo.pSetLayouts = &descriptorSetLayout;
+
+		VkDescriptorSet descriptorSet;
+		VkResult result = vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet);
+
+		switch (result)
+		{
+		case VK_SUCCESS:
+			return descriptorSet;
+		case VK_ERROR_OUT_OF_POOL_MEMORY:
+		case VK_ERROR_FRAGMENTED_POOL:
+			// Both codes mean this pool cannot provide the set. If the pool is
+			// brand new, another identical pool would fail the same way, so
+			// report the error instead of creating pools without end.
+			if (poolIsFresh)
+				throw love::Exception("Failed to allocate Vulkan descriptor set: %s", Vulkan::getErrorString(result));
+
+			currentPool++;
+			if (pools[currentFrame].size() <= currentPool)
+			{
+				createDescriptorPool();
+				poolIsFresh = true;
+			}
+			continue;
+		default:
+			throw love::Exception("Failed to allocate Vulkan descriptor set: %s", Vulkan::getErrorString(result));
+		}
+	}
+}
+
 class BindingMapper
 {
 public:
@@ -187,10 +310,8 @@ bool Shader::loadVolatile()
 	compileShaders();
 	createDescriptorSetLayout();
 	createPipelineLayout();
-	createDescriptorPoolSizes();
-	descriptorPools.resize(MAX_FRAMES_IN_FLIGHT);
-	currentFrame = 0;
-	newFrame();
+	acquireDescriptorPools();
+	newFrame(vgfx->getRealFrameIndex());
 
 	return true;
 }
@@ -200,14 +321,12 @@ void Shader::unloadVolatile()
 	if (shaderModules.empty())
 		return;
 
+	vgfx->releaseDescriptorPools(descriptorPools);
+	descriptorPools = nullptr;
+
 	vgfx->queueCleanUp([shaderModules = std::move(shaderModules), device = device, descriptorSetLayout = descriptorSetLayout, pipelineLayout = pipelineLayout,
-		descriptorPools = descriptorPools, computePipeline = computePipeline,
+		computePipeline = computePipeline,
 		graphicsPipelinesCore = std::move(graphicsPipelinesDynamicState), graphicsPipelinesFull = std::move(graphicsPipelinesNoDynamicState)]() {
-		for (const auto &pools : descriptorPools)
-		{
-			for (const auto pool : pools)
-				vkDestroyDescriptorPool(device, pool, nullptr);
-		}
 		for (const auto shaderModule : shaderModules)
 			vkDestroyShaderModule(device, shaderModule, nullptr);
 		vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
@@ -222,7 +341,6 @@ void Shader::unloadVolatile()
 
 	shaderModules.clear();
 	shaderStages.clear();
-	descriptorPools.clear();
 }
 
 const std::vector<VkPipelineShaderStageCreateInfo> &Shader::getShaderStages() const
@@ -240,16 +358,12 @@ VkPipeline Shader::getComputePipeline() const
 	return computePipeline;
 }
 
-void Shader::newFrame()
+void Shader::newFrame(uint64 graphicsFrameIndex)
 {
-	currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
-
-	currentDescriptorPool = 0;
 	currentDescriptorSet = VK_NULL_HANDLE;
 	resourceDescriptorsDirty = true;
 
-	for (VkDescriptorPool pool : descriptorPools[currentFrame])
-		vkResetDescriptorPool(device, pool, 0);
+	descriptorPools->newFrame(graphicsFrameIndex);
 }
 
 void Shader::cmdPushDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint bindPoint)
@@ -309,7 +423,7 @@ void Shader::cmdPushDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBind
 
 	if (resourceDescriptorsDirty || currentDescriptorSet == VK_NULL_HANDLE)
 	{
-		currentDescriptorSet = allocateDescriptorSet();
+		currentDescriptorSet = descriptorPools->allocateDescriptorSet(descriptorSetLayout);
 
 		for (auto &write : descriptorWrites)
 			write.dstSet = currentDescriptorSet;
@@ -996,31 +1110,29 @@ void Shader::createPipelineLayout()
 	}
 }
 
-void Shader::createDescriptorPoolSizes()
+static int getDescriptorPoolSize(const std::map<std::string, Shader::UniformInfo> &uniforms)
 {
-	if (!localUniformData.empty())
+	int size = 0;
+	for (const auto &entry : uniforms)
 	{
-		VkDescriptorPoolSize size{};
-		size.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
-		size.descriptorCount = 1;
-
-		descriptorPoolSizes.push_back(size);
+		if (entry.second.active)
+			size += entry.second.count;
 	}
+	return size;
+}
 
-	for (const auto &entry : reflection.allUniforms)
-	{
-		if (!entry.second->active)
-			continue;
+void Shader::acquireDescriptorPools()
+{
+	int dynamicUniformBuffers = 0;
+	if (!localUniformData.empty())
+		dynamicUniformBuffers++;
 
-		VkDescriptorPoolSize size{};
-		auto type = Vulkan::getDescriptorType(entry.second->baseType);
-		if (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
-			continue;
+	int sampledTextures = getDescriptorPoolSize(reflection.sampledTextures);
+	int storageTextures = getDescriptorPoolSize(reflection.storageTextures);
+	int texelBuffers = getDescriptorPoolSize(reflection.texelBuffers);
+	int storageBuffers = getDescriptorPoolSize(reflection.storageBuffers);
 
-		size.type = type;
-		size.descriptorCount = entry.second->count;
-		descriptorPoolSizes.push_back(size);
-	}
+	descriptorPools = vgfx->acquireDescriptorPools(dynamicUniformBuffers, sampledTextures, storageTextures, texelBuffers, storageBuffers);
 }
 
 void Shader::setMainTex(graphics::Texture *texture)
@@ -1085,53 +1197,6 @@ void Shader::setBufferDescriptor(const UniformInfo *info, love::graphics::Buffer
 	}
 }
 
-void Shader::createDescriptorPool()
-{
-	VkDescriptorPoolCreateInfo createInfo{};
-	createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-	createInfo.maxSets = DESCRIPTOR_POOL_SIZE;
-	createInfo.poolSizeCount = static_cast<uint32_t>(descriptorPoolSizes.size());
-	createInfo.pPoolSizes = descriptorPoolSizes.data();
-
-	VkDescriptorPool pool;
-	VkResult result = vkCreateDescriptorPool(device, &createInfo, nullptr, &pool);
-	if (result != VK_SUCCESS)
-		throw love::Exception("Failed to create Vulkan descriptor pool: %s", Vulkan::getErrorString(result));
-
-	descriptorPools[currentFrame].push_back(pool);
-}
-
-VkDescriptorSet Shader::allocateDescriptorSet()
-{
-	if (descriptorPools[currentFrame].empty())
-		createDescriptorPool();
-
-	while (true)
-	{
-		VkDescriptorSetAllocateInfo allocInfo{};
-		allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-		allocInfo.descriptorPool = descriptorPools[currentFrame][currentDescriptorPool];
-		allocInfo.descriptorSetCount = 1;
-		allocInfo.pSetLayouts = &descriptorSetLayout;
-
-		VkDescriptorSet descriptorSet;
-		VkResult result = vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet);
-
-		switch (result)
-		{
-		case VK_SUCCESS:
-			return descriptorSet;
-		case VK_ERROR_OUT_OF_POOL_MEMORY:
-			currentDescriptorPool++;
-			if (descriptorPools[currentFrame].size() <= currentDescriptorPool)
-				createDescriptorPool();
-			continue;
-		default:
-			throw love::Exception("Failed to allocate Vulkan descriptor set: %s", Vulkan::getErrorString(result));
-		}
-	}
-}
-
 VkPipeline Shader::getCachedGraphicsPipeline(Graphics *vgfx, const GraphicsPipelineConfigurationCore &configuration)
 {
 	auto it = graphicsPipelinesDynamicState.find(configuration);

+ 34 - 10
src/modules/graphics/vulkan/Shader.h

@@ -112,6 +112,36 @@ struct GraphicsPipelineConfigurationFullHasher
 
 class Graphics;
 
+// A set of Vulkan descriptor pools, one growable list per frame in flight,
+// created for a fixed number of descriptors of each type. Graphics hands the
+// same instance to every shader that requests identical counts.
+class SharedDescriptorPools
+{
+public:
+
+	SharedDescriptorPools(VkDevice device, int dynamicUniformBuffers, int sampledTextures, int storageTextures, int texelBuffers, int storageBuffers);
+	virtual ~SharedDescriptorPools();
+
+	// The destructor queues every VkDescriptorPool handle for destruction, so a
+	// copy would end up queueing the same handles a second time.
+	SharedDescriptorPools(const SharedDescriptorPools &) = delete;
+	SharedDescriptorPools &operator=(const SharedDescriptorPools &) = delete;
+
+	// Returns a descriptor set with the given layout, allocated from the current frame's pools.
+	VkDescriptorSet allocateDescriptorSet(const VkDescriptorSetLayout &descriptorSetLayout);
+
+	// Selects and resets the pools for frameIndex; repeated calls with the same index do nothing.
+	void newFrame(uint64 frameIndex);
+
+	// Descriptor counts these pools were created for. Graphics reads them back
+	// to rebuild the lookup key when the pools are released.
+	int dynamicUniformBuffers = 0;
+	int sampledTextures = 0;
+	int storageTextures = 0;
+	int texelBuffers = 0;
+	int storageBuffers = 0;
+
+private:
+
+	// Appends a new pool to the current frame's list.
+	void createDescriptorPool();
+
+	std::vector<VkDescriptorPoolSize> descriptorPoolSizes;
+	// pools[frame] holds every pool created so far for that frame slot.
+	std::vector<std::vector<VkDescriptorPool>> pools;
+	// Frame index of the most recent newFrame() call that did work.
+	Optional<uint64> lastFrameIndex;
+	size_t currentFrame = 0;
+	// Index into pools[currentFrame] of the pool allocations are currently taken from.
+	uint32 currentPool = 0;
+	VkDevice device = VK_NULL_HANDLE;
+
+};
+
 class Shader final
 	: public graphics::Shader
 	, public Volatile
@@ -148,7 +178,7 @@ public:
 
 	const VkPipelineLayout getGraphicsPipelineLayout() const;
 
-	void newFrame();
+	void newFrame(uint64 graphicsFrameIndex);
 
 	void cmdPushDescriptorSets(VkCommandBuffer, VkPipelineBindPoint);
 
@@ -177,10 +207,8 @@ private:
 	void compileShaders();
 	void createDescriptorSetLayout();
 	void createPipelineLayout();
-	void createDescriptorPoolSizes();
+	void acquireDescriptorPools();
 	void buildLocalUniforms(spirv_cross::Compiler &comp, const spirv_cross::SPIRType &type, size_t baseoff, const std::string &basename);
-	void createDescriptorPool();
-	VkDescriptorSet allocateDescriptorSet();
 
 	void setTextureDescriptor(const UniformInfo *info, love::graphics::Texture *texture, int index);
 	void setBufferDescriptor(const UniformInfo *info, love::graphics::Buffer *buffer, int index);
@@ -192,9 +220,6 @@ private:
 
 	VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE;
 	VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
-	std::vector<VkDescriptorPoolSize> descriptorPoolSizes;
-
-	std::vector<std::vector<VkDescriptorPool>> descriptorPools;
 
 	std::vector<VkDescriptorBufferInfo> descriptorBuffers;
 	std::vector<VkDescriptorImageInfo> descriptorImages;
@@ -210,6 +235,8 @@ private:
 	Graphics *vgfx = nullptr;
 	VkDevice device = VK_NULL_HANDLE;
 
+	SharedDescriptorPools *descriptorPools = nullptr;
+
 	bool isCompute = false;
 	bool resourceDescriptorsDirty = false;
 	VkDescriptorSet currentDescriptorSet = VK_NULL_HANDLE;
@@ -226,9 +253,6 @@ private:
 
 	std::unordered_map<GraphicsPipelineConfigurationCore, VkPipeline, GraphicsPipelineConfigurationCoreHasher> graphicsPipelinesDynamicState;
 	std::unordered_map<GraphicsPipelineConfigurationFull, VkPipeline, GraphicsPipelineConfigurationFullHasher> graphicsPipelinesNoDynamicState;
-
-	uint32_t currentFrame = 0;
-	uint32_t currentDescriptorPool = 0;
 };
 
 }

+ 1 - 1
src/modules/graphics/vulkan/Vulkan.cpp

@@ -810,7 +810,7 @@ VkDescriptorType Vulkan::getDescriptorType(graphics::Shader::UniformType type)
 	case graphics::Shader::UniformType::UNIFORM_STORAGETEXTURE:
 		return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
 	case graphics::Shader::UniformType::UNIFORM_TEXELBUFFER:
-		return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+		return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
 	case graphics::Shader::UniformType::UNIFORM_STORAGEBUFFER:
 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 	default: