Jelajahi Sumber

Merge pull request #76348 from warriormaster12/pipeline-cache

Implement Vulkan pipeline caching
Rémi Verschelde 2 tahun lalu
induk
melakukan
3dd0307f3f

+ 1 - 0
core/config/project_settings.cpp

@@ -1346,6 +1346,7 @@ ProjectSettings::ProjectSettings() {
 	GLOBAL_DEF("rendering/rendering_device/staging_buffer/block_size_kb", 256);
 	GLOBAL_DEF("rendering/rendering_device/staging_buffer/max_size_mb", 128);
 	GLOBAL_DEF("rendering/rendering_device/staging_buffer/texture_upload_region_size_px", 64);
+	GLOBAL_DEF("rendering/rendering_device/pipeline_cache/save_chunk_size_mb", 3.0);
 	GLOBAL_DEF("rendering/rendering_device/vulkan/max_descriptors_per_pool", 64);
 
 	GLOBAL_DEF_BASIC(PropertyInfo(Variant::INT, "rendering/textures/canvas_textures/default_texture_filter", PROPERTY_HINT_ENUM, "Nearest,Linear,Linear Mipmap,Nearest Mipmap"), 1);

+ 3 - 0
doc/classes/ProjectSettings.xml

@@ -2529,6 +2529,9 @@
 		<member name="rendering/rendering_device/driver.windows" type="String" setter="" getter="" default="&quot;vulkan&quot;">
 			Windows override for [member rendering/rendering_device/driver].
 		</member>
+		<member name="rendering/rendering_device/pipeline_cache/save_chunk_size_mb" type="float" setter="" getter="" default="3.0">
+			Determines how much the pipeline cache has to grow, in megabytes, before it is saved to disk again. The lower the value, the more often it is saved.
+		</member>
 		<member name="rendering/rendering_device/staging_buffer/block_size_kb" type="int" setter="" getter="" default="256">
 		</member>
 		<member name="rendering/rendering_device/staging_buffer/max_size_mb" type="int" setter="" getter="" default="128">

+ 136 - 2
drivers/vulkan/rendering_device_vulkan.cpp

@@ -32,6 +32,7 @@
 
 #include "core/config/project_settings.h"
 #include "core/io/compression.h"
+#include "core/io/dir_access.h"
 #include "core/io/file_access.h"
 #include "core/io/marshalls.h"
 #include "core/os/os.h"
@@ -6398,9 +6399,13 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
 	graphics_pipeline_create_info.basePipelineIndex = 0;
 
 	RenderPipeline pipeline;
-	VkResult err = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &graphics_pipeline_create_info, nullptr, &pipeline.pipeline);
+	VkResult err = vkCreateGraphicsPipelines(device, pipelines_cache.cache_object, 1, &graphics_pipeline_create_info, nullptr, &pipeline.pipeline);
 	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + " for shader '" + shader->name + "'.");
 
+	if (pipelines_cache.cache_object != VK_NULL_HANDLE) {
+		_update_pipeline_cache();
+	}
+
 	pipeline.set_formats = shader->set_formats;
 	pipeline.push_constant_stages_mask = shader->push_constant.vk_stages_mask;
 	pipeline.pipeline_layout = shader->pipeline_layout;
@@ -6513,9 +6518,13 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader, const Vector<Pi
 	}
 
 	ComputePipeline pipeline;
-	VkResult err = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &compute_pipeline_create_info, nullptr, &pipeline.pipeline);
+	VkResult err = vkCreateComputePipelines(device, pipelines_cache.cache_object, 1, &compute_pipeline_create_info, nullptr, &pipeline.pipeline);
 	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateComputePipelines failed with error " + itos(err) + ".");
 
+	if (pipelines_cache.cache_object != VK_NULL_HANDLE) {
+		_update_pipeline_cache();
+	}
+
 	pipeline.set_formats = shader->set_formats;
 	pipeline.push_constant_stages_mask = shader->push_constant.vk_stages_mask;
 	pipeline.pipeline_layout = shader->pipeline_layout;
@@ -8965,6 +8974,128 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de
 	draw_list_split = false;
 
 	compute_list = nullptr;
+	_load_pipeline_cache();
+	print_verbose(vformat("Startup PSO cache (%.1f MiB)", pipelines_cache.buffer.size() / (1024.0f * 1024.0f)));
+	VkPipelineCacheCreateInfo cache_info = {};
+	cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
+	cache_info.pNext = nullptr;
+	cache_info.flags = 0;
+	cache_info.initialDataSize = pipelines_cache.buffer.size();
+	cache_info.pInitialData = pipelines_cache.buffer.ptr();
+	VkResult err = vkCreatePipelineCache(device, &cache_info, nullptr, &pipelines_cache.cache_object);
+
+	if (err != VK_SUCCESS) {
+		WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + ".");
+	}
+}
+
+// Reads "user://vulkan/pipelines.cache" into pipelines_cache.buffer, creating the
+// "user://vulkan/" directory first when it does not exist yet.
+// The payload is kept only if the stored header matches the payload (size, hash)
+// and the current physical device (vendor, device, cache UUID, pointer size);
+// otherwise a warning is printed.
+void RenderingDeviceVulkan::_load_pipeline_cache() {
+	if (!DirAccess::exists("user://vulkan/")) {
+		Ref<DirAccess> user_dir = DirAccess::create(DirAccess::ACCESS_USERDATA);
+		if (user_dir.is_valid()) {
+			user_dir->make_dir_recursive("user://vulkan/");
+		}
+	}
+
+	// Nothing has been saved yet: leave the cache state untouched.
+	if (!FileAccess::exists("user://vulkan/pipelines.cache")) {
+		return;
+	}
+
+	Error read_error;
+	Vector<uint8_t> raw = FileAccess::get_file_as_bytes("user://vulkan/pipelines.cache", &read_error);
+	if (read_error != OK || raw.size() <= (int)sizeof(PipelineCacheHeader)) {
+		WARN_PRINT("Invalid/corrupt pipelines cache.");
+		return;
+	}
+
+	PipelineCacheHeader stored = {};
+	memcpy((char *)&stored, raw.ptr(), sizeof(PipelineCacheHeader));
+	if (stored.magic != 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE) {
+		WARN_PRINT("Invalid pipelines cache magic number.");
+		return;
+	}
+
+	// Everything after the header is the cache payload.
+	pipelines_cache.buffer.resize(raw.size() - sizeof(PipelineCacheHeader));
+	memcpy(pipelines_cache.buffer.ptrw(), raw.ptr() + sizeof(PipelineCacheHeader), pipelines_cache.buffer.size());
+
+	VkPhysicalDeviceProperties device_props;
+	vkGetPhysicalDeviceProperties(context->get_physical_device(), &device_props);
+
+	const bool hash_ok = stored.data_hash == hash_murmur3_buffer(pipelines_cache.buffer.ptr(), pipelines_cache.buffer.size());
+	const bool size_ok = stored.data_size == (uint32_t)pipelines_cache.buffer.size();
+	const bool device_ok = stored.vendor_id == device_props.vendorID && stored.device_id == device_props.deviceID;
+	const bool abi_ok = stored.driver_abi == sizeof(void *);
+	const bool uuid_ok = memcmp(stored.uuid, device_props.pipelineCacheUUID, VK_UUID_SIZE) == 0;
+
+	if (hash_ok && size_ok && device_ok && abi_ok && uuid_ok) {
+		pipelines_cache.current_size = pipelines_cache.buffer.size();
+	} else {
+		WARN_PRINT("Invalid pipelines cache header.");
+		pipelines_cache.current_size = 0;
+		pipelines_cache.buffer.clear();
+	}
+}
+
+// Decides whether the Vulkan pipeline cache has to be written to disk and, if so,
+// schedules _save_pipeline_cache_threaded() on the WorkerThreadPool.
+//
+// p_closing == false: a save is scheduled only when the cache size differs from the
+//   last saved size by at least
+//   "rendering/rendering_device/pipeline_cache/save_chunk_size_mb" (in MiB) and no
+//   earlier save is still running. The call does not block on the new save.
+// p_closing == true: any size difference is saved, and the call blocks until the
+//   save has finished.
+void RenderingDeviceVulkan::_update_pipeline_cache(bool p_closing) {
+	WorkerThreadPool *pool = WorkerThreadPool::get_singleton();
+
+	if (p_closing && pipelines_cache_save_task != WorkerThreadPool::INVALID_TASK_ID) {
+		// Let an in-flight save finish before pipelines_cache is read or reloaded below:
+		// the save task writes pipelines_cache.buffer and pipelines_cache.current_size.
+		pool->wait_for_task_completion(pipelines_cache_save_task);
+		pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID;
+	}
+
+	size_t pso_blob_size = 0;
+	VkResult vr = vkGetPipelineCacheData(device, pipelines_cache.cache_object, &pso_blob_size, nullptr);
+	ERR_FAIL_COND(vr);
+
+	if (p_closing && Engine::get_singleton()->is_editor_hint()) {
+		// This is mostly for the editor to check if after playing the game, game's pipeline cache size still matches with editor's cache.
+		_load_pipeline_cache();
+		if (pipelines_cache.current_size > pso_blob_size) {
+			// The cache on disk is already larger than ours; keep it.
+			return;
+		}
+	}
+
+	if (pso_blob_size == pipelines_cache.current_size) {
+		// Nothing changed since the last load/save.
+		return;
+	}
+
+	if (p_closing) {
+		// Last chance to persist the cache: save and block until it is on disk.
+		pipelines_cache_save_task = pool->add_template_task(this, &RenderingDeviceVulkan::_save_pipeline_cache_threaded, pso_blob_size, false, "PipelineCacheSave");
+		pool->wait_for_task_completion(pipelines_cache_save_task);
+		pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID;
+		return;
+	}
+
+	if (pso_blob_size > pipelines_cache.current_size) {
+		// Compare in floating point so fractional values of the setting are honoured.
+		const float save_interval = GLOBAL_GET("rendering/rendering_device/pipeline_cache/save_chunk_size_mb");
+		const float grown_mib = (pso_blob_size - pipelines_cache.current_size) / (1024.0f * 1024.0f);
+		if (grown_mib < save_interval) {
+			return;
+		}
+	}
+	// A cache smaller than the last saved one is always treated as dirty.
+
+	if (pipelines_cache_save_task != WorkerThreadPool::INVALID_TASK_ID) {
+		if (!pool->is_task_completed(pipelines_cache_save_task)) {
+			// A save is still running; retry on a later call (at the latest when closing).
+			return;
+		}
+		// Per the WorkerThreadPool documentation every task must be waited on so its
+		// resources can be released; the task is finished, so this is not expected to block.
+		pool->wait_for_task_completion(pipelines_cache_save_task);
+	}
+	pipelines_cache_save_task = pool->add_template_task(this, &RenderingDeviceVulkan::_save_pipeline_cache_threaded, pso_blob_size, false, "PipelineCacheSave");
+}
+
+// Save task scheduled by _update_pipeline_cache(): fetches up to p_pso_blob_size bytes
+// of pipeline cache data from Vulkan into pipelines_cache.buffer and writes them to
+// "user://vulkan/pipelines.cache", preceded by a PipelineCacheHeader describing the
+// payload and the current physical device.
+void RenderingDeviceVulkan::_save_pipeline_cache_threaded(size_t p_pso_blob_size) {
+	pipelines_cache.current_size = p_pso_blob_size;
+	pipelines_cache.buffer.clear();
+	pipelines_cache.buffer.resize(p_pso_blob_size);
+	VkResult vr = vkGetPipelineCacheData(device, pipelines_cache.cache_object, &p_pso_blob_size, pipelines_cache.buffer.ptrw());
+	// VK_INCOMPLETE only means the cache grew since its size was queried; the bytes
+	// that were written are still valid initial data, so they are worth saving.
+	ERR_FAIL_COND(vr != VK_SUCCESS && vr != VK_INCOMPLETE);
+	// vkGetPipelineCacheData wrote back the number of bytes actually stored; keep the
+	// buffer, header and bookkeeping in sync with it instead of the requested size.
+	pipelines_cache.buffer.resize(p_pso_blob_size);
+	pipelines_cache.current_size = p_pso_blob_size;
+	print_verbose(vformat("Updated PSO cache (%.1f MiB)", p_pso_blob_size / (1024.0f * 1024.0f)));
+
+	VkPhysicalDeviceProperties props;
+	vkGetPhysicalDeviceProperties(context->get_physical_device(), &props);
+	PipelineCacheHeader header = {};
+	header.magic = 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
+	header.data_size = pipelines_cache.buffer.size();
+	header.data_hash = hash_murmur3_buffer(pipelines_cache.buffer.ptr(), pipelines_cache.buffer.size());
+	header.device_id = props.deviceID;
+	header.vendor_id = props.vendorID;
+	header.driver_version = props.driverVersion;
+	for (size_t i = 0; i < VK_UUID_SIZE; i++) {
+		header.uuid[i] = props.pipelineCacheUUID[i];
+	}
+	header.driver_abi = sizeof(void *);
+	Ref<FileAccess> f = FileAccess::open("user://vulkan/pipelines.cache", FileAccess::WRITE, nullptr);
+	if (f.is_valid()) {
+		f->store_buffer((const uint8_t *)&header, sizeof(PipelineCacheHeader));
+		f->store_buffer(pipelines_cache.buffer);
+	} else {
+		// Saving is best-effort, but do not fail silently.
+		WARN_PRINT("Unable to open the pipelines cache file for writing.");
+	}
 }
 
 template <class T>
@@ -9340,6 +9471,9 @@ void RenderingDeviceVulkan::finalize() {
 		vkDestroyCommandPool(device, frames[i].command_pool, nullptr);
 		vkDestroyQueryPool(device, frames[i].timestamp_pool, nullptr);
 	}
+	_update_pipeline_cache(true);
+
+	vkDestroyPipelineCache(device, pipelines_cache.cache_object, nullptr);
 
 	for (int i = 0; i < split_draw_list_allocators.size(); i++) {
 		vkDestroyCommandPool(device, split_draw_list_allocators[i].command_pool, nullptr);

+ 26 - 0
drivers/vulkan/rendering_device_vulkan.h

@@ -31,6 +31,7 @@
 #ifndef RENDERING_DEVICE_VULKAN_H
 #define RENDERING_DEVICE_VULKAN_H
 
+#include "core/object/worker_thread_pool.h"
 #include "core/os/thread_safe.h"
 #include "core/templates/local_vector.h"
 #include "core/templates/oa_hash_map.h"
@@ -792,6 +793,31 @@ class RenderingDeviceVulkan : public RenderingDevice {
 
 	RID_Owner<RenderPipeline, true> render_pipeline_owner;
 
+	struct PipelineCacheHeader { // Header stored in front of the cache payload in "user://vulkan/pipelines.cache"; written raw, so field order and types define the file layout.
+		uint32_t magic; // 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE; any other value rejects the file on load.
+		uint32_t data_size; // Size in bytes of the payload that follows the header.
+		uint64_t data_hash; // hash_murmur3_buffer() of the payload.
+		uint32_t vendor_id; // VkPhysicalDeviceProperties::vendorID of the device that produced the cache.
+		uint32_t device_id; // VkPhysicalDeviceProperties::deviceID of the device that produced the cache.
+		uint32_t driver_version; // VkPhysicalDeviceProperties::driverVersion; stored on save, not compared on load.
+		uint8_t uuid[VK_UUID_SIZE]; // VkPhysicalDeviceProperties::pipelineCacheUUID.
+		uint8_t driver_abi; // sizeof(void *) of the build that wrote the file.
+	};
+
+	struct PipelineCache { // Runtime state of the Vulkan pipeline cache.
+		size_t current_size = 0; // Payload size in bytes as of the last load from / save to disk; 0 when no valid cache was loaded.
+		Vector<uint8_t> buffer; // Cache payload bytes, without the PipelineCacheHeader.
+		VkPipelineCache cache_object = VK_NULL_HANDLE; // Handle passed to vkCreateGraphicsPipelines/vkCreateComputePipelines; created in initialize().
+	};
+
+	PipelineCache pipelines_cache; // The single pipeline cache used by this device.
+
+	WorkerThreadPool::TaskID pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; // Most recently scheduled save task, or INVALID_TASK_ID if none was scheduled.
+
+	void _load_pipeline_cache(); // Reads and validates "user://vulkan/pipelines.cache" into pipelines_cache.
+	void _update_pipeline_cache(bool p_closing = false); // Schedules a save when the cache size changed enough; with p_closing, saves any change and waits for completion.
+	void _save_pipeline_cache_threaded(size_t pso_blob_size); // Save task: writes header + cache data to disk.
+
+
 	struct ComputePipeline {
 		RID shader;
 		Vector<uint32_t> set_formats;