Explorar el Código

Merge execute and present commands for RenderingDeviceDriver.

Dario hace 1 año
padre
commit
ee2d8f68ba

+ 21 - 25
drivers/d3d12/rendering_device_driver_d3d12.cpp

@@ -1955,48 +1955,44 @@ RDD::CommandQueueID RenderingDeviceDriverD3D12::command_queue_create(CommandQueu
 	return CommandQueueID(command_queue);
 }
 
-Error RenderingDeviceDriverD3D12::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) {
+Error RenderingDeviceDriverD3D12::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) {
 	CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id);
 	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
 		const SemaphoreInfo *semaphore = (const SemaphoreInfo *)(p_wait_semaphores[i].id);
 		command_queue->d3d_queue->Wait(semaphore->d3d_fence.Get(), semaphore->fence_value);
 	}
 
-	thread_local LocalVector<ID3D12CommandList *> command_lists;
-	command_lists.resize(p_cmd_buffers.size());
-	for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {
-		const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id);
-		command_lists[i] = cmd_buf_info->cmd_list.Get();
-	}
+	if (p_cmd_buffers.size() > 0) {
+		thread_local LocalVector<ID3D12CommandList *> command_lists;
+		command_lists.resize(p_cmd_buffers.size());
+		for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {
+			const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id);
+			command_lists[i] = cmd_buf_info->cmd_list.Get();
+		}
 
-	command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr());
+		command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr());
 
-	for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
-		SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_signal_semaphores[i].id);
-		semaphore->fence_value++;
-		command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value);
-	}
+		for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) {
+			SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_cmd_semaphores[i].id);
+			semaphore->fence_value++;
+			command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value);
+		}
 
-	if (p_signal_fence) {
-		FenceInfo *fence = (FenceInfo *)(p_signal_fence.id);
-		fence->fence_value++;
-		command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value);
-		fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle);
+		if (p_cmd_fence) {
+			FenceInfo *fence = (FenceInfo *)(p_cmd_fence.id);
+			fence->fence_value++;
+			command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value);
+			fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle);
+		}
 	}
 
-	return OK;
-}
-
-Error RenderingDeviceDriverD3D12::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) {
-	// D3D12 does not require waiting for the command queue's semaphores to handle presentation.
-	// We just present the swap chains that were specified and ignore the command queue and the semaphores.
 	HRESULT res;
 	bool any_present_failed = false;
 	for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
 		SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
 		res = swap_chain->d3d_swap_chain->Present(swap_chain->sync_interval, swap_chain->present_flags);
 		if (!SUCCEEDED(res)) {
-			print_verbose("D3D12: Presenting swapchain failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
+			print_verbose(vformat("D3D12: Presenting swapchain failed with error 0x%08ux.", (uint64_t)res));
 			any_present_failed = true;
 		}
 	}

+ 1 - 2
drivers/d3d12/rendering_device_driver_d3d12.h

@@ -413,8 +413,7 @@ private:
 
 public:
 	virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override;
-	virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override;
-	virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override;
+	virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override;
 	virtual void command_queue_free(CommandQueueID p_cmd_queue) override;
 
 private:

+ 125 - 98
drivers/vulkan/rendering_device_driver_vulkan.cpp

@@ -1113,12 +1113,12 @@ void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, u
 }
 
 Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t p_frame_count) {
-	// Frame count is not required for the Vulkan driver, so we just ignore it.
-
 	context_device = context_driver->device_get(p_device_index);
 	physical_device = context_driver->physical_device_get(p_device_index);
 	vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties);
 
+	frame_count = p_frame_count;
+
 	// Copy the queue family properties the context already retrieved.
 	uint32_t queue_family_count = context_driver->queue_family_get_count(p_device_index);
 	queue_family_properties.resize(queue_family_count);
@@ -2131,21 +2131,18 @@ RDD::CommandQueueID RenderingDeviceDriverVulkan::command_queue_create(CommandQue
 	return CommandQueueID(command_queue);
 }
 
-Error RenderingDeviceDriverVulkan::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) {
+Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) {
 	DEV_ASSERT(p_cmd_queue.id != 0);
 
+	VkResult err;
 	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
 	Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index];
-	Fence *fence = (Fence *)(p_signal_fence.id);
+	Fence *fence = (Fence *)(p_cmd_fence.id);
 	VkFence vk_fence = (fence != nullptr) ? fence->vk_fence : VK_NULL_HANDLE;
 
-	thread_local LocalVector<VkCommandBuffer> command_buffers;
 	thread_local LocalVector<VkSemaphore> wait_semaphores;
-	thread_local LocalVector<VkSemaphore> signal_semaphores;
 	thread_local LocalVector<VkPipelineStageFlags> wait_semaphores_stages;
-	command_buffers.clear();
 	wait_semaphores.clear();
-	signal_semaphores.clear();
 	wait_semaphores_stages.clear();
 
 	if (!command_queue->pending_semaphores_for_execute.is_empty()) {
@@ -2158,117 +2155,142 @@ Error RenderingDeviceDriverVulkan::command_queue_execute(CommandQueueID p_cmd_qu
 		command_queue->pending_semaphores_for_execute.clear();
 	}
 
-	for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {
-		command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id));
-	}
-
 	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
 		// FIXME: Allow specifying the stage mask in more detail.
 		wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id));
 		wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 	}
 
-	for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
-		signal_semaphores.push_back(VkSemaphore(p_signal_semaphores[i].id));
-	}
-
-	VkSubmitInfo submit_info = {};
-	submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-	submit_info.waitSemaphoreCount = wait_semaphores.size();
-	submit_info.pWaitSemaphores = wait_semaphores.ptr();
-	submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr();
-	submit_info.commandBufferCount = command_buffers.size();
-	submit_info.pCommandBuffers = command_buffers.ptr();
-	submit_info.signalSemaphoreCount = signal_semaphores.size();
-	submit_info.pSignalSemaphores = signal_semaphores.ptr();
+	if (p_cmd_buffers.size() > 0) {
+		thread_local LocalVector<VkCommandBuffer> command_buffers;
+		thread_local LocalVector<VkSemaphore> signal_semaphores;
+		command_buffers.clear();
+		signal_semaphores.clear();
 
-	device_queue.submit_mutex.lock();
-	VkResult err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence);
-	device_queue.submit_mutex.unlock();
-	ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
-
-	if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) {
-		fence->queue_signaled_from = command_queue;
+		for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {
+			command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id));
+		}
 
-		// Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on.
-		for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) {
-			command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] });
+		for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) {
+			signal_semaphores.push_back(VkSemaphore(p_cmd_semaphores[i].id));
 		}
 
-		command_queue->pending_semaphores_for_fence.clear();
-	}
+		VkSemaphore present_semaphore = VK_NULL_HANDLE;
+		if (p_swap_chains.size() > 0) {
+			if (command_queue->present_semaphores.is_empty()) {
+				// Create the semaphores used for presentation if they haven't been created yet.
+				VkSemaphore semaphore = VK_NULL_HANDLE;
+				VkSemaphoreCreateInfo create_info = {};
+				create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+
+				for (uint32_t i = 0; i < frame_count; i++) {
+					err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore);
+					ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
+					command_queue->present_semaphores.push_back(semaphore);
+				}
+			}
 
-	return OK;
-}
+			// If a presentation semaphore is required, cycle across the ones available on the queue. It is technically possible
+			// and valid to reuse the same semaphore for this particular operation, but we create multiple ones anyway in case
+			// some hardware expects multiple semaphores to be used.
+			present_semaphore = command_queue->present_semaphores[command_queue->present_semaphore_index];
+			signal_semaphores.push_back(present_semaphore);
+			command_queue->present_semaphore_index = (command_queue->present_semaphore_index + 1) % command_queue->present_semaphores.size();
+		}
 
-Error RenderingDeviceDriverVulkan::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) {
-	DEV_ASSERT(p_cmd_queue.id != 0);
+		VkSubmitInfo submit_info = {};
+		submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+		submit_info.waitSemaphoreCount = wait_semaphores.size();
+		submit_info.pWaitSemaphores = wait_semaphores.ptr();
+		submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr();
+		submit_info.commandBufferCount = command_buffers.size();
+		submit_info.pCommandBuffers = command_buffers.ptr();
+		submit_info.signalSemaphoreCount = signal_semaphores.size();
+		submit_info.pSignalSemaphores = signal_semaphores.ptr();
+
+		device_queue.submit_mutex.lock();
+		err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence);
+		device_queue.submit_mutex.unlock();
+		ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
+
+		if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) {
+			fence->queue_signaled_from = command_queue;
+
+			// Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on.
+			for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) {
+				command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] });
+			}
 
-	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
-	Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index];
+			command_queue->pending_semaphores_for_fence.clear();
+		}
 
-	thread_local LocalVector<VkSwapchainKHR> swapchains;
-	thread_local LocalVector<uint32_t> image_indices;
-	thread_local LocalVector<VkSemaphore> wait_semaphores;
-	thread_local LocalVector<VkResult> results;
-	swapchains.clear();
-	image_indices.clear();
-	for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
-		SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
-		swapchains.push_back(swap_chain->vk_swapchain);
-		DEV_ASSERT(swap_chain->image_index < swap_chain->images.size());
-		image_indices.push_back(swap_chain->image_index);
+		if (present_semaphore != VK_NULL_HANDLE) {
+			// If command buffers were executed, swap chains must wait on the present semaphore used by the command queue.
+			wait_semaphores.clear();
+			wait_semaphores.push_back(present_semaphore);
+		}
 	}
 
-	wait_semaphores.clear();
-	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
-		wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id));
-	}
+	if (p_swap_chains.size() > 0) {
+		thread_local LocalVector<VkSwapchainKHR> swapchains;
+		thread_local LocalVector<uint32_t> image_indices;
+		thread_local LocalVector<VkResult> results;
+		swapchains.clear();
+		image_indices.clear();
 
-	results.resize(swapchains.size());
-
-	VkPresentInfoKHR present_info = {};
-	present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-	present_info.waitSemaphoreCount = wait_semaphores.size();
-	present_info.pWaitSemaphores = wait_semaphores.ptr();
-	present_info.swapchainCount = swapchains.size();
-	present_info.pSwapchains = swapchains.ptr();
-	present_info.pImageIndices = image_indices.ptr();
-	present_info.pResults = results.ptr();
-	device_queue.submit_mutex.lock();
-	VkResult err = device_functions.QueuePresentKHR(device_queue.queue, &present_info);
-	device_queue.submit_mutex.unlock();
-
-	// Set the index to an invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired.
-	bool any_result_is_out_of_date = false;
-	for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
-		SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
-		swap_chain->image_index = UINT_MAX;
-		if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) {
-			context_driver->surface_set_needs_resize(swap_chain->surface, true);
-			any_result_is_out_of_date = true;
+		for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
+			SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
+			swapchains.push_back(swap_chain->vk_swapchain);
+			DEV_ASSERT(swap_chain->image_index < swap_chain->images.size());
+			image_indices.push_back(swap_chain->image_index);
 		}
-	}
 
-	if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) {
-		// It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case
-		// will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively.
-		return FAILED;
-	}
+		results.resize(swapchains.size());
+
+		VkPresentInfoKHR present_info = {};
+		present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+		present_info.waitSemaphoreCount = wait_semaphores.size();
+		present_info.pWaitSemaphores = wait_semaphores.ptr();
+		present_info.swapchainCount = swapchains.size();
+		present_info.pSwapchains = swapchains.ptr();
+		present_info.pImageIndices = image_indices.ptr();
+		present_info.pResults = results.ptr();
+
+		device_queue.submit_mutex.lock();
+		err = device_functions.QueuePresentKHR(device_queue.queue, &present_info);
+		device_queue.submit_mutex.unlock();
+
+		// Set the index to an invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired.
+		bool any_result_is_out_of_date = false;
+		for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
+			SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
+			swap_chain->image_index = UINT_MAX;
+			if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) {
+				context_driver->surface_set_needs_resize(swap_chain->surface, true);
+				any_result_is_out_of_date = true;
+			}
+		}
 
-	// Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional.
-	//
-	// Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses
-	// VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities.
-	// Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable,
-	// as the rotation of all the content is not handled at the moment.
-	//
-	// VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this
-	// problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as
-	// it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain
-	// every frame.
+		if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) {
+			// It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case
+			// will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively.
+			return FAILED;
+		}
 
-	ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR, FAILED);
+		// Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional.
+		//
+		// Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses
+		// VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities.
+		// Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable,
+		// as the rotation of all the content is not handled at the moment.
+		//
+		// VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this
+		// problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as
+		// it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain
+		// every frame.
+
+		ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR, FAILED);
+	}
 
 	return OK;
 }
@@ -2278,6 +2300,11 @@ void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue)
 
 	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
 
+	// Erase all the semaphores used for presentation.
+	for (VkSemaphore semaphore : command_queue->present_semaphores) {
+		vkDestroySemaphore(vk_device, semaphore, nullptr);
+	}
+
 	// Erase all the semaphores used for image acquisition.
 	for (VkSemaphore semaphore : command_queue->image_semaphores) {
 		vkDestroySemaphore(vk_device, semaphore, nullptr);

+ 4 - 2
drivers/vulkan/rendering_device_driver_vulkan.h

@@ -115,6 +115,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	VkDevice vk_device = VK_NULL_HANDLE;
 	RenderingContextDriverVulkan *context_driver = nullptr;
 	RenderingContextDriver::Device context_device = {};
+	uint32_t frame_count = 1;
 	VkPhysicalDevice physical_device = VK_NULL_HANDLE;
 	VkPhysicalDeviceProperties physical_device_properties = {};
 	VkPhysicalDeviceFeatures physical_device_features = {};
@@ -276,6 +277,7 @@ public:
 	// ----- QUEUE -----
 private:
 	struct CommandQueue {
+		LocalVector<VkSemaphore> present_semaphores;
 		LocalVector<VkSemaphore> image_semaphores;
 		LocalVector<SwapChain *> image_semaphores_swap_chains;
 		LocalVector<uint32_t> pending_semaphores_for_execute;
@@ -284,12 +286,12 @@ private:
 		LocalVector<Pair<Fence *, uint32_t>> image_semaphores_for_fences;
 		uint32_t queue_family = 0;
 		uint32_t queue_index = 0;
+		uint32_t present_semaphore_index = 0;
 	};
 
 public:
 	virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final;
-	virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override final;
-	virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override final;
+	virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override final;
 	virtual void command_queue_free(CommandQueueID p_cmd_queue) override final;
 
 private:

+ 14 - 11
servers/rendering/rendering_device.cpp

@@ -3185,7 +3185,7 @@ Error RenderingDevice::screen_prepare_for_drawing(DisplayServer::WindowID p_scre
 	uint32_t to_present_index = 0;
 	while (to_present_index < frames[frame].swap_chains_to_present.size()) {
 		if (frames[frame].swap_chains_to_present[to_present_index] == it->value) {
-			driver->command_queue_present(present_queue, it->value, {});
+			driver->command_queue_execute_and_present(present_queue, {}, {}, {}, {}, it->value);
 			frames[frame].swap_chains_to_present.remove_at(to_present_index);
 		} else {
 			to_present_index++;
@@ -4717,7 +4717,6 @@ void RenderingDevice::swap_buffers() {
 
 	_end_frame();
 	_execute_frame(true);
-	_present_frame();
 
 	// Advance to the next frame and begin recording again.
 	frame = (frame + 1) % frames.size();
@@ -4890,17 +4889,21 @@ void RenderingDevice::_end_frame() {
 	driver->end_segment();
 }
 
-void RenderingDevice::_execute_frame(bool p_signal_for_present) {
-	const bool frame_can_present = !frames[frame].swap_chains_to_present.is_empty();
-	const VectorView<RDD::SemaphoreID> execute_draw_semaphore = p_signal_for_present && frame_can_present ? frames[frame].draw_semaphore : VectorView<RDD::SemaphoreID>();
-	driver->command_queue_execute(main_queue, frames[frame].setup_command_buffer, {}, frames[frame].setup_semaphore, {});
-	driver->command_queue_execute(main_queue, frames[frame].draw_command_buffer, frames[frame].setup_semaphore, execute_draw_semaphore, frames[frame].draw_fence);
+void RenderingDevice::_execute_frame(bool p_present) {
+	const bool frame_can_present = p_present && !frames[frame].swap_chains_to_present.is_empty();
+	const bool separate_present_queue = main_queue != present_queue;
+	const VectorView<RDD::SemaphoreID> execute_draw_semaphore = frame_can_present && separate_present_queue ? frames[frame].draw_semaphore : VectorView<RDD::SemaphoreID>();
+	const VectorView<RDD::SwapChainID> execute_draw_swap_chains = frame_can_present && !separate_present_queue ? frames[frame].swap_chains_to_present : VectorView<RDD::SwapChainID>();
+	driver->command_queue_execute_and_present(main_queue, {}, frames[frame].setup_command_buffer, frames[frame].setup_semaphore, {}, {});
+	driver->command_queue_execute_and_present(main_queue, frames[frame].setup_semaphore, frames[frame].draw_command_buffer, execute_draw_semaphore, frames[frame].draw_fence, execute_draw_swap_chains);
 	frames[frame].draw_fence_signaled = true;
-}
 
-void RenderingDevice::_present_frame() {
-	if (!frames[frame].swap_chains_to_present.is_empty()) {
-		driver->command_queue_present(present_queue, frames[frame].swap_chains_to_present, frames[frame].draw_semaphore);
+	if (frame_can_present) {
+		if (separate_present_queue) {
+			// Issue the presentation separately if the presentation queue is different from the main queue.
+			driver->command_queue_execute_and_present(present_queue, frames[frame].draw_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present);
+		}
+
 		frames[frame].swap_chains_to_present.clear();
 	}
 }

+ 1 - 2
servers/rendering/rendering_device.h

@@ -1291,8 +1291,7 @@ private:
 	void _free_internal(RID p_id);
 	void _begin_frame();
 	void _end_frame();
-	void _execute_frame(bool p_signal_for_present);
-	void _present_frame();
+	void _execute_frame(bool p_present);
 	void _stall_for_previous_frames();
 	void _flush_and_stall_for_all_frames();
 

+ 1 - 2
servers/rendering/rendering_device_driver.h

@@ -408,8 +408,7 @@ public:
 	// ----- QUEUE -----
 
 	virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) = 0;
-	virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) = 0;
-	virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) = 0;
+	virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) = 0;
 	virtual void command_queue_free(CommandQueueID p_cmd_queue) = 0;
 
 	// ----- POOL -----