ソースを参照

Vulkan synchronization fixes (#2386)

* Fix out of bounds index for unknown device types

* Vulkan: Insert barrier before image host reads

* Vulkan: Make commands wait for the wait semaphore

Making commands wait at BOTTOM_OF_PIPE is effectively a no-op, so they started executing immediately instead of waiting for the wait semaphore

* Vulkan: Insert barrier between views/dispatches instead of waiting on the host

* Vulkan: Fix determination of access flag from image layout

This fixes two write-after-write races with copy commands after a layout transition to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL

* Add BGFX_CONFIG_MAX_FRAME_LATENCY define

Affected backends either used a magic value of 3 or defined their own XX_MAX_FRAMES_IN_FLIGHT to be 3

* Vulkan: Include indirect draw in pipeline barrier

* Vulkan: honor init.resolution.numBackBuffers for swapchain size

* Make max frame latency configurable at compile time
pezcode 4 年 前
コミット
f983367d75

+ 1 - 1
src/bgfx.cpp

@@ -1831,7 +1831,7 @@ namespace bgfx
 		m_init = _init;
 		m_init = _init;
 		m_init.resolution.reset &= ~BGFX_RESET_INTERNAL_FORCE;
 		m_init.resolution.reset &= ~BGFX_RESET_INTERNAL_FORCE;
 		m_init.resolution.numBackBuffers  = bx::clamp<uint8_t>(_init.resolution.numBackBuffers, 2, BGFX_CONFIG_MAX_BACK_BUFFERS);
 		m_init.resolution.numBackBuffers  = bx::clamp<uint8_t>(_init.resolution.numBackBuffers, 2, BGFX_CONFIG_MAX_BACK_BUFFERS);
-		m_init.resolution.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, 3);
+		m_init.resolution.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY);
 		dump(m_init.resolution);
 		dump(m_init.resolution);
 
 
 		if (g_platformData.ndt          == NULL
 		if (g_platformData.ndt          == NULL

+ 4 - 0
src/config.h

@@ -361,6 +361,10 @@ BX_STATIC_ASSERT(bx::isPowerOf2(BGFX_CONFIG_MAX_VIEWS), "BGFX_CONFIG_MAX_VIEWS m
 #	define BGFX_CONFIG_MAX_BACK_BUFFERS 4
 #	define BGFX_CONFIG_MAX_BACK_BUFFERS 4
 #endif // BGFX_CONFIG_MAX_BACK_BUFFERS
 #endif // BGFX_CONFIG_MAX_BACK_BUFFERS
 
 
+#ifndef BGFX_CONFIG_MAX_FRAME_LATENCY
+#	define BGFX_CONFIG_MAX_FRAME_LATENCY 3
+#endif // BGFX_CONFIG_MAX_FRAME_LATENCY
+
 #ifndef BGFX_CONFIG_PREFER_DISCRETE_GPU
 #ifndef BGFX_CONFIG_PREFER_DISCRETE_GPU
 // On laptops with integrated and discrete GPU, prefer selection of discrete GPU.
 // On laptops with integrated and discrete GPU, prefer selection of discrete GPU.
 // nVidia and AMD, on Windows only.
 // nVidia and AMD, on Windows only.

+ 1 - 1
src/renderer_d3d11.cpp

@@ -1035,7 +1035,7 @@ namespace bgfx { namespace d3d11
 					m_scd.alphaMode  = DXGI_ALPHA_MODE_IGNORE;
 					m_scd.alphaMode  = DXGI_ALPHA_MODE_IGNORE;
 					m_scd.flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
 					m_scd.flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
 
 
-					m_scd.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, 3);
+					m_scd.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY);
 					m_scd.nwh             = g_platformData.nwh;
 					m_scd.nwh             = g_platformData.nwh;
 					m_scd.ndt             = g_platformData.ndt;
 					m_scd.ndt             = g_platformData.ndt;
 					m_scd.windowed        = true;
 					m_scd.windowed        = true;

+ 1 - 1
src/renderer_d3d12.cpp

@@ -954,7 +954,7 @@ namespace bgfx { namespace d3d12
 				m_scd.alphaMode  = DXGI_ALPHA_MODE_IGNORE;
 				m_scd.alphaMode  = DXGI_ALPHA_MODE_IGNORE;
 				m_scd.flags      = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
 				m_scd.flags      = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
 
 
-				m_scd.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, 3);
+				m_scd.maxFrameLatency = bx::min<uint8_t>(_init.resolution.maxFrameLatency, BGFX_CONFIG_MAX_FRAME_LATENCY);
 				m_scd.nwh             = g_platformData.nwh;
 				m_scd.nwh             = g_platformData.nwh;
 				m_scd.ndt             = g_platformData.ndt;
 				m_scd.ndt             = g_platformData.ndt;
 				m_scd.windowed        = true;
 				m_scd.windowed        = true;

+ 1 - 3
src/renderer_mtl.h

@@ -67,8 +67,6 @@ namespace bgfx { namespace mtl
 	// objects with creation functions starting with 'new' has a refcount 1 after creation, object must be destroyed with release.
 	// objects with creation functions starting with 'new' has a refcount 1 after creation, object must be destroyed with release.
 	// commandBuffer, commandEncoders are autoreleased objects. Needs AutoreleasePool!
 	// commandBuffer, commandEncoders are autoreleased objects. Needs AutoreleasePool!
 
 
-#define MTL_MAX_FRAMES_IN_FLIGHT (3)
-
 #define MTL_CLASS(name)                                   \
 #define MTL_CLASS(name)                                   \
 	class name                                            \
 	class name                                            \
 	{                                                     \
 	{                                                     \
@@ -1115,7 +1113,7 @@ namespace bgfx { namespace mtl
 		int m_releaseWriteIndex;
 		int m_releaseWriteIndex;
 		int m_releaseReadIndex;
 		int m_releaseReadIndex;
 		typedef stl::vector<NSObject*> ResourceArray;
 		typedef stl::vector<NSObject*> ResourceArray;
-		ResourceArray m_release[MTL_MAX_FRAMES_IN_FLIGHT];
+		ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY];
 	};
 	};
 
 
 	struct TimerQueryMtl
 	struct TimerQueryMtl

+ 7 - 7
src/renderer_mtl.mm

@@ -423,7 +423,7 @@ namespace bgfx { namespace mtl
 			m_textureDescriptor = newTextureDescriptor();
 			m_textureDescriptor = newTextureDescriptor();
 			m_samplerDescriptor = newSamplerDescriptor();
 			m_samplerDescriptor = newSamplerDescriptor();
 
 
-			for (uint8_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
+			for (uint8_t ii = 0; ii < BGFX_CONFIG_MAX_FRAME_LATENCY; ++ii)
 			{
 			{
 				m_uniformBuffers[ii] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0);
 				m_uniformBuffers[ii] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0);
 			}
 			}
@@ -711,7 +711,7 @@ namespace bgfx { namespace mtl
 
 
 			m_mainFrameBuffer.destroy();
 			m_mainFrameBuffer.destroy();
 
 
-			for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i)
+			for (uint8_t i=0; i < BGFX_CONFIG_MAX_FRAME_LATENCY; ++i)
 			{
 			{
 				MTL_RELEASE(m_uniformBuffers[i]);
 				MTL_RELEASE(m_uniformBuffers[i]);
 			}
 			}
@@ -2344,7 +2344,7 @@ namespace bgfx { namespace mtl
 		bool m_hasStoreActionStoreAndMultisampleResolve;
 		bool m_hasStoreActionStoreAndMultisampleResolve;
 
 
 		Buffer   m_uniformBuffer;
 		Buffer   m_uniformBuffer;
-		Buffer   m_uniformBuffers[MTL_MAX_FRAMES_IN_FLIGHT];
+		Buffer   m_uniformBuffers[BGFX_CONFIG_MAX_FRAME_LATENCY];
 		uint32_t m_uniformBufferVertexOffset;
 		uint32_t m_uniformBufferVertexOffset;
 		uint32_t m_uniformBufferFragmentOffset;
 		uint32_t m_uniformBufferFragmentOffset;
 
 
@@ -3403,7 +3403,7 @@ namespace bgfx { namespace mtl
 	void CommandQueueMtl::init(Device _device)
 	void CommandQueueMtl::init(Device _device)
 	{
 	{
 		m_commandQueue = _device.newCommandQueue();
 		m_commandQueue = _device.newCommandQueue();
-		m_framesSemaphore.post(MTL_MAX_FRAMES_IN_FLIGHT);
+		m_framesSemaphore.post(BGFX_CONFIG_MAX_FRAME_LATENCY);
 	}
 	}
 
 
 	void CommandQueueMtl::shutdown()
 	void CommandQueueMtl::shutdown()
@@ -3435,7 +3435,7 @@ namespace bgfx { namespace mtl
 		{
 		{
 			if (_endFrame)
 			if (_endFrame)
 			{
 			{
-				m_releaseWriteIndex = (m_releaseWriteIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
+				m_releaseWriteIndex = (m_releaseWriteIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 				m_activeCommandBuffer.addCompletedHandler(commandBufferFinishedCallback, this);
 				m_activeCommandBuffer.addCompletedHandler(commandBufferFinishedCallback, this);
 			}
 			}
 
 
@@ -3480,7 +3480,7 @@ namespace bgfx { namespace mtl
 	void CommandQueueMtl::consume()
 	void CommandQueueMtl::consume()
 	{
 	{
 		m_framesSemaphore.wait();
 		m_framesSemaphore.wait();
-		m_releaseReadIndex = (m_releaseReadIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
+		m_releaseReadIndex = (m_releaseReadIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 
 
 		ResourceArray& ra = m_release[m_releaseReadIndex];
 		ResourceArray& ra = m_release[m_releaseReadIndex];
 
 
@@ -3755,7 +3755,7 @@ namespace bgfx { namespace mtl
 		}
 		}
 
 
 		m_uniformBuffer = m_uniformBuffers[m_bufferIndex];
 		m_uniformBuffer = m_uniformBuffers[m_bufferIndex];
-		m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
+		m_bufferIndex = (m_bufferIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 		m_uniformBufferVertexOffset = 0;
 		m_uniformBufferVertexOffset = 0;
 		m_uniformBufferFragmentOffset = 0;
 		m_uniformBufferFragmentOffset = 0;
 
 

+ 66 - 28
src/renderer_vk.cpp

@@ -482,7 +482,7 @@ VK_IMPORT_DEVICE
 
 
 	const char* getName(VkPhysicalDeviceType _type)
 	const char* getName(VkPhysicalDeviceType _type)
 	{
 	{
-		return s_deviceTypeName[bx::min<int32_t>(_type, BX_COUNTOF(s_deviceTypeName) )];
+		return s_deviceTypeName[bx::min<int32_t>(_type, BX_COUNTOF(s_deviceTypeName)-1 )];
 	}
 	}
 
 
 	static const char* s_allocScopeName[] =
 	static const char* s_allocScopeName[] =
@@ -860,40 +860,38 @@ VK_IMPORT_DEVICE
 		switch (_oldLayout)
 		switch (_oldLayout)
 		{
 		{
 		case VK_IMAGE_LAYOUT_UNDEFINED:
 		case VK_IMAGE_LAYOUT_UNDEFINED:
-//			srcAccessMask |= VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_GENERAL:
 		case VK_IMAGE_LAYOUT_GENERAL:
+			srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
 		case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
-			srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+			srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
-			srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+			srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
 		case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
-			srcAccessMask |= VK_ACCESS_SHADER_READ_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
 		case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
-			srcAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
 		case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+			srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_PREINITIALIZED:
 		case VK_IMAGE_LAYOUT_PREINITIALIZED:
-			srcAccessMask |= VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+			srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
 		case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
-			srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
 			break;
 			break;
 
 
 		default:
 		default:
@@ -902,40 +900,36 @@ VK_IMPORT_DEVICE
 
 
 		switch (_newLayout)
 		switch (_newLayout)
 		{
 		{
-		case VK_IMAGE_LAYOUT_UNDEFINED:
-			break;
-
 		case VK_IMAGE_LAYOUT_GENERAL:
 		case VK_IMAGE_LAYOUT_GENERAL:
+			dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
 		case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
-			dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+			dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
-			dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+			dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
 		case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+			dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
 		case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
-			dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+			dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
 		case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
-			dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
+			dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
 		case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
-			dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
-			break;
-
-		case VK_IMAGE_LAYOUT_PREINITIALIZED:
+			dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
 			break;
 			break;
 
 
 		case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
 		case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
-			dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
+			dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
 			break;
 			break;
 
 
 		default:
 		default:
@@ -2103,11 +2097,13 @@ VK_IMPORT_DEVICE
 					compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 					compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 				}
 				}
 
 
+				uint8_t swapBufferCount = bx::clamp<uint8_t>(_init.resolution.numBackBuffers, 2, BGFX_CONFIG_MAX_BACK_BUFFERS);
 				m_sci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
 				m_sci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
 				m_sci.pNext = NULL;
 				m_sci.pNext = NULL;
 				m_sci.flags = 0;
 				m_sci.flags = 0;
 				m_sci.surface = m_surface;
 				m_sci.surface = m_surface;
 				m_sci.minImageCount   = surfaceCapabilities.minImageCount;
 				m_sci.minImageCount   = surfaceCapabilities.minImageCount;
+				m_sci.minImageCount   = bx::clamp<uint32_t>(swapBufferCount, surfaceCapabilities.minImageCount, surfaceCapabilities.maxImageCount);
 				m_sci.imageFormat     = m_backBufferColorFormat.format;
 				m_sci.imageFormat     = m_backBufferColorFormat.format;
 				m_sci.imageColorSpace = m_backBufferColorFormat.colorSpace;
 				m_sci.imageColorSpace = m_backBufferColorFormat.colorSpace;
 				m_sci.imageExtent.width  = width;
 				m_sci.imageExtent.width  = width;
@@ -2693,6 +2689,23 @@ VK_IMPORT_DEVICE
 				, 1
 				, 1
 				);
 				);
 
 
+			// Make changes to image visible to host read
+			VkMemoryBarrier memBarrier{ VK_STRUCTURE_TYPE_MEMORY_BARRIER };
+			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+			memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+			vkCmdPipelineBarrier(
+				  copyCmd
+				, VK_PIPELINE_STAGE_TRANSFER_BIT
+				, VK_PIPELINE_STAGE_HOST_BIT
+				, 0
+				, 1
+				, &memBarrier
+				, 0
+				, NULL
+				, 0
+				, NULL
+			);
+
 			setImageMemoryBarrier(
 			setImageMemoryBarrier(
 				  copyCmd
 				  copyCmd
 				, srcImage
 				, srcImage
@@ -2908,6 +2921,23 @@ VK_IMPORT_DEVICE
 				, 1
 				, 1
 				);
 				);
 
 
+			// Make changes to image visible to host read
+			VkMemoryBarrier memBarrier{ VK_STRUCTURE_TYPE_MEMORY_BARRIER };
+			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+			memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+			vkCmdPipelineBarrier(
+				copyCmd
+				, VK_PIPELINE_STAGE_TRANSFER_BIT
+				, VK_PIPELINE_STAGE_HOST_BIT
+				, 0
+				, 1
+				, &memBarrier
+				, 0
+				, NULL
+				, 0
+				, NULL
+			);
+
 			// Transition back the swap chain image after the blit is done
 			// Transition back the swap chain image after the blit is done
 			setImageMemoryBarrier(
 			setImageMemoryBarrier(
 				  copyCmd
 				  copyCmd
@@ -4528,7 +4558,7 @@ VK_IMPORT_DEVICE
 		uint64_t kick(VkSemaphore _wait = VK_NULL_HANDLE, VkSemaphore _signal = VK_NULL_HANDLE)
 		uint64_t kick(VkSemaphore _wait = VK_NULL_HANDLE, VkSemaphore _signal = VK_NULL_HANDLE)
 		{
 		{
 			VkPipelineStageFlags stageFlags = 0
 			VkPipelineStageFlags stageFlags = 0
-				| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
+				| VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
 				;
 				;
 
 
 			VkSubmitInfo si;
 			VkSubmitInfo si;
@@ -6659,11 +6689,20 @@ VK_DESTROY
 						beginRenderPass = false;
 						beginRenderPass = false;
 					}
 					}
 
 
-					VK_CHECK(vkEndCommandBuffer(m_commandBuffer) );
-
-					kick(renderWait);
-					renderWait = VK_NULL_HANDLE;
-					finishAll();
+					const VkPipelineStageFlags srcStage = wasCompute
+						? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
+						: VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
+						;
+					const VkPipelineStageFlags dstStage = isCompute
+						? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
+						: VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
+						;
+					VkMemoryBarrier memBarrier;
+					memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+					memBarrier.pNext = NULL;
+					memBarrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+					memBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
+					vkCmdPipelineBarrier(m_commandBuffer, srcStage, dstStage, 0, 1, &memBarrier, 0, NULL, 0, NULL);
 
 
 					view = key.m_view;
 					view = key.m_view;
 					currentPipeline = VK_NULL_HANDLE;
 					currentPipeline = VK_NULL_HANDLE;
@@ -6671,8 +6710,7 @@ VK_DESTROY
 					currentProgram         = BGFX_INVALID_HANDLE;
 					currentProgram         = BGFX_INVALID_HANDLE;
 					hasPredefined          = false;
 					hasPredefined          = false;
 					BX_UNUSED(currentSamplerStateIdx);
 					BX_UNUSED(currentSamplerStateIdx);
-
-					VK_CHECK(vkBeginCommandBuffer(m_commandBuffer, &cbbi) );
+					
 					fbh = _render->m_view[view].m_fbh;
 					fbh = _render->m_view[view].m_fbh;
 					setFrameBuffer(fbh);
 					setFrameBuffer(fbh);
 
 

+ 7 - 7
src/renderer_webgpu.cpp

@@ -569,7 +569,7 @@ namespace bgfx { namespace webgpu
 			m_cmd.init(m_queue);
 			m_cmd.init(m_queue);
 			//BGFX_FATAL(NULL != m_cmd.m_commandQueue, Fatal::UnableToInitialize, "Unable to create Metal device.");
 			//BGFX_FATAL(NULL != m_cmd.m_commandQueue, Fatal::UnableToInitialize, "Unable to create Metal device.");
 
 
-			for (uint8_t ii = 0; ii < WEBGPU_MAX_FRAMES_IN_FLIGHT; ++ii)
+			for (uint8_t ii = 0; ii < BGFX_CONFIG_MAX_FRAME_LATENCY; ++ii)
 			{
 			{
 				BX_TRACE("Create scratch buffer %d", ii);
 				BX_TRACE("Create scratch buffer %d", ii);
 				m_scratchBuffers[ii].create(BGFX_CONFIG_MAX_DRAW_CALLS * 128);
 				m_scratchBuffers[ii].create(BGFX_CONFIG_MAX_DRAW_CALLS * 128);
@@ -2372,9 +2372,9 @@ namespace bgfx { namespace webgpu
 		CommandQueueWgpu   m_cmd;
 		CommandQueueWgpu   m_cmd;
 
 
 		StagingBufferWgpu	m_uniformBuffers[WEBGPU_NUM_UNIFORM_BUFFERS];
 		StagingBufferWgpu	m_uniformBuffers[WEBGPU_NUM_UNIFORM_BUFFERS];
-		ScratchBufferWgpu   m_scratchBuffers[WEBGPU_MAX_FRAMES_IN_FLIGHT];
+		ScratchBufferWgpu   m_scratchBuffers[BGFX_CONFIG_MAX_FRAME_LATENCY];
 
 
-		BindStateCacheWgpu  m_bindStateCache[WEBGPU_MAX_FRAMES_IN_FLIGHT];
+		BindStateCacheWgpu  m_bindStateCache[BGFX_CONFIG_MAX_FRAME_LATENCY];
 
 
 		uint8_t m_frameIndex;
 		uint8_t m_frameIndex;
 
 
@@ -3800,7 +3800,7 @@ namespace bgfx { namespace webgpu
 	{
 	{
 		m_queue = _queue;
 		m_queue = _queue;
 #if BGFX_CONFIG_MULTITHREADED
 #if BGFX_CONFIG_MULTITHREADED
-		//m_framesSemaphore.post(WEBGPU_MAX_FRAMES_IN_FLIGHT);
+		//m_framesSemaphore.post(BGFX_CONFIG_MAX_FRAME_LATENCY);
 #endif
 #endif
 	}
 	}
 
 
@@ -3838,7 +3838,7 @@ namespace bgfx { namespace webgpu
 		{
 		{
 			if (_endFrame)
 			if (_endFrame)
 			{
 			{
-				m_releaseWriteIndex = (m_releaseWriteIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT;
+				m_releaseWriteIndex = (m_releaseWriteIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 				//m_encoder.addCompletedHandler(commandBufferFinishedCallback, this);
 				//m_encoder.addCompletedHandler(commandBufferFinishedCallback, this);
 			}
 			}
 
 
@@ -3898,7 +3898,7 @@ namespace bgfx { namespace webgpu
 		//m_framesSemaphore.wait();
 		//m_framesSemaphore.wait();
 #endif
 #endif
 
 
-		m_releaseReadIndex = (m_releaseReadIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT;
+		m_releaseReadIndex = (m_releaseReadIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 
 
 		for (wgpu::Buffer& buffer : m_release[m_releaseReadIndex])
 		for (wgpu::Buffer& buffer : m_release[m_releaseReadIndex])
 		{
 		{
@@ -4044,7 +4044,7 @@ namespace bgfx { namespace webgpu
 
 
 		updateResolution(_render->m_resolution);
 		updateResolution(_render->m_resolution);
 
 
-		m_frameIndex = 0; // (m_frameIndex + 1) % WEBGPU_MAX_FRAMES_IN_FLIGHT;
+		m_frameIndex = 0; // (m_frameIndex + 1) % BGFX_CONFIG_MAX_FRAME_LATENCY;
 
 
 		ScratchBufferWgpu& scratchBuffer = m_scratchBuffers[m_frameIndex];
 		ScratchBufferWgpu& scratchBuffer = m_scratchBuffers[m_frameIndex];
 		scratchBuffer.begin();
 		scratchBuffer.begin();

+ 1 - 2
src/renderer_webgpu.h

@@ -32,7 +32,6 @@
 		BGFX_PROFILER_END();       \
 		BGFX_PROFILER_END();       \
 	BX_MACRO_BLOCK_END
 	BX_MACRO_BLOCK_END
 
 
-#define WEBGPU_MAX_FRAMES_IN_FLIGHT 3
 #define WEBGPU_NUM_UNIFORM_BUFFERS  8
 #define WEBGPU_NUM_UNIFORM_BUFFERS  8
 
 
 namespace bgfx { namespace webgpu
 namespace bgfx { namespace webgpu
@@ -501,7 +500,7 @@ namespace bgfx { namespace webgpu
 		int m_releaseReadIndex = 0;
 		int m_releaseReadIndex = 0;
 
 
 		typedef stl::vector<wgpu::Buffer> ResourceArray;
 		typedef stl::vector<wgpu::Buffer> ResourceArray;
-		ResourceArray m_release[WEBGPU_MAX_FRAMES_IN_FLIGHT];
+		ResourceArray m_release[BGFX_CONFIG_MAX_FRAME_LATENCY];
 	};
 	};
 
 
 	struct TimerQueryWgpu
 	struct TimerQueryWgpu