Quellcode durchsuchen

Various Vulkan/Direct3D 12 enhancements (#2246)

* [VK] Add RT MSAA support

* [VK] Add GenerateMips support

* [VK] Add Screenshot feature

* [D3D12] Add MSAA RT support

* [VK] Fix blit operation on MSAA RT textures

* [D3D12] Fix blit operation on MSAA RT textures
Julian Xhokaxhiu vor 5 Jahren
Ursprung
Commit
2c38e090d2
4 geänderte Dateien mit 533 neuen und 36 gelöschten Zeilen
  1. 81 9
      src/renderer_d3d12.cpp
  2. 3 1
      src/renderer_d3d12.h
  3. 429 26
      src/renderer_vk.cpp
  4. 20 0
      src/renderer_vk.h

+ 81 - 9
src/renderer_d3d12.cpp

@@ -2412,7 +2412,9 @@ namespace bgfx { namespace d3d12
 			if (isValid(m_fbh)
 			&&  m_fbh.idx != _fbh.idx)
 			{
-				const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx];
+				FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx];
+
+				if (m_rtMsaa) frameBuffer.resolve();
 
 				if (NULL == frameBuffer.m_swapChain)
 				{
@@ -3527,7 +3529,7 @@ namespace bgfx { namespace d3d12
 			}
 		}
 
-		device->CreateShaderResourceView(_texture.m_ptr
+		device->CreateShaderResourceView(NULL != _texture.m_singleMsaa ? _texture.m_singleMsaa : _texture.m_ptr
 			, srvd
 			, m_cpuHandle
 			);
@@ -4684,6 +4686,15 @@ namespace bgfx { namespace d3d12
 			const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK);
 			const bool blit         = 0 != (m_flags&BGFX_TEXTURE_BLIT_DST);
 
+			const uint32_t msaaQuality = bx::uint32_satsub((m_flags & BGFX_TEXTURE_RT_MSAA_MASK) >> BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
+			const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
+
+			const bool needResolve = true
+				&& 1 < msaa.Count
+				&& 0 == (m_flags & BGFX_TEXTURE_MSAA_SAMPLE)
+				&& !writeOnly
+				;
+
 			BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s RT[%c], BO[%c], CW[%c]%s."
 				, this - s_renderD3D12->m_textures
 				, getName( (TextureFormat::Enum)m_textureFormat)
@@ -4768,9 +4779,6 @@ namespace bgfx { namespace d3d12
 				}
 			}
 
-			const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
-			const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
-
 			bx::memSet(&m_srvd, 0, sizeof(m_srvd) );
 			m_srvd.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
 			m_srvd.Format = (m_flags & BGFX_TEXTURE_SRGB) ? s_textureFormat[m_textureFormat].m_fmtSrgb : s_textureFormat[m_textureFormat].m_fmtSrv;
@@ -4983,6 +4991,23 @@ namespace bgfx { namespace d3d12
 					}
 				}
 			}
+
+			if (needResolve)
+			{
+				D3D12_RESOURCE_DESC rd = resourceDesc;
+
+				rd.Alignment = 0;
+				rd.SampleDesc = s_msaa[0];
+				rd.Flags &= ~(D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
+
+				m_singleMsaa = createCommittedResource(device, HeapProperty::Texture, &rd, NULL);
+
+				setResourceBarrier(commandList
+					, m_singleMsaa
+					, D3D12_RESOURCE_STATE_COMMON
+					, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+				);
+			}
 		}
 
 		return m_directAccessPtr;
@@ -5004,6 +5029,12 @@ namespace bgfx { namespace d3d12
 				s_renderD3D12->m_cmd.release(m_ptr);
 				m_ptr   = NULL;
 				m_state = D3D12_RESOURCE_STATE_COMMON;
+
+				if (NULL != m_singleMsaa)
+				{
+					s_renderD3D12->m_cmd.release(m_singleMsaa);
+					m_singleMsaa = NULL;
+				}
 			}
 		}
 	}
@@ -5101,9 +5132,36 @@ namespace bgfx { namespace d3d12
 		s_renderD3D12->m_cmd.release(staging);
 	}
 
-	void TextureD3D12::resolve(uint8_t _resolve) const
+	void TextureD3D12::resolve(ID3D12GraphicsCommandList* _commandList, uint8_t _resolve) // Resolves the multisampled m_ptr into the single-sample m_singleMsaa on _commandList; does nothing when m_singleMsaa is NULL.
 	{
 		BX_UNUSED(_resolve);
+
+		bool needResolve = NULL != m_singleMsaa; // non-NULL only when a single-sample resolve target was allocated for this texture
+		if (needResolve)
+		{
+			D3D12_RESOURCE_STATES state = setState(_commandList, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); // presumably returns the state m_ptr was in before; it is handed back to setState() at the end -- confirm against setState()
+
+			setResourceBarrier(_commandList // resolve target: PIXEL_SHADER_RESOURCE -> RESOLVE_DEST
+				, m_singleMsaa
+				, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+				, D3D12_RESOURCE_STATE_RESOLVE_DEST
+			);
+
+			_commandList->ResolveSubresource(m_singleMsaa // only subresource 0 of source and destination is resolved
+				, 0
+				, m_ptr
+				, 0
+				, s_textureFormat[m_textureFormat].m_fmt
+			);
+
+			setResourceBarrier(_commandList // resolve target back to PIXEL_SHADER_RESOURCE
+				, m_singleMsaa
+				, D3D12_RESOURCE_STATE_RESOLVE_DEST
+				, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+			);
+
+			setState(_commandList, state);
+		}
 	}
 
 	D3D12_RESOURCE_STATES TextureD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state)
@@ -5418,8 +5476,8 @@ namespace bgfx { namespace d3d12
 
 				if (isValid(at.handle) )
 				{
-					const TextureD3D12& texture = s_renderD3D12->m_textures[at.handle.idx];
-					texture.resolve(at.resolve);
+					TextureD3D12& texture = s_renderD3D12->m_textures[at.handle.idx];
+					texture.resolve(s_renderD3D12->m_commandList, at.resolve);
 				}
 			}
 		}
@@ -5750,6 +5808,13 @@ namespace bgfx { namespace d3d12
 
 				currentSrc = blit.m_src;
 
+				if (NULL != src.m_singleMsaa)
+					setResourceBarrier(m_commandList
+						, src.m_singleMsaa
+						, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+						, D3D12_RESOURCE_STATE_COPY_SOURCE
+					);
+
 				state = src.setState(m_commandList, D3D12_RESOURCE_STATE_COPY_SOURCE);
 			}
 
@@ -5807,7 +5872,7 @@ namespace bgfx { namespace d3d12
 				dstLocation.Type      = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
 				dstLocation.SubresourceIndex = dstZ*dst.m_numMips+blit.m_dstMip;
 				D3D12_TEXTURE_COPY_LOCATION srcLocation;
-				srcLocation.pResource = src.m_ptr;
+				srcLocation.pResource = NULL != src.m_singleMsaa ? src.m_singleMsaa : src.m_ptr;
 				srcLocation.Type      = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
 				srcLocation.SubresourceIndex = srcZ*src.m_numMips+blit.m_srcMip;
 
@@ -5820,6 +5885,13 @@ namespace bgfx { namespace d3d12
 					, depthStencil ? NULL : &box
 					);
 			}
+
+			if (NULL != src.m_singleMsaa)
+				setResourceBarrier(m_commandList
+					, src.m_singleMsaa
+					, D3D12_RESOURCE_STATE_COPY_SOURCE
+					, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+				);
 		}
 
 		if (isValid(currentSrc)

+ 3 - 1
src/renderer_d3d12.h

@@ -321,6 +321,7 @@ namespace bgfx { namespace d3d12
 
 		TextureD3D12()
 			: m_ptr(NULL)
+			, m_singleMsaa(NULL)
 			, m_directAccessPtr(NULL)
 			, m_state(D3D12_RESOURCE_STATE_COMMON)
 			, m_numMips(0)
@@ -333,12 +334,13 @@ namespace bgfx { namespace d3d12
 		void destroy();
 		void overrideInternal(uintptr_t _ptr);
 		void update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem);
-		void resolve(uint8_t _resolve) const;
+		void resolve(ID3D12GraphicsCommandList* _commandList, uint8_t _resolve);
 		D3D12_RESOURCE_STATES setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state);
 
 		D3D12_SHADER_RESOURCE_VIEW_DESC  m_srvd;
 		D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd;
 		ID3D12Resource* m_ptr;
+		ID3D12Resource* m_singleMsaa;
 		void* m_directAccessPtr;
 		D3D12_RESOURCE_STATES m_state;
 		uint64_t m_flags;

+ 429 - 26
src/renderer_vk.cpp

@@ -55,14 +55,14 @@ namespace bgfx { namespace vk
 		16,
 	};
 
-//	static DXGI_SAMPLE_DESC s_msaa[] =
-//	{
-//		{  1, 0 },
-//		{  2, 0 },
-//		{  4, 0 },
-//		{  8, 0 },
-//		{ 16, 0 },
-//	};
+	static MsaaSamplerVK s_msaa[] = // Indexed by the texture's RT MSAA flag field minus one (saturating at 0); non-const because unsupported entries are overwritten at runtime.
+	{
+		{  1, VK_SAMPLE_COUNT_1_BIT },
+		{  2, VK_SAMPLE_COUNT_2_BIT },
+		{  4, VK_SAMPLE_COUNT_4_BIT },
+		{  8, VK_SAMPLE_COUNT_8_BIT },
+		{ 16, VK_SAMPLE_COUNT_16_BIT },
+	};
 
 	static const VkBlendFactor s_blendFactor[][2] =
 	{
@@ -970,6 +970,7 @@ VK_IMPORT_DEVICE
 			, m_maxAnisotropy(1)
 			, m_depthClamp(false)
 			, m_wireframe(false)
+			, m_rtMsaa(false)
 		{
 		}
 
@@ -1580,6 +1581,16 @@ VK_IMPORT_INSTANCE
 				vkGetPhysicalDeviceFeatures(m_physicalDevice, &m_deviceFeatures);
 				m_deviceFeatures.robustBufferAccess = VK_FALSE;
 
+				{ // Replace every s_msaa entry the device cannot use with the closest lower entry that is supported for BOTH color and depth framebuffer attachments.
+					for (uint16_t ii = 0, last = 0; ii < BX_COUNTOF(s_msaa); ii++)
+					{
+						if (0 != (m_deviceProperties.limits.framebufferColorSampleCounts & m_deviceProperties.limits.framebufferDepthSampleCounts & s_msaa[ii].Sample) ) // the limits are VkSampleCountFlags bitmasks: test this entry's bit instead of comparing magnitudes (a 1x|4x device must not pass for 2x)
+							last = ii;
+						else
+							s_msaa[ii] = s_msaa[last];
+					}
+				}
+
 				{
 					struct ImageTest
 					{
@@ -2051,7 +2062,7 @@ VK_IMPORT_DEVICE
 				m_sci.imageExtent.width  = width;
 				m_sci.imageExtent.height = height;
 				m_sci.imageArrayLayers = 1;
-				m_sci.imageUsage       = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+				m_sci.imageUsage       = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
 				m_sci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
 				m_sci.queueFamilyIndexCount = 0;
 				m_sci.pQueueFamilyIndices   = NULL;
@@ -2624,8 +2635,198 @@ VK_IMPORT_DEVICE
 			m_uniforms[_handle.idx] = NULL;
 		}
 
-		void requestScreenShot(FrameBufferHandle /*_handle*/, const char* /*_filePath*/) override
+		void requestScreenShot(FrameBufferHandle _fbh, const char* _filePath) override // Copies the last swapchain image (or attachment 0 of _fbh when valid) into a host-visible linear image and passes the pixels to g_callback->screenShot(); blocks until the copy has completed.
 		{
+			bool supportsBlit = true;
+
+			// Check blit support for source and destination
+			VkFormatProperties formatProps;
+
+			// Check if the device supports blitting from optimal images (the swapchain images are in optimal format)
+			vkGetPhysicalDeviceFormatProperties(m_physicalDevice, m_sci.imageFormat, &formatProps);
+			if (!(formatProps.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT)) {
+				BX_TRACE("Device does not support blitting from optimal tiled images, using copy instead of blit!\n");
+				supportsBlit = false;
+			}
+
+			// Check if the device supports blitting to linear images
+			vkGetPhysicalDeviceFormatProperties(m_physicalDevice, VK_FORMAT_R8G8B8A8_UNORM, &formatProps);
+			if (!(formatProps.linearTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) {
+				BX_TRACE("Device does not support blitting to linear tiled images, using copy instead of blit!\n");
+				supportsBlit = false;
+			}
+
+			// Source for the copy is the last rendered swapchain image
+			VkImage srcImage = m_backBufferColorImage[m_backBufferColorIdx];
+			uint32_t width = m_sci.imageExtent.width, height = m_sci.imageExtent.height; // NOTE(review): stays the swapchain extent even when _fbh selects a framebuffer texture below -- confirm the sizes always match
+
+			if (isValid(_fbh))
+			{
+				TextureVK& texture = m_textures[m_frameBuffers[_fbh.idx].m_attachment[0].handle.idx];
+				srcImage = VK_NULL_HANDLE != texture.m_singleMsaaImage ? texture.m_singleMsaaImage : texture.m_textureImage; // prefer the single-sample resolve image when the texture has one
+			}
+
+			// Create the linear tiled destination image to copy to and to read the memory from
+			VkImage dstImage = VK_NULL_HANDLE;
+			VkImageCreateInfo ici;
+			ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+			ici.pNext = NULL;
+			ici.flags = 0;
+			// Note that vkCmdBlitImage (if supported) will also do format conversions if the swapchain color format would differ
+			ici.imageType = VK_IMAGE_TYPE_2D;
+			ici.format = VK_FORMAT_R8G8B8A8_UNORM;
+			ici.extent.width = width;
+			ici.extent.height = height;
+			ici.extent.depth = 1;
+			ici.arrayLayers = 1;
+			ici.mipLevels = 1;
+			ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+			ici.samples = VK_SAMPLE_COUNT_1_BIT;
+			ici.tiling = VK_IMAGE_TILING_LINEAR;
+			ici.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+			ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+			ici.queueFamilyIndexCount = 0;
+			ici.pQueueFamilyIndices = NULL;
+			// Create the image
+			VK_CHECK(vkCreateImage(m_device, &ici, m_allocatorCb, &dstImage));
+
+			// Create memory to back up the image
+			VkMemoryRequirements memRequirements;
+			vkGetImageMemoryRequirements(m_device, dstImage, &memRequirements);
+
+			VkMemoryAllocateInfo ma; // NOTE(review): filled in but never read in this function; allocateMemory() below is given memRequirements directly
+			ma.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+			ma.pNext = NULL;
+			ma.allocationSize = memRequirements.size;
+
+			VkDeviceMemory dstImageMemory = VK_NULL_HANDLE;
+			// Memory must be host visible to copy from
+			VK_CHECK(allocateMemory(&memRequirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &dstImageMemory));
+			VK_CHECK(vkBindImageMemory(m_device, dstImage, dstImageMemory, 0));
+
+			// Do the actual blit from the swapchain image to our host visible destination image
+			VkCommandBuffer copyCmd = beginNewCommand();
+
+			// Transition destination image to transfer destination layout
+			bgfx::vk::setImageMemoryBarrier(copyCmd
+				, dstImage
+				, VK_IMAGE_ASPECT_COLOR_BIT
+				, VK_IMAGE_LAYOUT_UNDEFINED
+				, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+				, 1
+				, 1
+			);
+
+			// Transition swapchain image from present to transfer source layout
+			bgfx::vk::setImageMemoryBarrier(copyCmd
+				, srcImage
+				, VK_IMAGE_ASPECT_COLOR_BIT
+				, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR // NOTE(review): also used as the old layout when srcImage is a framebuffer texture rather than a swapchain image -- confirm
+				, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+				, 1
+				, 1
+			);
+
+			// If source and destination support blit we'll blit as this also does automatic format conversion (e.g. from BGR to RGB)
+			if (supportsBlit)
+			{
+				// Define the region to blit (we will blit the whole swapchain image)
+				VkOffset3D blitSize;
+				blitSize.x = width;
+				blitSize.y = height;
+				blitSize.z = 1;
+				VkImageBlit imageBlitRegion{};
+				imageBlitRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+				imageBlitRegion.srcSubresource.layerCount = 1;
+				imageBlitRegion.srcOffsets[1] = blitSize;
+				imageBlitRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+				imageBlitRegion.dstSubresource.layerCount = 1;
+				imageBlitRegion.dstOffsets[1] = blitSize;
+
+				// Issue the blit command
+				vkCmdBlitImage(
+					copyCmd,
+					srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+					dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+					1,
+					&imageBlitRegion,
+					VK_FILTER_NEAREST);
+			}
+			else
+			{
+				// Otherwise use image copy (requires us to manually flip components)
+				VkImageCopy imageCopyRegion{};
+				imageCopyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+				imageCopyRegion.srcSubresource.layerCount = 1;
+				imageCopyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+				imageCopyRegion.dstSubresource.layerCount = 1;
+				imageCopyRegion.extent.width = width;
+				imageCopyRegion.extent.height = height;
+				imageCopyRegion.extent.depth = 1;
+
+				// Issue the copy command
+				vkCmdCopyImage(
+					copyCmd,
+					srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+					dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+					1,
+					&imageCopyRegion);
+			}
+
+			// Transition destination image to general layout, which is the required layout for mapping the image memory later on
+			bgfx::vk::setImageMemoryBarrier(copyCmd
+				, dstImage
+				, VK_IMAGE_ASPECT_COLOR_BIT
+				, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+				, VK_IMAGE_LAYOUT_GENERAL
+				, 1
+				, 1
+			);
+
+			// Transition back the swap chain image after the blit is done
+			bgfx::vk::setImageMemoryBarrier(copyCmd
+				, srcImage
+				, VK_IMAGE_ASPECT_COLOR_BIT
+				, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+				, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
+				, 1
+				, 1
+			);
+
+			submitCommandAndWait(copyCmd);
+
+			// Get layout of the image (including row pitch)
+			VkImageSubresource subResource{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 };
+			VkSubresourceLayout subResourceLayout;
+			vkGetImageSubresourceLayout(m_device, dstImage, &subResource, &subResourceLayout);
+
+			// Map image memory so we can start copying from it
+			char* data = NULL;
+			VK_CHECK(vkMapMemory(m_device, dstImageMemory, 0, VK_WHOLE_SIZE, 0, (void**)&data) ); // checked like the other Vulkan calls in this function; data must not be used if mapping failed
+			data += subResourceLayout.offset;
+
+			bimg::imageSwizzleBgra8( // in place: the same buffer and pitch are passed for both pointer/pitch pairs. NOTE(review): runs on both the blit and the copy path -- confirm the channel order is right for each
+				data
+				, subResourceLayout.rowPitch
+				, width
+				, height
+				, data
+				, subResourceLayout.rowPitch
+			);
+
+			g_callback->screenShot(_filePath
+				, width
+				, height
+				, subResourceLayout.rowPitch
+				, data
+				, height * subResourceLayout.rowPitch
+				, false
+			);
+
+			// Clean up resources
+			vkUnmapMemory(m_device, dstImageMemory);
+			vkFreeMemory(m_device, dstImageMemory, m_allocatorCb);
+			vkDestroyImage(m_device, dstImage, m_allocatorCb);
 		}
 
 		void updateViewName(ViewId _id, const char* _name) override
@@ -3089,9 +3290,11 @@ VK_IMPORT_DEVICE
 			if (isValid(m_fbh)
 			&&  m_fbh.idx != _fbh.idx)
 			{
-				const FrameBufferVK& frameBuffer = m_frameBuffers[m_fbh.idx];
+				FrameBufferVK& frameBuffer = m_frameBuffers[m_fbh.idx];
 				BX_UNUSED(frameBuffer);
 
+				if (m_rtMsaa) frameBuffer.resolve();
+
 				for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii)
 				{
 					TextureVK& texture = m_textures[frameBuffer.m_texture[ii].idx];
@@ -3169,7 +3372,7 @@ VK_IMPORT_DEVICE
 			}
 
 			m_fbh = _fbh;
-//			m_rtMsaa = _msaa;
+			m_rtMsaa = _msaa;
 		}
 
 		void setBlendState(VkPipelineColorBlendStateCreateInfo& _desc, uint64_t _state, uint32_t _rgba = 0)
@@ -3410,7 +3613,7 @@ VK_IMPORT_DEVICE
 				TextureVK& texture = m_textures[_attachments[ii].handle.idx];
 				ad[ii].flags          = 0;
 				ad[ii].format         = texture.m_format;
-				ad[ii].samples        = VK_SAMPLE_COUNT_1_BIT;
+				ad[ii].samples        = texture.m_sampler.Sample;
 
 				if (texture.m_aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
 				{
@@ -3701,11 +3904,13 @@ VK_IMPORT_DEVICE
 			viewportState.scissorCount  = 1;
 			viewportState.pScissors     = NULL;
 
+			VkSampleCountFlagBits rasterizerMsaa = (isValid(m_fbh) && !!(BGFX_STATE_MSAA & _state) ? m_textures[m_frameBuffers[m_fbh.idx].m_attachment[0].handle.idx].m_sampler.Sample : VK_SAMPLE_COUNT_1_BIT);
+
 			VkPipelineMultisampleStateCreateInfo multisampleState;
 			multisampleState.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
 			multisampleState.pNext = NULL;
 			multisampleState.flags = 0;
-			multisampleState.rasterizationSamples  = VK_SAMPLE_COUNT_1_BIT;
+			multisampleState.rasterizationSamples  = rasterizerMsaa;
 			multisampleState.sampleShadingEnable   = VK_FALSE;
 			multisampleState.minSampleShading      = !!(BGFX_STATE_CONSERVATIVE_RASTER & _state) ? 1.0f : 0.0f;
 			multisampleState.pSampleMask           = NULL;
@@ -3912,10 +4117,17 @@ VK_IMPORT_DEVICE
 						}
 
 						imageInfo[imageCount].imageLayout = texture.m_currentImageLayout;
+
 						imageInfo[imageCount].imageView   = VK_NULL_HANDLE != texture.m_textureImageDepthView
 							? texture.m_textureImageDepthView
 							: texture.m_textureImageView
 							;
+
+						if (VK_NULL_HANDLE != texture.m_singleMsaaImageView)
+						{
+							imageInfo[imageCount].imageView = texture.m_singleMsaaImageView;
+						}
+
 						imageInfo[imageCount].sampler     = sampler;
 
 						wds[wdsCount].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@@ -4353,6 +4565,7 @@ VK_IMPORT_DEVICE
 		uint32_t m_maxAnisotropy;
 		bool m_depthClamp;
 		bool m_wireframe;
+		bool m_rtMsaa;
 
 		TextVideoMem m_textVideoMem;
 
@@ -5208,6 +5421,8 @@ VK_DESTROY
 				: VK_IMAGE_ASPECT_COLOR_BIT
 				;
 
+			m_sampler = s_msaa[bx::uint32_satsub((m_flags & BGFX_TEXTURE_RT_MSAA_MASK) >> BGFX_TEXTURE_RT_MSAA_SHIFT, 1)];
+
 			if (m_format == VK_FORMAT_S8_UINT
 			||  m_format == VK_FORMAT_D16_UNORM_S8_UINT
 			||  m_format == VK_FORMAT_D24_UNORM_S8_UINT
@@ -5244,6 +5459,12 @@ VK_DESTROY
 			const bool renderTarget = 0 != (m_flags & BGFX_TEXTURE_RT_MASK);
 			const bool blit         = 0 != (m_flags & BGFX_TEXTURE_BLIT_DST);
 
+			const bool needResolve = true
+				&& 1 < m_sampler.Count
+				&& 0 == (m_flags & BGFX_TEXTURE_MSAA_SAMPLE)
+				&& !writeOnly
+				;
+
 			BX_UNUSED(swizzle, writeOnly, computeWrite, renderTarget, blit);
 
 			BX_TRACE(
@@ -5468,7 +5689,7 @@ VK_DESTROY
 				| (_flags & BGFX_TEXTURE_COMPUTE_WRITE ? VK_IMAGE_USAGE_STORAGE_BIT : 0)
 				;
 			ici.format        = m_format;
-			ici.samples       = VK_SAMPLE_COUNT_1_BIT;
+			ici.samples       = m_sampler.Sample;
 			ici.mipLevels     = m_numMips;
 			ici.arrayLayers   = m_numSides;
 			ici.extent.width  = m_width;
@@ -5528,9 +5749,9 @@ VK_DESTROY
 				viewInfo.components = m_components;
 				viewInfo.subresourceRange.aspectMask     = m_aspectMask;
 				viewInfo.subresourceRange.baseMipLevel   = 0;
-				viewInfo.subresourceRange.levelCount     = m_numMips; //m_numMips;
+				viewInfo.subresourceRange.levelCount     = m_numMips;
 				viewInfo.subresourceRange.baseArrayLayer = 0;
-				viewInfo.subresourceRange.layerCount     = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers);
+				viewInfo.subresourceRange.layerCount     = m_numSides;
 				VK_CHECK(vkCreateImageView(
 					  device
 					, &viewInfo
@@ -5552,9 +5773,9 @@ VK_DESTROY
 				viewInfo.components = m_components;
 				viewInfo.subresourceRange.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT;
 				viewInfo.subresourceRange.baseMipLevel   = 0;
-				viewInfo.subresourceRange.levelCount     = m_numMips; //m_numMips;
+				viewInfo.subresourceRange.levelCount     = m_numMips;
 				viewInfo.subresourceRange.baseArrayLayer = 0;
-				viewInfo.subresourceRange.layerCount     = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers);
+				viewInfo.subresourceRange.layerCount     = m_numSides;
 				VK_CHECK(vkCreateImageView(
 					device
 					, &viewInfo
@@ -5579,9 +5800,9 @@ VK_DESTROY
 				viewInfo.components = m_components;
 				viewInfo.subresourceRange.aspectMask     = m_aspectMask;
 				viewInfo.subresourceRange.baseMipLevel   = 0;
-				viewInfo.subresourceRange.levelCount     = m_numMips; //m_numMips;
+				viewInfo.subresourceRange.levelCount     = m_numMips;
 				viewInfo.subresourceRange.baseArrayLayer = 0;
-				viewInfo.subresourceRange.layerCount     = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers);
+				viewInfo.subresourceRange.layerCount     = m_numSides;
 				VK_CHECK(vkCreateImageView(
 					  device
 					, &viewInfo
@@ -5589,6 +5810,61 @@ VK_DESTROY
 					, &m_textureImageStorageView
 					) );
 			}
+
+			if (needResolve)
+			{
+				{
+					VkImageCreateInfo ici_resolve = ici;
+					ici_resolve.samples = s_msaa[0].Sample;
+					ici_resolve.usage &= ~(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+
+					VK_CHECK(vkCreateImage(device, &ici_resolve, allocatorCb, &m_singleMsaaImage));
+
+					VkMemoryRequirements imageMemReq_resolve;
+					vkGetImageMemoryRequirements(device, m_singleMsaaImage, &imageMemReq_resolve);
+
+					VK_CHECK(s_renderVK->allocateMemory(&imageMemReq_resolve, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &m_singleMsaaDeviceMem));
+
+					vkBindImageMemory(device, m_singleMsaaImage, m_singleMsaaDeviceMem, 0);
+				}
+
+				{
+					VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand();
+
+					bgfx::vk::setImageMemoryBarrier(commandBuffer
+						, m_singleMsaaImage
+						, m_aspectMask
+						, VK_IMAGE_LAYOUT_UNDEFINED
+						, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+						, m_numMips
+						, m_numSides
+					);
+
+					s_renderVK->submitCommandAndWait(commandBuffer);
+				}
+
+				{
+					VkImageViewCreateInfo viewInfo;
+					viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+					viewInfo.pNext = NULL;
+					viewInfo.flags = 0;
+					viewInfo.image = m_singleMsaaImage;
+					viewInfo.viewType = m_type;
+					viewInfo.format = m_format;
+					viewInfo.components = m_components;
+					viewInfo.subresourceRange.aspectMask = m_aspectMask;
+					viewInfo.subresourceRange.baseMipLevel = 0;
+					viewInfo.subresourceRange.levelCount = m_numMips;
+					viewInfo.subresourceRange.baseArrayLayer = 0;
+					viewInfo.subresourceRange.layerCount = m_numSides;
+					VK_CHECK(vkCreateImageView(
+						device
+						, &viewInfo
+						, allocatorCb
+						, &m_singleMsaaImageView
+					));
+				}
+			}
 		}
 
 		return m_directAccessPtr;
@@ -5596,20 +5872,28 @@ VK_DESTROY
 
 	void TextureVK::destroy()
 	{
+		VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb; // shared by both cleanup branches below
+		VkDevice device = s_renderVK->m_device;
+
 		if (m_textureImage)
 		{
-			VkAllocationCallbacks* allocatorCb = s_renderVK->m_allocatorCb;
-			VkDevice device = s_renderVK->m_device;
-
 			vkFreeMemory(device, m_textureDeviceMem, allocatorCb);
 
 			vkDestroy(m_textureImageStorageView);
 			vkDestroy(m_textureImageDepthView);
 			vkDestroy(m_textureImageView);
 			vkDestroy(m_textureImage);
+		}
 
-			m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+		if (m_singleMsaaImage) // single-sample resolve image with its memory and view, if one was created
+		{
+			vkFreeMemory(device, m_singleMsaaDeviceMem, allocatorCb);
+
+			vkDestroy(m_singleMsaaImageView);
+			vkDestroy(m_singleMsaaImage);
 		}
+
+		m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; // reset regardless of whether any image existed
 	}
 
 	void TextureVK::update(VkCommandPool _commandPool, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
@@ -5699,6 +5983,108 @@ VK_DESTROY
 		}
 	}
 
+	void TextureVK::resolve(uint8_t _resolve) // Resolves m_textureImage into m_singleMsaaImage when that image exists, then, for render targets with more than one mip and BGFX_RESOLVE_AUTO_GEN_MIPS set in _resolve, rebuilds the mip chain with successive blits. Each step uses its own command buffer submitted through submitCommandAndWait().
+	{
+		BX_UNUSED(_resolve); // _resolve is in fact read further down (auto-gen-mips test)
+
+		bool needResolve = VK_NULL_HANDLE != m_singleMsaaImage; // a single-sample resolve target exists for this texture
+		if (needResolve)
+		{
+			VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand();
+
+			VkImageResolve blitInfo; // full-size region, mip 0, array layer 0 only
+			blitInfo.srcOffset.x = 0;
+			blitInfo.srcOffset.y = 0;
+			blitInfo.srcOffset.z = 0;
+			blitInfo.dstOffset.x = 0;
+			blitInfo.dstOffset.y = 0;
+			blitInfo.dstOffset.z = 0;
+			blitInfo.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; // NOTE(review): color aspect is hard-coded here while the mip path below uses m_aspectMask -- confirm depth targets never reach this branch
+			blitInfo.srcSubresource.mipLevel = 0;
+			blitInfo.srcSubresource.baseArrayLayer = 0;
+			blitInfo.srcSubresource.layerCount = 1;
+			blitInfo.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+			blitInfo.dstSubresource.mipLevel = 0;
+			blitInfo.dstSubresource.baseArrayLayer = 0;
+			blitInfo.dstSubresource.layerCount = 1;
+			blitInfo.extent.width = m_width;
+			blitInfo.extent.height = m_height;
+			blitInfo.extent.depth = 1;
+
+			vkCmdResolveImage(commandBuffer, // NOTE(review): no layout transition is recorded in this function before the resolve, and m_singleMsaaImage is put into SHADER_READ_ONLY_OPTIMAL when it is created -- confirm both images really are in the layouts named below
+				m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+				m_singleMsaaImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+				1, &blitInfo);
+
+			s_renderVK->submitCommandAndWait(commandBuffer);
+		}
+
+		const bool renderTarget = 0 != (m_flags & BGFX_TEXTURE_RT_MASK);
+		if (renderTarget
+			&& 1 < m_numMips
+			&& 0 != (_resolve & BGFX_RESOLVE_AUTO_GEN_MIPS))
+		{
+			VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand();
+
+			int32_t mipWidth = m_width; // extent of the source level (i - 1) for the current iteration
+			int32_t mipHeight = m_height;
+
+			for (uint32_t i = 1; i < m_numMips; i++) { // each pass blits level i - 1 into level i of the same image (layer 0 only)
+				bgfx::vk::setImageMemoryBarrier(commandBuffer
+					, needResolve ? m_singleMsaaImage : m_textureImage
+					, m_aspectMask
+					, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+					, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+					, i - 1 // NOTE(review): other call sites pass what look like (mip count, layer count) in these two positions, e.g. (m_numMips, m_numSides) -- confirm the helper treats this value as a mip index
+					, 1
+				);
+
+				VkImageBlit blit{};
+				blit.srcOffsets[0] = { 0, 0, 0 };
+				blit.srcOffsets[1] = { mipWidth, mipHeight, 1 };
+				blit.srcSubresource.aspectMask = m_aspectMask;
+				blit.srcSubresource.mipLevel = i - 1;
+				blit.srcSubresource.baseArrayLayer = 0;
+				blit.srcSubresource.layerCount = 1;
+				blit.dstOffsets[0] = { 0, 0, 0 };
+				blit.dstOffsets[1] = { mipWidth > 1 ? mipWidth / 2 : 1, mipHeight > 1 ? mipHeight / 2 : 1, 1 }; // half the source extent, never below 1
+				blit.dstSubresource.aspectMask = m_aspectMask;
+				blit.dstSubresource.mipLevel = i;
+				blit.dstSubresource.baseArrayLayer = 0;
+				blit.dstSubresource.layerCount = 1;
+
+				vkCmdBlitImage(commandBuffer,
+					needResolve ? m_singleMsaaImage : m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+					needResolve ? m_singleMsaaImage : m_textureImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+					1, &blit,
+					VK_FILTER_LINEAR);
+
+				bgfx::vk::setImageMemoryBarrier(commandBuffer
+					, needResolve ? m_singleMsaaImage : m_textureImage
+					, m_aspectMask
+					, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+					, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+					, i - 1 // NOTE(review): same question as for the barrier at the top of the loop
+					, 1
+				);
+
+				if (mipWidth > 1) mipWidth /= 2;
+				if (mipHeight > 1) mipHeight /= 2;
+			}
+
+			bgfx::vk::setImageMemoryBarrier(commandBuffer // the last level was only ever a blit destination, so it is moved to SHADER_READ_ONLY_OPTIMAL separately
+				, needResolve ? m_singleMsaaImage : m_textureImage
+				, m_aspectMask
+				, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+				, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+				, m_numMips - 1
+				, 1
+			);
+
+			s_renderVK->submitCommandAndWait(commandBuffer);
+		}
+	}
+
 	void TextureVK::copyBufferToTexture(VkBuffer stagingBuffer, uint32_t bufferImageCopyCount, VkBufferImageCopy* bufferImageCopy)
 	{
 		VkCommandBuffer commandBuffer = s_renderVK->beginNewCommand();
@@ -5779,6 +6165,23 @@ VK_DESTROY
 		m_renderPass = renderPass;
 	}
 
+	void FrameBufferVK::resolve() // Calls TextureVK::resolve() on the texture of every valid attachment, passing that attachment's resolve flags.
+	{
+		if (0 < m_numAttachment)
+		{
+			for (uint32_t ii = 0; ii < m_numAttachment; ++ii)
+			{
+				const Attachment& at = m_attachment[ii];
+
+				if (isValid(at.handle)) // attachments with an invalid texture handle are skipped
+				{
+					TextureVK& texture = s_renderVK->m_textures[at.handle.idx];
+					texture.resolve(at.resolve);
+				}
+			}
+		}
+	}
+
 	void FrameBufferVK::destroy()
 	{
 		vkDestroy(m_framebuffer);
@@ -5853,7 +6256,7 @@ VK_DESTROY
 			VkFilter filter = bimg::isDepth(bimg::TextureFormat::Enum(src.m_textureFormat) ) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR;
 			vkCmdBlitImage(
 				  commandBuffer
-				, src.m_textureImage
+				, VK_NULL_HANDLE != src.m_singleMsaaImage ? src.m_singleMsaaImage : src.m_textureImage
 				, src.m_currentImageLayout
 				, dst.m_textureImage
 				, dst.m_currentImageLayout

+ 20 - 0
src/renderer_vk.h

@@ -108,6 +108,7 @@
 			VK_IMPORT_DEVICE_FUNC(false, vkFreeCommandBuffers);            \
 			VK_IMPORT_DEVICE_FUNC(false, vkGetBufferMemoryRequirements);   \
 			VK_IMPORT_DEVICE_FUNC(false, vkGetImageMemoryRequirements);    \
+			VK_IMPORT_DEVICE_FUNC(false, vkGetImageSubresourceLayout);     \
 			VK_IMPORT_DEVICE_FUNC(false, vkAllocateMemory);                \
 			VK_IMPORT_DEVICE_FUNC(false, vkFreeMemory);                    \
 			VK_IMPORT_DEVICE_FUNC(false, vkCreateImage);                   \
@@ -171,6 +172,7 @@
 			VK_IMPORT_DEVICE_FUNC(false, vkCmdResolveImage);               \
 			VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyBuffer);                 \
 			VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyBufferToImage);          \
+			VK_IMPORT_DEVICE_FUNC(false, vkCmdCopyImage);                  \
 			VK_IMPORT_DEVICE_FUNC(false, vkCmdBlitImage);                  \
 			VK_IMPORT_DEVICE_FUNC(false, vkMapMemory);                     \
 			VK_IMPORT_DEVICE_FUNC(false, vkUnmapMemory);                   \
@@ -423,6 +425,12 @@ VK_DESTROY
 
 	typedef BufferVK IndexBufferVK;
 
+	struct MsaaSamplerVK // Pairs a numeric sample count with the matching Vulkan sample-count flag; element type of the s_msaa table in renderer_vk.cpp.
+	{
+		uint16_t Count; // number of samples (1, 2, 4, 8 or 16 in s_msaa)
+		VkSampleCountFlagBits Sample; // the VK_SAMPLE_COUNT_*_BIT value paired with Count
+	};
+
 	struct VertexBufferVK : public BufferVK
 	{
 		void create(uint32_t _size, void* _data, VertexLayoutHandle _layoutHandle, uint16_t _flags);
@@ -523,12 +531,17 @@ VK_DESTROY
 			, m_textureImageDepthView(VK_NULL_HANDLE)
 			, m_textureImageStorageView(VK_NULL_HANDLE)
 			, m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
+			, m_sampler({ 1, VK_SAMPLE_COUNT_1_BIT })
+			, m_singleMsaaImage(VK_NULL_HANDLE)
+			, m_singleMsaaDeviceMem(VK_NULL_HANDLE)
+			, m_singleMsaaImageView(VK_NULL_HANDLE)
 		{
 		}
 
 		void* create(const Memory* _mem, uint64_t _flags, uint8_t _skip);
 		void destroy();
 		void update(VkCommandPool commandPool, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem);
+		void resolve(uint8_t _resolve);
 
 		void copyBufferToTexture(VkBuffer stagingBuffer, uint32_t bufferImageCopyCount, VkBufferImageCopy* bufferImageCopy);
 		void setImageMemoryBarrier(VkCommandBuffer commandBuffer, VkImageLayout newImageLayout);
@@ -544,6 +557,8 @@ VK_DESTROY
 		uint8_t  m_textureFormat;
 		uint8_t  m_numMips;
 
+		MsaaSamplerVK m_sampler;
+
 		VkImageViewType m_type;
 		VkFormat m_format;
 		VkComponentMapping m_components;
@@ -555,6 +570,10 @@ VK_DESTROY
 		VkImageView m_textureImageDepthView;
 		VkImageView m_textureImageStorageView;
 		VkImageLayout m_currentImageLayout;
+
+		VkImage m_singleMsaaImage;
+		VkDeviceMemory m_singleMsaaDeviceMem;
+		VkImageView m_singleMsaaImageView;
 	};
 
 	struct FrameBufferVK
@@ -570,6 +589,7 @@ VK_DESTROY
 		{
 		}
 		void create(uint8_t _num, const Attachment* _attachment);
+		void resolve();
 		void destroy();
 
 		TextureHandle m_texture[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];