Bläddra i källkod

VK: descriptor set optimization / refactoring

- add support for indirect compute calls
- use dynamic uniform buffers instead of plain uniform buffers
- create depth view for depth/stencil texture
- fix dynamic uniform buffer bug, apply it to compute shader
- refactor descriptor set allocation / setting
- fix image layout reinit
- get and set device feature / fix independent blending
- cleanup
Rinthel 6 år sedan
förälder
incheckning
4342db8f3b
2 ändrade filer med 338 tillägg och 425 borttagningar
  1. 330 425
      src/renderer_vk.cpp
  2. 8 0
      src/renderer_vk.h

+ 330 - 425
src/renderer_vk.cpp

@@ -1149,6 +1149,7 @@ VK_IMPORT_INSTANCE
 
 				g_caps.supported |= ( 0
 					| BGFX_CAPS_TEXTURE_BLIT
+					| BGFX_CAPS_DRAW_INDIRECT
 					| BGFX_CAPS_INSTANCING
 					);
 				g_caps.limits.maxTextureSize     = m_deviceProperties.limits.maxImageDimension2D;
@@ -1156,6 +1157,8 @@ VK_IMPORT_INSTANCE
 				g_caps.limits.maxComputeBindings = BGFX_MAX_COMPUTE_BINDINGS;
 				g_caps.limits.maxVertexStreams   = BGFX_CONFIG_MAX_VERTEX_STREAMS;
 
+				vkGetPhysicalDeviceFeatures(m_physicalDevice, &m_deviceFeatures);
+
 				{
 					struct ImageTest
 					{
@@ -1345,7 +1348,7 @@ VK_IMPORT_INSTANCE
 				dci.ppEnabledLayerNames  = enabledLayerNames;
 				dci.enabledExtensionCount   = numEnabledExtensions;
 				dci.ppEnabledExtensionNames = enabledExtension;
-				dci.pEnabledFeatures = NULL;
+				dci.pEnabledFeatures = &m_deviceFeatures;
 
 				result = vkCreateDevice(
 					  m_physicalDevice
@@ -2007,9 +2010,9 @@ VK_IMPORT_DEVICE
 //					{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
 					{ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,          (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
 					{ VK_DESCRIPTOR_TYPE_SAMPLER,                (10 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) << 10 },
-					{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,         10<<10                           },
+					{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 10<<10                           },
 					{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,         BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
-					{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,         BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
+					{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,          BGFX_CONFIG_MAX_TEXTURE_SAMPLERS << 10 },
 				};
 
 // 				VkDescriptorSetLayoutBinding dslb[] =
@@ -2611,11 +2614,13 @@ VK_IMPORT_DEVICE
 			uint32_t samplerFlags = (uint32_t)(texture.m_flags & BGFX_SAMPLER_BITS_MASK);
 			VkSampler sampler = getSampler(samplerFlags, 1);
 
+			uint32_t bufferOffset = scratchBuffer.m_pos;
 			VkDescriptorBufferInfo bufferInfo;
 			bufferInfo.buffer = scratchBuffer.m_buffer;
-			bufferInfo.offset = scratchBuffer.m_pos;
-			bufferInfo.range	 = bx::strideAlign(program.m_vsh->m_size, align);
+			bufferInfo.offset = 0;
+			bufferInfo.range  = bx::strideAlign(program.m_vsh->m_size, align);
 			bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
+			scratchBuffer.m_pos += bufferInfo.range;
 
 			VkWriteDescriptorSet wds[3];
 			wds[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@@ -2624,7 +2629,7 @@ VK_IMPORT_DEVICE
 			wds[0].dstBinding = program.m_vsh->m_uniformBinding;
 			wds[0].dstArrayElement = 0;
 			wds[0].descriptorCount = 1;
-			wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+			wds[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
 			wds[0].pImageInfo = NULL;
 			wds[0].pBufferInfo = &bufferInfo;
 			wds[0].pTexelBufferView = NULL;
@@ -2667,8 +2672,8 @@ VK_IMPORT_DEVICE
 				, 0
 				, 1
 				, &scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs]
-				, 0
-				, NULL
+				, 1
+				, &bufferOffset
 				);
 
 			scratchBuffer.m_currentDs++;
@@ -2983,7 +2988,7 @@ VK_IMPORT_DEVICE
 				numAttachments = frameBuffer.m_num;
 			}
 
-			if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) )
+			if (!!(BGFX_STATE_BLEND_INDEPENDENT & _state) && m_deviceFeatures.independentBlend )
 			{
 				for (uint32_t ii = 1, rgba = _rgba; ii < numAttachments; ++ii, rgba >>= 11)
 				{
@@ -3265,7 +3270,7 @@ VK_IMPORT_DEVICE
 			sci.addressModeW     = s_textureAddress[(_samplerFlags&BGFX_SAMPLER_W_MASK)>>BGFX_SAMPLER_W_SHIFT];
 			sci.mipLodBias       = 0.0f;
 			sci.anisotropyEnable = VK_FALSE;
-			sci.maxAnisotropy    = 0;
+			sci.maxAnisotropy    = 4.0f;
 			sci.compareEnable    = 0 != cmpFunc;
 			sci.compareOp        = s_cmpFunc[cmpFunc];
 			sci.minLod           = 0.0f;
@@ -3556,6 +3561,202 @@ VK_IMPORT_DEVICE
 			return pipeline;
 		}
 
+		// Allocates one descriptor set for `program` from m_descriptorPool, stores it in the
+		// next free slot of `scratchBuffer.m_descriptorSet` (advancing m_currentDs), and writes
+		// descriptors for every valid entry of `renderBind.m_bind` plus the program's uniform
+		// blocks.
+		//
+		// program       - supplies the descriptor set layout hash, the per-stage bind info of the
+		//                 vertex/compute shader (m_vsh) and optional fragment shader (m_fsh), and
+		//                 the uniform block sizes/bindings.
+		// renderBind    - resources bound per stage; entries with kInvalidHandle are skipped.
+		// scratchBuffer - receives the new descriptor set; its m_buffer backs the uniform blocks.
+		//
+		// Uniform blocks are written as VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC with offset 0;
+		// the position inside the scratch buffer is expected to be supplied as a dynamic offset
+		// when the set is bound.
+		void allocDescriptorSet(ProgramVK& program, const RenderBind& renderBind, ScratchBufferVK& scratchBuffer)
+		{
+			// Worst-case capacities:
+			//  - every stage may reference one image                          -> MAX image infos
+			//  - every stage may be a storage buffer, plus VS and FS uniforms -> MAX + 2 buffer infos
+			//  - every sampler stage emits two writes (image + sampler), plus
+			//    VS and FS uniforms                                           -> 2 * MAX + 2 writes
+			const uint32_t kMaxImageInfos  = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS;
+			const uint32_t kMaxBufferInfos = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS + 2;
+			const uint32_t kMaxWrites      = 2 * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS + 2;
+
+			VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);
+			VkDescriptorSetAllocateInfo dsai;
+			dsai.sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+			dsai.pNext              = NULL;
+			dsai.descriptorPool     = m_descriptorPool;
+			dsai.descriptorSetCount = 1;
+			dsai.pSetLayouts        = &dsl;
+
+			// Check the result like the other Vulkan calls in this file do; a failed allocation
+			// would otherwise leave an unusable handle that is written to and bound below.
+			VkDescriptorSet& descriptorSet = scratchBuffer.m_descriptorSet[scratchBuffer.m_currentDs];
+			VK_CHECK(vkAllocateDescriptorSets(m_device, &dsai, &descriptorSet) );
+			scratchBuffer.m_currentDs++;
+
+			VkDescriptorImageInfo  imageInfo[kMaxImageInfos];
+			VkDescriptorBufferInfo bufferInfo[kMaxBufferInfos];
+			VkWriteDescriptorSet   wds[kMaxWrites];
+			bx::memSet(wds, 0, sizeof(wds) );
+			uint32_t wdsCount    = 0;
+			uint32_t bufferCount = 0;
+			uint32_t imageCount  = 0;
+
+			for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
+			{
+				const Binding& bind = renderBind.m_bind[stage];
+				if (kInvalidHandle == bind.m_idx)
+				{
+					continue;
+				}
+
+				// The vertex/compute shader's bind info takes precedence over the fragment shader's.
+				const ShaderVK::BindInfo* bindInfo = NULL;
+				if (isValid(program.m_vsh->m_bindInfo[stage].uniformHandle) )
+				{
+					bindInfo = &(program.m_vsh->m_bindInfo[stage]);
+				}
+				else if (NULL != program.m_fsh && isValid(program.m_fsh->m_bindInfo[stage].uniformHandle) )
+				{
+					bindInfo = &(program.m_fsh->m_bindInfo[stage]);
+				}
+
+				if (NULL == bindInfo)
+				{
+					// Neither shader uses this stage.
+					continue;
+				}
+
+				if (ShaderVK::BindType::Storage == bindInfo->type)
+				{
+					// For storage bindings the descriptor type is carried in `samplerBinding`.
+					const VkDescriptorType descriptorType = (VkDescriptorType)bindInfo->samplerBinding;
+
+					VkWriteDescriptorSet& write = wds[wdsCount];
+					write.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+					write.pNext            = NULL;
+					write.dstSet           = descriptorSet;
+					write.dstBinding       = bindInfo->binding;
+					write.dstArrayElement  = 0;
+					write.descriptorCount  = 1;
+					write.descriptorType   = descriptorType;
+					write.pImageInfo       = NULL;
+					write.pBufferInfo      = NULL;
+					write.pTexelBufferView = NULL;
+
+					if (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == descriptorType)
+					{
+						// Expose the whole buffer.
+						VertexBufferVK& vb = m_vertexBuffers[bind.m_idx];
+						bufferInfo[bufferCount].buffer = vb.m_buffer;
+						bufferInfo[bufferCount].offset = 0;
+						bufferInfo[bufferCount].range  = vb.m_size;
+						write.pBufferInfo = &bufferInfo[bufferCount];
+						++bufferCount;
+						++wdsCount;
+					}
+					else if (VK_DESCRIPTOR_TYPE_STORAGE_IMAGE == descriptorType)
+					{
+						TextureVK& texture = m_textures[bind.m_idx];
+						VkSampler sampler = getSampler(
+							(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
+								? bind.m_samplerFlags
+								: (uint32_t)texture.m_flags
+							) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
+							, (uint32_t)texture.m_numMips);
+
+						// NOTE(review): storage images are transitioned to SHADER_READ_ONLY_OPTIMAL
+						// unless already GENERAL; confirm this is intended for shader-written images.
+						if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout
+						&&  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout)
+						{
+							texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+						}
+
+						// Prefer the dedicated storage view when the texture has one.
+						imageInfo[imageCount].imageLayout = texture.m_currentImageLayout;
+						imageInfo[imageCount].imageView   = VK_NULL_HANDLE != texture.m_textureImageStorageView
+							? texture.m_textureImageStorageView
+							: texture.m_textureImageView
+							;
+						imageInfo[imageCount].sampler     = sampler;
+						write.pImageInfo = &imageInfo[imageCount];
+						++imageCount;
+						++wdsCount;
+					}
+					// Any other descriptor type has no payload to attach; the slot is not counted
+					// so that no write with NULL image/buffer info reaches vkUpdateDescriptorSets.
+				}
+				else if (ShaderVK::BindType::Sampler == bindInfo->type)
+				{
+					TextureVK& texture = m_textures[bind.m_idx];
+					VkSampler sampler = getSampler(
+						(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
+							? bind.m_samplerFlags
+							: (uint32_t)texture.m_flags
+						) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
+						, (uint32_t)texture.m_numMips);
+
+					if (VK_IMAGE_LAYOUT_GENERAL != texture.m_currentImageLayout
+					&&  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL != texture.m_currentImageLayout)
+					{
+						texture.setImageMemoryBarrier(m_commandBuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+					}
+
+					// Prefer the depth-only view when the texture has one.
+					imageInfo[imageCount].imageLayout = texture.m_currentImageLayout;
+					imageInfo[imageCount].imageView   = VK_NULL_HANDLE != texture.m_textureImageDepthView
+						? texture.m_textureImageDepthView
+						: texture.m_textureImageView
+						;
+					imageInfo[imageCount].sampler     = sampler;
+
+					// The image and the sampler are separate descriptors that share one image info.
+					VkWriteDescriptorSet& imageWrite = wds[wdsCount];
+					imageWrite.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+					imageWrite.pNext            = NULL;
+					imageWrite.dstSet           = descriptorSet;
+					imageWrite.dstBinding       = bindInfo->binding;
+					imageWrite.dstArrayElement  = 0;
+					imageWrite.descriptorCount  = 1;
+					imageWrite.descriptorType   = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+					imageWrite.pImageInfo       = &imageInfo[imageCount];
+					imageWrite.pBufferInfo      = NULL;
+					imageWrite.pTexelBufferView = NULL;
+					++wdsCount;
+
+					VkWriteDescriptorSet& samplerWrite = wds[wdsCount];
+					samplerWrite.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+					samplerWrite.pNext            = NULL;
+					samplerWrite.dstSet           = descriptorSet;
+					samplerWrite.dstBinding       = bindInfo->samplerBinding;
+					samplerWrite.dstArrayElement  = 0;
+					samplerWrite.descriptorCount  = 1;
+					samplerWrite.descriptorType   = VK_DESCRIPTOR_TYPE_SAMPLER;
+					samplerWrite.pImageInfo       = &imageInfo[imageCount];
+					samplerWrite.pBufferInfo      = NULL;
+					samplerWrite.pTexelBufferView = NULL;
+					++wdsCount;
+
+					++imageCount;
+				}
+			}
+
+			// Uniform block sizes, rounded up to the device's uniform buffer offset alignment.
+			const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
+			const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
+			const uint32_t fsize = bx::strideAlign( (NULL != program.m_fsh ? program.m_fsh->m_size : 0), align);
+
+			if (vsize > 0)
+			{
+				bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer;
+				bufferInfo[bufferCount].offset = 0;
+				bufferInfo[bufferCount].range  = vsize;
+
+				VkWriteDescriptorSet& write = wds[wdsCount];
+				write.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+				write.pNext            = NULL;
+				write.dstSet           = descriptorSet;
+				write.dstBinding       = program.m_vsh->m_uniformBinding;
+				write.dstArrayElement  = 0;
+				write.descriptorCount  = 1;
+				write.descriptorType   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+				write.pImageInfo       = NULL;
+				write.pBufferInfo      = &bufferInfo[bufferCount];
+				write.pTexelBufferView = NULL;
+				++wdsCount;
+				++bufferCount;
+			}
+
+			if (fsize > 0)
+			{
+				// fsize is only non-zero when a fragment shader is present.
+				bufferInfo[bufferCount].buffer = scratchBuffer.m_buffer;
+				bufferInfo[bufferCount].offset = 0;
+				bufferInfo[bufferCount].range  = fsize;
+
+				VkWriteDescriptorSet& write = wds[wdsCount];
+				write.sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+				write.pNext            = NULL;
+				write.dstSet           = descriptorSet;
+				write.dstBinding       = program.m_fsh->m_uniformBinding;
+				write.dstArrayElement  = 0;
+				write.descriptorCount  = 1;
+				write.descriptorType   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
+				write.pImageInfo       = NULL;
+				write.pBufferInfo      = &bufferInfo[bufferCount];
+				write.pTexelBufferView = NULL;
+				++wdsCount;
+				++bufferCount;
+			}
+
+			vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
+		}
+
 		void commit(UniformBuffer& _uniformBuffer)
 		{
 			_uniformBuffer.reset();
@@ -3819,6 +4020,7 @@ VK_IMPORT_DEVICE
 
 		VkPhysicalDeviceProperties       m_deviceProperties;
 		VkPhysicalDeviceMemoryProperties m_memoryProperties;
+		VkPhysicalDeviceFeatures         m_deviceFeatures;
 
 		VkSwapchainCreateInfoKHR m_sci;
 		VkSurfaceKHR     m_surface;
@@ -3958,7 +4160,7 @@ VK_DESTROY
 		ma.pNext = NULL;
 		ma.allocationSize  = mr.size;
 		ma.memoryTypeIndex = s_renderVK->selectMemoryType(mr.memoryTypeBits
-			, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+			, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
 			);
 		VK_CHECK(vkAllocateMemory(device
 			, &ma
@@ -4120,16 +4322,18 @@ VK_DESTROY
 		m_flags   = _flags;
 		m_dynamic = NULL == _data;
 
-		bool compute = m_flags   & BGFX_BUFFER_COMPUTE_READ_WRITE;
+		bool storage  = m_flags & BGFX_BUFFER_COMPUTE_READ_WRITE;
+		bool indirect = m_flags & BGFX_BUFFER_DRAW_INDIRECT;
 		VkBufferCreateInfo bci;
 		bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
 		bci.pNext = NULL;
 		bci.flags = 0;
 		bci.size  = _size;
 		bci.usage = 0
-//			| (m_dynamic ? VK_BUFFER_USAGE_TRANSFER_DST_BIT   : 0)
-			| (_vertex   ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT  : VK_BUFFER_USAGE_INDEX_BUFFER_BIT)
-			| (compute   ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0)
+//			| (m_dynamic            ? VK_BUFFER_USAGE_TRANSFER_DST_BIT    : 0)
+			| (_vertex              ? VK_BUFFER_USAGE_VERTEX_BUFFER_BIT   : VK_BUFFER_USAGE_INDEX_BUFFER_BIT)
+			| (storage || indirect  ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT  : 0)
+			| (indirect             ? VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT : 0)
 			| VK_BUFFER_USAGE_TRANSFER_DST_BIT
 			;
 		bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
@@ -4531,7 +4735,7 @@ VK_DESTROY
 			{
 				m_uniformBinding = fragment ? 48 : 0;
 				m_bindings[bidx].stageFlags = VK_SHADER_STAGE_ALL;
-				m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+				m_bindings[bidx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
 				m_bindings[bidx].binding = m_uniformBinding;
 				m_bindings[bidx].pImmutableSamplers = NULL;
 				m_bindings[bidx].descriptorCount = 1;
@@ -5102,6 +5306,33 @@ VK_DESTROY
 					));
 			}
 
+			if ((m_vkTextureAspect & VK_IMAGE_ASPECT_DEPTH_BIT)
+			&&  (m_vkTextureAspect & VK_IMAGE_ASPECT_STENCIL_BIT))
+			{
+				VkImageViewCreateInfo viewInfo;
+				viewInfo.sType        = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+				viewInfo.pNext        = NULL;
+				viewInfo.flags        = 0;
+				viewInfo.image        = m_textureImage;
+				viewInfo.viewType     = m_type;
+				viewInfo.format       = m_vkTextureFormat;
+				viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+				viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+				viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+				viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+				viewInfo.subresourceRange.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT;
+				viewInfo.subresourceRange.baseMipLevel   = 0;
+				viewInfo.subresourceRange.levelCount     = m_numMips; //m_numMips;
+				viewInfo.subresourceRange.baseArrayLayer = 0;
+				viewInfo.subresourceRange.layerCount     = m_numSides; //(m_type == VK_IMAGE_VIEW_TYPE_CUBE ? 6 : m_numLayers);
+				VK_CHECK(vkCreateImageView(
+					device
+					, &viewInfo
+					, allocatorCb
+					, &m_textureImageDepthView
+					));
+			}
+
 			// image view creation for storage if needed
 			if (m_flags & BGFX_TEXTURE_COMPUTE_WRITE)
 			{
@@ -5143,8 +5374,11 @@ VK_DESTROY
 			vkFreeMemory(device, m_textureDeviceMem, allocatorCb);
 
 			vkDestroy(m_textureImageStorageView);
+			vkDestroy(m_textureImageDepthView);
 			vkDestroy(m_textureImageView);
 			vkDestroy(m_textureImage);
+
+			m_currentImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 		}
 	}
 
@@ -5433,6 +5667,7 @@ VK_DESTROY
 		uint16_t currentSamplerStateIdx = kInvalidHandle;
 		ProgramHandle currentProgram    = BGFX_INVALID_HANDLE;
 		uint32_t currentBindHash        = 0;
+		uint32_t currentDslHash         = 0;
 		bool     hasPredefined          = false;
 		bool     commandListChanged     = false;
 		VkPipeline currentPipeline = VK_NULL_HANDLE;
@@ -5653,96 +5888,9 @@ VK_DESTROY
 						currentPipeline = pipeline;
 						vkCmdBindPipeline(m_commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 						currentBindHash = 0;
+						currentDslHash = 0;
 					}
 
-//					uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
-//					if (currentBindHash != bindHash)
-//					{
-//						currentBindHash  = bindHash;
-//
-//						Bind* bindCached = bindLru.find(bindHash);
-//						if (NULL == bindCached)
-//						{
-//							D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_MAX_COMPUTE_BINDINGS] = {};
-//							uint32_t samplerFlags[BGFX_MAX_COMPUTE_BINDINGS] = {};
-//
-//							for (uint32_t ii = 0; ii < maxComputeBindings; ++ii)
-//							{
-//								const Binding& bind = renderBind.m_bind[ii];
-//								if (kInvalidHandle != bind.m_idx)
-//								{
-//									switch (bind.m_type)
-//									{
-//									case Binding::Image:
-//										{
-//											TextureD3D12& texture = m_textures[bind.m_idx];
-//
-//											if (Access::Read != bind.m_access)
-//											{
-//												texture.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
-//												scratchBuffer.allocUav(srvHandle[ii], texture, bind.m_mip);
-//											}
-//											else
-//											{
-//												texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
-//												scratchBuffer.allocSrv(srvHandle[ii], texture, bind.m_mip);
-//												samplerFlags[ii] = texture.m_flags;
-//											}
-//										}
-//										break;
-//
-//									case Binding::IndexBuffer:
-//									case Binding::VertexBuffer:
-//										{
-//											BufferD3D12& buffer = Binding::IndexBuffer == bind.m_type
-//												? m_indexBuffers[bind.m_idx]
-//												: m_vertexBuffers[bind.m_idx]
-//												;
-//
-//											if (Access::Read != bind.m_access)
-//											{
-//												buffer.setState(m_commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
-//												scratchBuffer.allocUav(srvHandle[ii], buffer);
-//											}
-//											else
-//											{
-//												buffer.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
-//												scratchBuffer.allocSrv(srvHandle[ii], buffer);
-//											}
-//										}
-//										break;
-//									}
-//								}
-//							}
-//
-//							uint16_t samplerStateIdx = getSamplerState(samplerFlags, maxComputeBindings, _render->m_colorPalette);
-//							if (samplerStateIdx != currentSamplerStateIdx)
-//							{
-//								currentSamplerStateIdx = samplerStateIdx;
-//								m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
-//							}
-//
-//							m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, srvHandle[0]);
-//							m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, srvHandle[0]);
-//
-//							Bind bind;
-//							bind.m_srvHandle = srvHandle[0];
-//							bind.m_samplerStateIdx = samplerStateIdx;
-//							bindLru.add(bindHash, bind, 0);
-//						}
-//						else
-//						{
-//							uint16_t samplerStateIdx = bindCached->m_samplerStateIdx;
-//							if (samplerStateIdx != currentSamplerStateIdx)
-//							{
-//								currentSamplerStateIdx = samplerStateIdx;
-//								m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
-//							}
-//							m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle);
-//							m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, bindCached->m_srvHandle);
-//						}
-//					}
-
 					bool constantsChanged = false;
 					if (compute.m_uniformBegin < compute.m_uniformEnd
 					||  currentProgram.idx != key.m_program.idx)
@@ -5762,135 +5910,54 @@ VK_DESTROY
 						constantsChanged = true;
 					}
 
+					ProgramVK& program = m_program[currentProgram.idx];
 					if (constantsChanged
 					||  hasPredefined)
 					{
-						ProgramVK& program = m_program[currentProgram.idx];
 						viewState.setPredefined<4>(this, view, program, _render, compute);
 //						commitShaderConstants(key.m_program, gpuAddress);
 //						m_commandList->SetComputeRootConstantBufferView(Rdt::CBV, gpuAddress);
 					}
 
+					uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
+					if (currentBindHash != bindHash
+					||  currentDslHash != program.m_descriptorSetLayoutHash)
 					{
-						ProgramVK& program = m_program[currentProgram.idx];
-						ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx];
-
-						VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);
-						VkDescriptorSetAllocateInfo dsai;
-						dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-						dsai.pNext = NULL;
-						dsai.descriptorPool = m_descriptorPool;
-						dsai.descriptorSetCount = 1;
-						dsai.pSetLayouts = &dsl;
-						vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]);
-
-						VkDescriptorImageInfo imageInfo[BGFX_MAX_COMPUTE_BINDINGS];
-						VkDescriptorBufferInfo bufferInfo[BGFX_MAX_COMPUTE_BINDINGS];
-						VkWriteDescriptorSet wds[BGFX_MAX_COMPUTE_BINDINGS];
-						bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_MAX_COMPUTE_BINDINGS);
-						uint32_t wdsCount = 0;
-						uint32_t imageCount = 0;
-						uint32_t bufferCount = 0;
-						for (uint32_t stage = 0; stage < BGFX_MAX_COMPUTE_BINDINGS; ++stage)
-						{
-							const Binding& bind = renderBind.m_bind[stage];
-							if (kInvalidHandle != bind.m_idx)
-							{
-								VkDescriptorType descriptorType = (VkDescriptorType)program.m_vsh->m_bindInfo[stage].samplerBinding;
-								if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
-								{
-									VertexBufferVK& vb = m_vertexBuffers[bind.m_idx];
-									bufferInfo[bufferCount].buffer = vb.m_buffer;
-									bufferInfo[bufferCount].offset = 0;
-									bufferInfo[bufferCount].range = vb.m_size;
-
-									wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-									wds[wdsCount].pNext = NULL;
-									wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-									wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding;
-									wds[wdsCount].dstArrayElement = 0;
-									wds[wdsCount].descriptorCount = 1;
-									wds[wdsCount].descriptorType = descriptorType;
-									wds[wdsCount].pImageInfo = NULL;
-									wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
-									wds[wdsCount].pTexelBufferView = NULL;
-									wdsCount++;
-									bufferCount++;
-								}
-								else if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
-								{
-									TextureVK& texture = m_textures[bind.m_idx];
-									VkSampler sampler = getSampler(
-										(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
-											? bind.m_samplerFlags
-											: (uint32_t)texture.m_flags
-										) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
-										, (uint32_t)texture.m_numMips);
-
-									imageInfo[stage].imageLayout = texture.m_currentImageLayout;
-									imageInfo[stage].imageView   = texture.m_textureImageStorageView ? texture.m_textureImageStorageView : texture.m_textureImageView;
-									imageInfo[stage].sampler     = sampler;
-
-									wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-									wds[wdsCount].pNext = NULL;
-									wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-									wds[wdsCount].dstBinding = program.m_vsh->m_bindInfo[stage].binding;
-									wds[wdsCount].dstArrayElement = 0;
-									wds[wdsCount].descriptorCount = 1;
-									wds[wdsCount].descriptorType = descriptorType;
-									wds[wdsCount].pImageInfo = &imageInfo[imageCount];
-									wds[wdsCount].pBufferInfo = NULL;
-									wds[wdsCount].pTexelBufferView = NULL;
-									wdsCount++;
-								}
-							}
-						}
+						currentBindHash = bindHash;
+						currentDslHash = program.m_descriptorSetLayoutHash;
 
-						const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
-						const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
+						allocDescriptorSet(program, renderBind, scratchBuffer);
+					}
 
-						if (vsize > 0)
-						{
-							bufferInfo[bufferCount].buffer = sb.m_buffer;
-							bufferInfo[bufferCount].offset = sb.m_pos;
-							bufferInfo[bufferCount].range = vsize;
-
-							wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-							wds[wdsCount].pNext = NULL;
-							wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-							wds[wdsCount].dstBinding = program.m_vsh->m_uniformBinding;
-							wds[wdsCount].dstArrayElement = 0;
-							wds[wdsCount].descriptorCount = 1;
-							wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-							wds[wdsCount].pImageInfo = NULL;
-							wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
-							wds[wdsCount].pTexelBufferView = NULL;
-							wdsCount++;
-							bufferCount++;
+					uint32_t offset = 0;
 
-							bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size);
-						}
+					if (constantsChanged
+					||  hasPredefined)
+					{
+						const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
+						const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
 
-						sb.m_pos += vsize;
+						offset = scratchBuffer.m_pos;
 
 						m_vsChanges = 0;
 						m_fsChanges = 0;
 
-						vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
-						vkCmdBindDescriptorSets(
-								m_commandBuffer
-								, VK_PIPELINE_BIND_POINT_COMPUTE
-								, program.m_pipelineLayout
-								, 0
-								, 1
-								, &sb.m_descriptorSet[sb.m_currentDs]
-								, 0
-								, NULL
-						);
+						bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
 
-						sb.m_currentDs++;
+						scratchBuffer.m_pos += vsize;
 					}
 
+					vkCmdBindDescriptorSets(
+						m_commandBuffer
+						, VK_PIPELINE_BIND_POINT_COMPUTE
+						, program.m_pipelineLayout
+						, 0
+						, 1
+						, &scratchBuffer.getCurrentDS()
+						, constantsChanged || hasPredefined ? 1 : 0
+						, &offset
+						);
+
 					if (isValid(compute.m_indirectBuffer) )
 					{
 						const VertexBufferVK& vb = m_vertexBuffers[compute.m_indirectBuffer.idx];
@@ -5980,6 +6047,7 @@ VK_DESTROY
 
 					currentPipeline        = VK_NULL_HANDLE;
 					currentBindHash        = 0;
+					currentDslHash         = 0;
 					currentSamplerStateIdx = kInvalidHandle;
 					currentProgram         = BGFX_INVALID_HANDLE;
 					currentState.clear();
@@ -6046,79 +6114,6 @@ VK_DESTROY
 
 					uint16_t scissor = draw.m_scissor;
 					uint32_t bindHash = bx::hash<bx::HashMurmur2A>(renderBind.m_bind, sizeof(renderBind.m_bind) );
-					if (currentBindHash != bindHash
-					||  0 != changedStencil
-					|| (hasFactor && blendFactor != draw.m_rgba)
-					|| (0 != (BGFX_STATE_PT_MASK & changedFlags)
-					||  prim.m_topology != s_primInfo[primIndex].m_topology)
-					||  currentState.m_scissor != scissor
-					||  pipeline != currentPipeline
-					||  hasOcclusionQuery)
-					{
-//						m_batch.flush(m_commandList);
-					}
-
-//					if (currentBindHash != bindHash)
-//					{
-//						currentBindHash  = bindHash;
-//
-//						Bind* bindCached = bindLru.find(bindHash);
-//						if (NULL == bindCached)
-//						{
-//							D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
-//							uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
-//							{
-//								srvHandle[0].ptr = 0;
-//								for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
-//								{
-//									const Binding& bind = renderBind.m_bind[stage];
-//									if (kInvalidHandle != bind.m_idx)
-//									{
-//										TextureD3D12& texture = m_textures[bind.m_idx];
-//										texture.setState(m_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
-//										scratchBuffer.allocSrv(srvHandle[stage], texture);
-//										samplerFlags[stage] = (0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & bind.m_textureFlags)
-//											? bind.m_textureFlags
-//											: texture.m_flags
-//											) & (BGFX_TEXTURE_SAMPLER_BITS_MASK|BGFX_TEXTURE_BORDER_COLOR_MASK)
-//											;
-//									}
-//									else
-//									{
-//										bx::memCopy(&srvHandle[stage], &srvHandle[0], sizeof(D3D12_GPU_DESCRIPTOR_HANDLE) );
-//										samplerFlags[stage] = 0;
-//									}
-//								}
-//							}
-//
-//							if (srvHandle[0].ptr != 0)
-//							{
-//								uint16_t samplerStateIdx = getSamplerState(samplerFlags, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, _render->m_colorPalette);
-//								if (samplerStateIdx != currentSamplerStateIdx)
-//								{
-//									currentSamplerStateIdx = samplerStateIdx;
-//									m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
-//								}
-//
-//								m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle[0]);
-//
-//								Bind bind;
-//								bind.m_srvHandle = srvHandle[0];
-//								bind.m_samplerStateIdx = samplerStateIdx;
-//								bindLru.add(bindHash, bind, 0);
-//							}
-//						}
-//						else
-//						{
-//							uint16_t samplerStateIdx = bindCached->m_samplerStateIdx;
-//							if (samplerStateIdx != currentSamplerStateIdx)
-//							{
-//								currentSamplerStateIdx = samplerStateIdx;
-//								m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) );
-//							}
-//							m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, bindCached->m_srvHandle);
-//						}
-//					}
 
 					if (pipeline != currentPipeline
 					||  0 != changedStencil)
@@ -6213,157 +6208,59 @@ VK_DESTROY
 						constantsChanged = true;
 					}
 
-					if (constantsChanged
-						|| hasPredefined
-						|| currentBindHash != bindHash)
+					ProgramVK& program = m_program[currentProgram.idx];
+					if (hasPredefined)
 					{
-						ProgramVK& program = m_program[currentProgram.idx];
-						ScratchBufferVK& sb = m_scratchBuffer[m_backBufferColorIdx];
-
-						VkDescriptorSetLayout dsl = m_descriptorSetLayoutCache.find(program.m_descriptorSetLayoutHash);
-						VkDescriptorSetAllocateInfo dsai;
-						dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-						dsai.pNext = NULL;
-						dsai.descriptorPool = m_descriptorPool;
-						dsai.descriptorSetCount = 1;
-						dsai.pSetLayouts = &dsl;
-						vkAllocateDescriptorSets(m_device, &dsai, &sb.m_descriptorSet[sb.m_currentDs]);
-
-						VkDescriptorImageInfo imageInfo[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
-						VkDescriptorBufferInfo bufferInfo[16];
-						VkWriteDescriptorSet wds[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS];
-						bx::memSet(wds, 0, sizeof(VkWriteDescriptorSet) * BGFX_CONFIG_MAX_TEXTURE_SAMPLERS);
-						uint32_t wdsCount = 0;
-						uint32_t bufferCount = 0;
-						for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
-						{
-							const Binding& bind = renderBind.m_bind[stage];
-							if (kInvalidHandle != bind.m_idx &&
-								isValid(program.m_fsh->m_bindInfo[stage].uniformHandle))
-							{
-								TextureVK& texture = m_textures[bind.m_idx];
-								VkSampler sampler = getSampler(
-									(0 == (BGFX_SAMPLER_INTERNAL_DEFAULT & bind.m_samplerFlags)
-										? bind.m_samplerFlags
-										: (uint32_t)texture.m_flags
-									) & (BGFX_SAMPLER_BITS_MASK | BGFX_SAMPLER_BORDER_COLOR_MASK)
-									, (uint32_t)texture.m_numMips);
-
-								imageInfo[stage].imageLayout = texture.m_currentImageLayout;
-								imageInfo[stage].imageView   = texture.m_textureImageView;
-								imageInfo[stage].sampler     = sampler;
-
-								wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-								wds[wdsCount].pNext = NULL;
-								wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-								wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].binding;
-								wds[wdsCount].dstArrayElement = 0;
-								wds[wdsCount].descriptorCount = 1;
-								wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
-								wds[wdsCount].pImageInfo = &imageInfo[stage];
-								wds[wdsCount].pBufferInfo = NULL;
-								wds[wdsCount].pTexelBufferView = NULL;
-								wdsCount++;
-
-								wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-								wds[wdsCount].pNext = NULL;
-								wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-								wds[wdsCount].dstBinding = program.m_fsh->m_bindInfo[stage].samplerBinding;
-								wds[wdsCount].dstArrayElement = 0;
-								wds[wdsCount].descriptorCount = 1;
-								wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
-								wds[wdsCount].pImageInfo = &imageInfo[stage];
-								wds[wdsCount].pBufferInfo = NULL;
-								wds[wdsCount].pTexelBufferView = NULL;
-								wdsCount++;
-							}
-							else
-							{
-								imageInfo[stage].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-								imageInfo[stage].imageView = VK_NULL_HANDLE;
-								imageInfo[stage].sampler = VK_NULL_HANDLE;
-							}
-						}
-
 						uint32_t ref = (newFlags & BGFX_STATE_ALPHA_REF_MASK) >> BGFX_STATE_ALPHA_REF_SHIFT;
 						viewState.m_alphaRef = ref / 255.0f;
 						viewState.setPredefined<4>(this, view, program, _render, draw);
+					}
+
+					if (currentBindHash != bindHash
+					||  currentDslHash != program.m_descriptorSetLayoutHash)
+					{
+						currentBindHash = bindHash;
+						currentDslHash = program.m_descriptorSetLayoutHash;
 
+						allocDescriptorSet(program, renderBind, scratchBuffer);
+					}
+
+					uint32_t numOffset = 0;
+					uint32_t offsets[2] = {0, 0};
+					if (constantsChanged
+					||  hasPredefined)
+					{
 						const uint32_t align = uint32_t(m_deviceProperties.limits.minUniformBufferOffsetAlignment);
 						const uint32_t vsize = bx::strideAlign(program.m_vsh->m_size, align);
 						const uint32_t fsize = bx::strideAlign((NULL != program.m_fsh ? program.m_fsh->m_size : 0), align);
 						const uint32_t total = vsize + fsize;
-
-						if (0 < total)
+						if (vsize > 0)
 						{
-							uint32_t vsUniformBinding = program.m_vsh->m_uniformBinding;
-							uint32_t fsUniformBinding = program.m_fsh->m_uniformBinding;
-
-							if (vsize > 0)
-							{
-								bufferInfo[bufferCount].buffer = sb.m_buffer;
-								bufferInfo[bufferCount].offset = sb.m_pos;
-								bufferInfo[bufferCount].range = vsize;
-
-								wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-								wds[wdsCount].pNext = NULL;
-								wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-								wds[wdsCount].dstBinding = vsUniformBinding;
-								wds[wdsCount].dstArrayElement = 0;
-								wds[wdsCount].descriptorCount = 1;
-								wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-								wds[wdsCount].pImageInfo = NULL;
-								wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
-								wds[wdsCount].pTexelBufferView = NULL;
-								wdsCount++;
-								bufferCount++;
-
-								bx::memCopy(&sb.m_data[sb.m_pos], m_vsScratch, program.m_vsh->m_size);
-							}
-
-							if (fsize > 0)
-							{
-								bufferInfo[bufferCount].buffer = sb.m_buffer;
-								bufferInfo[bufferCount].offset = sb.m_pos + vsize;
-								bufferInfo[bufferCount].range = fsize;
-
-								wds[wdsCount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-								wds[wdsCount].pNext = NULL;
-								wds[wdsCount].dstSet = sb.m_descriptorSet[sb.m_currentDs];
-								wds[wdsCount].dstBinding = fsUniformBinding;
-								wds[wdsCount].dstArrayElement = 0;
-								wds[wdsCount].descriptorCount = 1;
-								wds[wdsCount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-								wds[wdsCount].pImageInfo = NULL;
-								wds[wdsCount].pBufferInfo = &bufferInfo[bufferCount];
-								wds[wdsCount].pTexelBufferView = NULL;
-								wdsCount++;
-								bufferCount++;
-
-								bx::memCopy(&sb.m_data[sb.m_pos + vsize], m_fsScratch, program.m_fsh->m_size);
-							}
-
-							sb.m_pos += vsize + fsize;
+							offsets[numOffset++] = scratchBuffer.m_pos;
+							bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos], m_vsScratch, program.m_vsh->m_size);
+						}
+						if (fsize > 0)
+						{
+							offsets[numOffset++] = scratchBuffer.m_pos + vsize;
+							bx::memCopy(&scratchBuffer.m_data[scratchBuffer.m_pos + vsize], m_fsScratch, program.m_fsh->m_size);
 						}
 
 						m_vsChanges = 0;
 						m_fsChanges = 0;
+						scratchBuffer.m_pos += total;
+					}
 
+					vkCmdBindDescriptorSets(
+						m_commandBuffer
+						, VK_PIPELINE_BIND_POINT_GRAPHICS
+						, program.m_pipelineLayout
+						, 0
+						, 1
+						, &scratchBuffer.getCurrentDS()
+						, numOffset
+						, offsets
+						);
 
-						vkUpdateDescriptorSets(m_device, wdsCount, wds, 0, NULL);
-						vkCmdBindDescriptorSets(
-							  m_commandBuffer
-							, VK_PIPELINE_BIND_POINT_GRAPHICS
-							, program.m_pipelineLayout
-							, 0
-							, 1
-							, &sb.m_descriptorSet[sb.m_currentDs]
-							, 0
-							, NULL
-							);
-
-						sb.m_currentDs++;
-					}
 
 //					if (constantsChanged
 //					||  hasPredefined)
@@ -6638,11 +6535,11 @@ BX_UNUSED(presentMin, presentMax);
 
 				pos++;
 				tvm.printf(10, pos++, 0x8b, " State cache:                        ");
-				tvm.printf(10, pos++, 0x8b, " PSO    | Sampler | Bind   | Queued  ");
-				tvm.printf(10, pos++, 0x8b, " %6d " //|  %6d | %6d | %6d  "
+				tvm.printf(10, pos++, 0x8b, " PSO    | DSL     | DS               "); // column headings; one per value printed below
+				tvm.printf(10, pos++, 0x8b, " %6d | %6d | %6d            " // exactly three conversions: the fourth argument below is commented out
 					, m_pipelineStateCache.getCount()
-//					, m_samplerStateCache.getCount()
-//					, bindLru.getCount()
+					, m_descriptorSetLayoutCache.getCount()
+					, scratchBuffer.m_currentDs
 //					, m_cmd.m_control.available()
 					);
 				pos++;
@@ -6675,6 +6572,14 @@ BX_UNUSED(presentMin, presentMax);
 //			PIX_ENDEVENT();
 		}
 
+		VkMappedMemoryRange range;
+		range.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+		range.pNext  = NULL;
+		range.memory = scratchBuffer.m_deviceMem;
+		range.offset = 0;
+		range.size   = scratchBuffer.m_pos;
+		vkFlushMappedMemoryRanges(m_device, 1, &range);
+
 		if (beginRenderPass)
 		{
 			vkCmdEndRenderPass(m_commandBuffer);

+ 8 - 0
src/renderer_vk.h

@@ -71,6 +71,7 @@
 			VK_IMPORT_INSTANCE_FUNC(false, vkEnumerateDeviceLayerProperties);          \
 			VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceProperties);             \
 			VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFormatProperties);       \
+			VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceFeatures);               \
 			VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceImageFormatProperties);  \
 			VK_IMPORT_INSTANCE_FUNC(false, vkGetPhysicalDeviceMemoryProperties);       \
 			VK_IMPORT_INSTANCE_FUNC(true,  vkGetPhysicalDeviceMemoryProperties2KHR);   \
@@ -325,6 +326,11 @@ VK_DESTROY
 		void destroy();
 		void reset();
 
+		VkDescriptorSet& getCurrentDS() // Returns a reference to the descriptor set stored at index m_currentDs - 1.
+		{
+			return m_descriptorSet[m_currentDs - 1]; // NOTE(review): no guard for m_currentDs == 0; presumably a set is always allocated before this is called - confirm.
+		}
+
 		VkDescriptorSet* m_descriptorSet;
 		VkBuffer m_buffer;
 		VkDeviceMemory m_deviceMem;
@@ -469,6 +475,7 @@ VK_DESTROY
 			, m_textureImage(VK_NULL_HANDLE)
 			, m_textureDeviceMem(VK_NULL_HANDLE)
 			, m_textureImageView(VK_NULL_HANDLE)
+			, m_textureImageDepthView(VK_NULL_HANDLE)
 			, m_textureImageStorageView(VK_NULL_HANDLE)
 			, m_currentImageLayout(VK_IMAGE_LAYOUT_UNDEFINED)
 		{
@@ -498,6 +505,7 @@ VK_DESTROY
 		VkImage m_textureImage;
 		VkDeviceMemory m_textureDeviceMem;
 		VkImageView m_textureImageView;
+		VkImageView m_textureImageDepthView;
 		VkImageView m_textureImageStorageView;
 		VkImageLayout m_currentImageLayout;
 	};