Browse Source

implement multiple vertex stream

- additional condition for devices that don't have debug_utils extension
- set FIFO present mode if possible
- fix attachment load and store operation and its image layout during render pass creation
- fix resolve attachment reference setup during render pass creation
- implement multiple vertex stream
rinthel 6 years ago
parent
commit
2b5246f72c
1 changed files with 163 additions and 80 deletions
  1. 163 80
      src/renderer_vk.cpp

+ 163 - 80
src/renderer_vk.cpp

@@ -328,23 +328,23 @@ VK_IMPORT_DEVICE
 		VkVertexInputBindingDescription*   inputBinding = const_cast<VkVertexInputBindingDescription*>(_vertexInputState.pVertexBindingDescriptions + numBindings);
 		VkVertexInputAttributeDescription* inputAttrib  = const_cast<VkVertexInputAttributeDescription*>(_vertexInputState.pVertexAttributeDescriptions + numAttribs);
 
-		inputBinding->binding   = 0;
+		inputBinding->binding   = numBindings;
 		inputBinding->stride    = _decl.m_stride;
 		inputBinding->inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 
 		for (uint32_t attr = 0; attr < Attrib::Count; ++attr)
 		{
-			if (UINT16_MAX != _decl.m_attributes[attr])
+			if (UINT16_MAX != _decl.m_attributes[attr] && 0 != _decl.m_attributes[attr])
 			{
 				inputAttrib->location = _vsh->m_attrRemap[attr];
-				inputAttrib->binding  = 0;
+				inputAttrib->binding  = numBindings;
 
-				if (0 == _decl.m_attributes[attr])
-				{
-					inputAttrib->format = VK_FORMAT_R32G32B32_SFLOAT;
-					inputAttrib->offset = 0;
-				}
-				else
+//				if (0 == _decl.m_attributes[attr])
+//				{
+//					inputAttrib->format = VK_FORMAT_R32G32B32_SFLOAT;
+//					inputAttrib->offset = 0;
+//				}
+//				else
 				{
 					uint8_t num;
 					AttribType::Enum type;
@@ -707,7 +707,7 @@ VK_IMPORT_DEVICE
 	template<typename Ty>
 	static BX_NO_INLINE void setDebugObjectName(VkDevice _device, Ty _object, const char* _format, ...)
 	{
-		if (BX_ENABLED(BGFX_CONFIG_DEBUG_OBJECT_NAME) )
+		if (BX_ENABLED(BGFX_CONFIG_DEBUG_OBJECT_NAME) && s_extension[Extension::EXT_debug_utils].m_supported)
 		{
 			char temp[2048];
 			va_list argList;
@@ -1147,9 +1147,14 @@ VK_IMPORT_INSTANCE
 				g_caps.vendorId = uint16_t(m_deviceProperties.vendorID);
 				g_caps.deviceId = uint16_t(m_deviceProperties.deviceID);
 
+				g_caps.supported |= ( 0
+					| BGFX_CAPS_TEXTURE_BLIT
+					| BGFX_CAPS_INSTANCING
+					);
 				g_caps.limits.maxTextureSize     = m_deviceProperties.limits.maxImageDimension2D;
 				g_caps.limits.maxFBAttachments   = bx::min(uint8_t(m_deviceProperties.limits.maxFragmentOutputAttachments), uint8_t(BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) );
 				g_caps.limits.maxComputeBindings = BGFX_MAX_COMPUTE_BINDINGS;
+				g_caps.limits.maxVertexStreams   = BGFX_CONFIG_MAX_VERTEX_STREAMS;
 
 				{
 //					VkFormatProperties fp;
@@ -1279,11 +1284,6 @@ VK_IMPORT_INSTANCE
 					BX_TRACE("Init error: Unable to find graphics queue.");
 					goto error;
 				}
-
-				g_caps.supported |= ( 0
-					| BGFX_CAPS_TEXTURE_BLIT
-					| BGFX_CAPS_INSTANCING
-					);
 			}
 
 			{
@@ -1540,8 +1540,35 @@ VK_IMPORT_DEVICE
 				numPresentModes = bx::min<uint32_t>(numPresentModes, BX_COUNTOF(presentModes) );
 				vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &numPresentModes, presentModes);
 
+
 				// find the best match...
-				uint32_t presentModeIdx = 0;
+				uint32_t presentModeIdx = numPresentModes;
+				VkPresentModeKHR preferredPresentMode[] = {
+					VK_PRESENT_MODE_FIFO_KHR,
+					VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+					VK_PRESENT_MODE_MAILBOX_KHR,
+					VK_PRESENT_MODE_IMMEDIATE_KHR,
+				};
+				for (uint32_t ii = 0; ii < BX_COUNTOF(preferredPresentMode); ++ii)
+				{
+					for (uint32_t jj = 0; jj < numPresentModes; ++jj)
+					{
+						if (presentModes[jj] == preferredPresentMode[ii])
+						{
+							presentModeIdx = jj;
+							BX_TRACE("present mode: %d", (int)preferredPresentMode[ii]);
+							break;
+						}
+					}
+					if (presentModeIdx < numPresentModes)
+					{
+						break;
+					}
+				}
+				if (presentModeIdx == numPresentModes)
+				{
+					presentModeIdx = 0;
+				}
 
 				m_backBufferDepthStencilFormat =
 //					VK_FORMAT_D32_SFLOAT_S8_UINT
@@ -1763,20 +1790,20 @@ VK_IMPORT_DEVICE
 				ad[0].flags          = 0;
 				ad[0].format         = m_sci.imageFormat;
 				ad[0].samples        = VK_SAMPLE_COUNT_1_BIT;
-				ad[0].loadOp         = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+				ad[0].loadOp         = VK_ATTACHMENT_LOAD_OP_LOAD;
 				ad[0].storeOp        = VK_ATTACHMENT_STORE_OP_STORE;
 				ad[0].stencilLoadOp  = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 				ad[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-				ad[0].initialLayout  = VK_IMAGE_LAYOUT_UNDEFINED;
+				ad[0].initialLayout  = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 				ad[0].finalLayout    = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 				ad[1].flags          = 0;
 				ad[1].format         = m_backBufferDepthStencilFormat;
 				ad[1].samples        = VK_SAMPLE_COUNT_1_BIT;
-				ad[1].loadOp         = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+				ad[1].loadOp         = VK_ATTACHMENT_LOAD_OP_LOAD;
 				ad[1].storeOp        = VK_ATTACHMENT_STORE_OP_STORE;
-				ad[1].stencilLoadOp  = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-				ad[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-				ad[1].initialLayout  = VK_IMAGE_LAYOUT_UNDEFINED;
+				ad[1].stencilLoadOp  = VK_ATTACHMENT_LOAD_OP_LOAD;
+				ad[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+				ad[1].initialLayout  = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 				ad[1].finalLayout    = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 
 				VkAttachmentReference colorAr[1];
@@ -2530,9 +2557,11 @@ VK_IMPORT_DEVICE
 				| BGFX_STATE_DEPTH_TEST_ALWAYS
 				;
 
+			const VertexDecl* vertexDecl = &m_vertexDecls[_blitter.m_vb->decl.idx];
 			VkPipeline pso = getPipeline(state
 				, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT)
-				, _blitter.m_vb->decl.idx
+				, 1
+				, &vertexDecl
 				, _blitter.m_program
 				, 0
 				);
@@ -3041,27 +3070,31 @@ VK_IMPORT_DEVICE
 			_desc.maxDepthBounds = 1.0f;
 		}
 
-		void setInputLayout(VkPipelineVertexInputStateCreateInfo& _vertexInputState, const VertexDecl& _vertexDecl, const ProgramVK& _program, uint8_t _numInstanceData)
+		void setInputLayout(VkPipelineVertexInputStateCreateInfo& _vertexInputState, uint8_t _numStream, const VertexDecl** _vertexDecl, const ProgramVK& _program, uint8_t _numInstanceData)
 		{
 			_vertexInputState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
 			_vertexInputState.pNext = NULL;
 			_vertexInputState.flags = 0;
 
-			VertexDecl decl;
-			bx::memCopy(&decl, &_vertexDecl, sizeof(VertexDecl) );
-			const uint16_t* attrMask = _program.m_vsh->m_attrMask;
+			_vertexInputState.vertexBindingDescriptionCount   = 0;
+			_vertexInputState.vertexAttributeDescriptionCount = 0;
 
-			for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+			for (uint8_t ii = 0; ii < _numStream; ++ii)
 			{
-				uint16_t mask = attrMask[ii];
-				uint16_t attr = (decl.m_attributes[ii] & mask);
-				decl.m_attributes[ii] = attr == 0 ? UINT16_MAX : attr == UINT16_MAX ? 0 : attr;
-			}
+				VertexDecl decl;
+				bx::memCopy(&decl, _vertexDecl[ii], sizeof(VertexDecl) );
+				const uint16_t* attrMask = _program.m_vsh->m_attrMask;
 
-			_vertexInputState.vertexBindingDescriptionCount   = 0;
-			_vertexInputState.vertexAttributeDescriptionCount = 0;
+				for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+				{
+					uint16_t mask = attrMask[ii];
+					uint16_t attr = (decl.m_attributes[ii] & mask);
+					decl.m_attributes[ii] = attr == 0 ? UINT16_MAX : attr == UINT16_MAX ? 0 : attr;
+				}
+
+				fillVertexDecl(_program.m_vsh, _vertexInputState, decl);
+			}
 
-			fillVertexDecl(_program.m_vsh, _vertexInputState, decl);
 			if (0 < _numInstanceData)
 			{
 				fillInstanceBinding(_program.m_vsh, _vertexInputState, _numInstanceData);
@@ -3098,12 +3131,10 @@ VK_IMPORT_DEVICE
 			// cache missed
 			VkAttachmentDescription ad[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
 			VkAttachmentReference colorAr[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
-			VkAttachmentReference resolveAr;
+			VkAttachmentReference resolveAr[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
 			VkAttachmentReference depthAr;
 			uint32_t numColorAr = 0;
 
-			resolveAr.attachment = VK_ATTACHMENT_UNUSED;
-			resolveAr.layout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 			depthAr.attachment   = VK_ATTACHMENT_UNUSED;
 			depthAr.layout		 = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 
@@ -3113,25 +3144,33 @@ VK_IMPORT_DEVICE
 				ad[ii].flags          = 0;
 				ad[ii].format         = texture.m_vkTextureFormat;
 				ad[ii].samples        = VK_SAMPLE_COUNT_1_BIT;
-				ad[ii].loadOp         = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-				ad[ii].storeOp        = VK_ATTACHMENT_STORE_OP_STORE;
-				ad[ii].stencilLoadOp  = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-				ad[ii].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-				ad[ii].initialLayout  = VK_IMAGE_LAYOUT_UNDEFINED;
 
 				if (texture.m_vkTextureAspect & VK_IMAGE_ASPECT_COLOR_BIT)
 				{
-					ad[ii].finalLayout               = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-					colorAr[numColorAr].layout       = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-					colorAr[numColorAr].attachment   = ii;
+					ad[ii].loadOp                  = VK_ATTACHMENT_LOAD_OP_LOAD;
+					ad[ii].storeOp                 = VK_ATTACHMENT_STORE_OP_STORE;
+					ad[ii].stencilLoadOp           = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+					ad[ii].stencilStoreOp          = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+					ad[ii].initialLayout           = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					ad[ii].finalLayout             = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					colorAr[numColorAr].layout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					colorAr[numColorAr].attachment = ii;
 					numColorAr++;
 				}
 				else if (texture.m_vkTextureAspect & VK_IMAGE_ASPECT_DEPTH_BIT)
 				{
-					ad[ii].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-					depthAr.layout     = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-					depthAr.attachment = ii;
+					ad[ii].loadOp         = VK_ATTACHMENT_LOAD_OP_LOAD;
+					ad[ii].storeOp        = VK_ATTACHMENT_STORE_OP_STORE;
+					ad[ii].stencilLoadOp  = VK_ATTACHMENT_LOAD_OP_LOAD;
+					ad[ii].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+					ad[ii].initialLayout  = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+					ad[ii].finalLayout    = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+					depthAr.layout        = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+					depthAr.attachment    = ii;
 				}
+
+				resolveAr[ii].attachment = VK_ATTACHMENT_UNUSED;
+				resolveAr[ii].layout     = ad[ii].initialLayout;	// TODO: should know hot to set it appropriately
 			}
 
 			VkSubpassDescription sd[1];
@@ -3141,7 +3180,7 @@ VK_IMPORT_DEVICE
 			sd[0].pInputAttachments       = NULL;
 			sd[0].colorAttachmentCount    = numColorAr;
 			sd[0].pColorAttachments       = colorAr;
-			sd[0].pResolveAttachments     = &resolveAr;
+			sd[0].pResolveAttachments     = resolveAr;
 			sd[0].pDepthStencilAttachment = &depthAr;
 			sd[0].preserveAttachmentCount = 0;
 			sd[0].pPreserveAttachments    = NULL;
@@ -3229,7 +3268,7 @@ VK_IMPORT_DEVICE
 			return VK_NULL_HANDLE;
 		}
 
-		VkPipeline getPipeline(uint64_t _state, uint64_t _stencil, uint16_t _declIdx, ProgramHandle _program, uint8_t _numInstanceData)
+		VkPipeline getPipeline(uint64_t _state, uint64_t _stencil, uint8_t _numStreams, const VertexDecl** _vertexDecls, ProgramHandle _program, uint8_t _numInstanceData)
 		{
 			ProgramVK& program = m_program[_program.idx];
 
@@ -3252,14 +3291,17 @@ VK_IMPORT_DEVICE
 			_stencil &= packStencil(~BGFX_STENCIL_FUNC_REF_MASK, ~BGFX_STENCIL_FUNC_REF_MASK);
 
 			VertexDecl decl;
-			bx::memCopy(&decl, &m_vertexDecls[_declIdx], sizeof(VertexDecl) );
-			const uint16_t* attrMask = program.m_vsh->m_attrMask;
-
-			for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+			if (0 < _numStreams)
 			{
-				uint16_t mask = attrMask[ii];
-				uint16_t attr = (decl.m_attributes[ii] & mask);
-				decl.m_attributes[ii] = attr == 0 ? UINT16_MAX : attr == UINT16_MAX ? 0 : attr;
+				bx::memCopy(&decl, _vertexDecls[0], sizeof(VertexDecl) );
+				const uint16_t* attrMask = program.m_vsh->m_attrMask;
+
+				for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
+				{
+					uint16_t mask = attrMask[ii];
+					uint16_t attr = (decl.m_attributes[ii] & mask);
+					decl.m_attributes[ii] = attr == 0 ? UINT16_MAX : attr == UINT16_MAX ? 0 : attr;
+				}
 			}
 
 			bx::HashMurmur2A murmur;
@@ -3269,7 +3311,10 @@ VK_IMPORT_DEVICE
 			murmur.add(program.m_vsh->m_hash);
 			murmur.add(program.m_vsh->m_attrMask, sizeof(program.m_vsh->m_attrMask) );
 			murmur.add(program.m_fsh->m_hash);
-			murmur.add(m_vertexDecls[_declIdx].m_hash);
+			for (uint8_t ii = 0; ii < _numStreams; ++ii)
+			{
+				murmur.add(_vertexDecls[ii]->m_hash);
+			}
 			murmur.add(decl.m_attributes, sizeof(decl.m_attributes) );
 			murmur.add(m_fbh.idx);
 			murmur.add(_numInstanceData);
@@ -3306,7 +3351,7 @@ VK_IMPORT_DEVICE
 			VkPipelineVertexInputStateCreateInfo vertexInputState;
 			vertexInputState.pVertexBindingDescriptions   = inputBinding;
 			vertexInputState.pVertexAttributeDescriptions = inputAttrib;
-			setInputLayout(vertexInputState, m_vertexDecls[_declIdx], program, _numInstanceData);
+			setInputLayout(vertexInputState, _numStreams, _vertexDecls, program, _numInstanceData);
 
 			const VkDynamicState dynamicStates[] =
 			{
@@ -5247,8 +5292,12 @@ VK_DESTROY
 				, &m_backBufferColorIdx
 				) );
 
-		const uint64_t f0 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_FACTOR, BGFX_STATE_BLEND_FACTOR);
-		const uint64_t f1 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_INV_FACTOR, BGFX_STATE_BLEND_INV_FACTOR);
+//		const uint64_t f0 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_FACTOR, BGFX_STATE_BLEND_FACTOR);
+//		const uint64_t f1 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_INV_FACTOR, BGFX_STATE_BLEND_INV_FACTOR);
+		const uint64_t f0 = BGFX_STATE_BLEND_FACTOR;
+		const uint64_t f1 = BGFX_STATE_BLEND_INV_FACTOR;
+		const uint64_t f2 = BGFX_STATE_BLEND_FACTOR<<4;
+		const uint64_t f3 = BGFX_STATE_BLEND_INV_FACTOR<<4;
 
 		ScratchBufferVK& scratchBuffer = m_scratchBuffer[m_backBufferColorIdx];
 		scratchBuffer.reset();
@@ -5315,7 +5364,7 @@ VK_DESTROY
 						vkCmdEndRenderPass(m_commandBuffer);
 						beginRenderPass = false;
 
-						if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) )
+						if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) && s_extension[Extension::EXT_debug_utils].m_supported )
 						{
 							vkCmdEndDebugUtilsLabelEXT(m_commandBuffer);
 						}
@@ -5352,7 +5401,7 @@ BX_UNUSED(currentSamplerStateIdx);
 					rpbi.renderArea.extent.width  = rect.m_width;
 					rpbi.renderArea.extent.height = rect.m_height;
 
-					if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) )
+					if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) && s_extension[Extension::EXT_debug_utils].m_supported )
 					{
 						VkDebugUtilsLabelEXT dul;
 						dul.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
@@ -5643,21 +5692,51 @@ BX_UNUSED(currentSamplerStateIdx);
 
 				rendererUpdateUniforms(this, _render->m_uniformBuffer[draw.m_uniformIdx], draw.m_uniformBegin, draw.m_uniformEnd);
 
-				if (isValid(draw.m_stream[0].m_handle) )
+				if (0 != draw.m_streamMask)
 				{
+					currentState.m_streamMask			= draw.m_streamMask;
+
 					const uint64_t state = draw.m_stateFlags;
 					bool hasFactor = 0
 						|| f0 == (state & f0)
 						|| f1 == (state & f1)
+						|| f2 == (state & f2)
+						|| f3 == (state & f3)
 						;
 
-					const VertexBufferVK& vb = m_vertexBuffers[draw.m_stream[0].m_handle.idx];
-					uint16_t declIdx = !isValid(vb.m_decl) ? draw.m_stream[0].m_decl.idx : vb.m_decl.idx;
+					const VertexDecl* decls[BGFX_CONFIG_MAX_VERTEX_STREAMS];
+					uint8_t numStreams = 0;
+					if (UINT8_MAX != draw.m_streamMask)
+					{
+						for (uint32_t idx = 0, streamMask = draw.m_streamMask
+							; 0 != streamMask
+							; streamMask >>= 1, idx += 1, ++numStreams
+							)
+						{
+							const uint32_t ntz = bx::uint32_cnttz(streamMask);
+							streamMask >>= ntz;
+							idx         += ntz;
+
+							currentState.m_stream[idx].m_decl        = draw.m_stream[idx].m_decl;
+							currentState.m_stream[idx].m_handle      = draw.m_stream[idx].m_handle;
+							currentState.m_stream[idx].m_startVertex = draw.m_stream[idx].m_startVertex;
+
+							uint16_t handle = draw.m_stream[idx].m_handle.idx;
+							const VertexBufferVK& vb = m_vertexBuffers[handle];
+							const uint16_t decl = isValid(draw.m_stream[idx].m_decl)
+												  ? draw.m_stream[idx].m_decl.idx
+												  : vb.m_decl.idx;
+							const VertexDecl& vertexDecl = m_vertexDecls[decl];
+
+							decls[numStreams] = &vertexDecl;
+						}
+					}
 
 					VkPipeline pipeline =
 						getPipeline(state
 							, draw.m_stencil
-							, declIdx
+							, numStreams
+							, decls
 							, key.m_program
 							, uint8_t(draw.m_instanceDataStride/16)
 							);
@@ -5996,23 +6075,24 @@ BX_UNUSED(currentSamplerStateIdx);
 
 //					vb.setState(_commandList, D3D12_RESOURCE_STATE_GENERIC_READ);
 
-					const VertexDecl& vertexDecl = m_vertexDecls[declIdx];
 					uint32_t numIndices = 0;
-
-					VkDeviceSize offset = 0;
-					vkCmdBindVertexBuffers(m_commandBuffer
-						, 0
-						, 1
-						, &vb.m_buffer
-						, &offset
-						);
+					for (uint32_t ii = 0; ii < numStreams; ++ii)
+					{
+						VkDeviceSize offset = 0;
+						vkCmdBindVertexBuffers(m_commandBuffer
+							, ii
+							, 1
+							, &m_vertexBuffers[draw.m_stream[ii].m_handle.idx].m_buffer
+							, &offset
+							);
+					}
 
 					if (isValid(draw.m_instanceDataBuffer))
 					{
 						VkDeviceSize instanceOffset = draw.m_instanceDataOffset;
 						VertexBufferVK& instanceBuffer = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
 						vkCmdBindVertexBuffers(m_commandBuffer
-							, 1
+							, numStreams
 							, 1
 							, &instanceBuffer.m_buffer
 							, &instanceOffset
@@ -6021,8 +6101,11 @@ BX_UNUSED(currentSamplerStateIdx);
 
 					if (!isValid(draw.m_indexBuffer) )
 					{
+						const VertexBufferVK& vertexBuffer = m_vertexBuffers[draw.m_stream[0].m_handle.idx];
+						const VertexDecl& vertexDecl = m_vertexDecls[draw.m_stream[0].m_decl.idx];
+
 						const uint32_t numVertices = UINT32_MAX == draw.m_numVertices
-							? vb.m_size / vertexDecl.m_stride
+							? vertexBuffer.m_size / vertexDecl.m_stride
 							: draw.m_numVertices
 							;
 						vkCmdDraw(m_commandBuffer
@@ -6303,7 +6386,7 @@ BX_UNUSED(presentMin, presentMax);
 			vkCmdEndRenderPass(m_commandBuffer);
 			beginRenderPass = false;
 
-			if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) )
+			if (BX_ENABLED(BGFX_CONFIG_DEBUG_ANNOTATION) && s_extension[Extension::EXT_debug_utils].m_supported )
 			{
 				vkCmdEndDebugUtilsLabelEXT(m_commandBuffer);
 			}