Prechádzať zdrojové kódy

Added compute buffer support.

Branimir Karadžić 11 rokov pred
rodič
commit
ad47659669

+ 4 - 4
include/bgfx.c99.h

@@ -245,8 +245,8 @@ typedef struct bgfx_instance_data_buffer
     uint8_t* data;
     uint32_t size;
     uint32_t offset;
+	uint32_t num;
     uint16_t stride;
-    uint16_t num;
     bgfx_vertex_buffer_handle_t handle;
 
 } bgfx_instance_data_buffer_t;
@@ -671,7 +671,7 @@ BGFX_C_API void bgfx_destroy_index_buffer(bgfx_index_buffer_handle_t _handle);
  *  @param _decl Vertex declaration.
  *  @returns Static vertex buffer handle.
  */
-BGFX_C_API bgfx_vertex_buffer_handle_t bgfx_create_vertex_buffer(const bgfx_memory_t* _mem, const bgfx_vertex_decl_t* _decl);
+BGFX_C_API bgfx_vertex_buffer_handle_t bgfx_create_vertex_buffer(const bgfx_memory_t* _mem, const bgfx_vertex_decl_t* _decl, uint8_t _flags);
 
 /**
  *  Destroy static vertex buffer.
@@ -721,7 +721,7 @@ BGFX_C_API void bgfx_destroy_dynamic_index_buffer(bgfx_dynamic_index_buffer_hand
  *  @param _num Number of vertices.
  *  @param _decl Vertex declaration.
  */
-BGFX_C_API bgfx_dynamic_vertex_buffer_handle_t bgfx_create_dynamic_vertex_buffer(uint16_t _num, const bgfx_vertex_decl_t* _decl);
+BGFX_C_API bgfx_dynamic_vertex_buffer_handle_t bgfx_create_dynamic_vertex_buffer(uint16_t _num, const bgfx_vertex_decl_t* _decl, uint8_t _flags);
 
 /**
  *  Create dynamic vertex buffer and initialize it.
@@ -1297,7 +1297,7 @@ BGFX_C_API void bgfx_set_transient_vertex_buffer(const bgfx_transient_vertex_buf
 /**
  *  Set instance data buffer for draw primitive.
  */
-BGFX_C_API void bgfx_set_instance_data_buffer(const bgfx_instance_data_buffer_t* _idb, uint16_t _num);
+BGFX_C_API void bgfx_set_instance_data_buffer(const bgfx_instance_data_buffer_t* _idb, uint32_t _num);
 
 /**
  *  Set program for draw primitive.

+ 17 - 4
include/bgfx.h

@@ -345,8 +345,8 @@ namespace bgfx
 		uint8_t* data;             //!< Pointer to data.
 		uint32_t size;             //!< Data size.
 		uint32_t offset;           //!< Offset in vertex buffer.
+		uint32_t num;              //!< Number of instances.
 		uint16_t stride;           //!< Vertex buffer stride.
-		uint16_t num;              //!< Number of instances.
 		VertexBufferHandle handle; //!< Vertex buffer object handle.
 	};
 
@@ -626,7 +626,7 @@ namespace bgfx
 	/// @param _decl Vertex declaration.
 	/// @returns Static vertex buffer handle.
 	///
-	VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl);
+	VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl, uint8_t _flags = BGFX_BUFFER_COMPUTE_NONE);
 
 	/// Destroy static vertex buffer.
 	///
@@ -669,8 +669,9 @@ namespace bgfx
 	///
 	/// @param _num Number of vertices.
 	/// @param _decl Vertex declaration.
+	/// @param _flags Compute usage flags (`BGFX_BUFFER_COMPUTE_*`). Default is `BGFX_BUFFER_COMPUTE_NONE`.
 	///
-	DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl);
+	DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl, uint8_t _flags = BGFX_BUFFER_COMPUTE_NONE);
 
 	/// Create dynamic vertex buffer and initialize it.
 	///
@@ -1183,7 +1184,13 @@ namespace bgfx
 	void setVertexBuffer(const TransientVertexBuffer* _tvb, uint32_t _startVertex, uint32_t _numVertices);
 
 	/// Set instance data buffer for draw primitive.
-	void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num = UINT16_MAX);
+	void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint32_t _num = UINT32_MAX);
+
+	/// Set instance data buffer for draw primitive from a static vertex buffer.
+	void setInstanceDataBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _num, uint16_t _stride);
+
+	/// Set instance data buffer for draw primitive from a dynamic vertex buffer.
+	void setInstanceDataBuffer(DynamicVertexBufferHandle _handle, uint32_t _offset, uint32_t _num);
 
 	/// Set program for draw primitive.
 	void setProgram(ProgramHandle _handle);
@@ -1228,6 +1235,12 @@ namespace bgfx
 	///
 	uint32_t submit(uint8_t _id, int32_t _depth = 0);
 
+	/// Set static vertex buffer as compute buffer binding at `_stage` with the given access.
+	void setBuffer(uint8_t _stage, VertexBufferHandle _handle, Access::Enum _access);
+
+	/// Set dynamic vertex buffer as compute buffer binding at `_stage` with the given access.
+	void setBuffer(uint8_t _stage, DynamicVertexBufferHandle _handle, Access::Enum _access);
+
 	///
 	void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access);
 

+ 5 - 0
include/bgfxdefines.h

@@ -194,6 +194,11 @@
 #define BGFX_DEBUG_STATS                 UINT32_C(0x00000004)
 #define BGFX_DEBUG_TEXT                  UINT32_C(0x00000008)
 
+///
+#define BGFX_BUFFER_COMPUTE_NONE         UINT8_C(0x00)
+#define BGFX_BUFFER_COMPUTE_READ         UINT8_C(0x01)
+#define BGFX_BUFFER_COMPUTE_WRITE        UINT8_C(0x02)
+
 ///
 #define BGFX_TEXTURE_NONE                UINT32_C(0x00000000)
 #define BGFX_TEXTURE_U_MIRROR            UINT32_C(0x00000001)

+ 42 - 12
src/bgfx.cpp

@@ -1532,7 +1532,10 @@ again:
 					VertexDeclHandle declHandle;
 					_cmdbuf.read(declHandle);
 
-					m_renderCtx->createVertexBuffer(handle, mem, declHandle);
+					uint8_t flags;
+					_cmdbuf.read(flags);
+
+					m_renderCtx->createVertexBuffer(handle, mem, declHandle, flags);
 
 					release(mem);
 				}
@@ -1596,7 +1599,10 @@ again:
 					uint32_t size;
 					_cmdbuf.read(size);
 
-					m_renderCtx->createDynamicVertexBuffer(handle, size);
+					uint8_t flags;
+					_cmdbuf.read(flags);
+
+					m_renderCtx->createDynamicVertexBuffer(handle, size, flags);
 				}
 				break;
 
@@ -2059,11 +2065,11 @@ again:
 		s_ctx->destroyIndexBuffer(_handle);
 	}
 
-	VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl)
+	VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl, uint8_t _flags)
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		BX_CHECK(0 != _decl.m_stride, "Invalid VertexDecl.");
-		return s_ctx->createVertexBuffer(_mem, _decl);
+		return s_ctx->createVertexBuffer(_mem, _decl, _flags);
 	}
 
 	void destroyVertexBuffer(VertexBufferHandle _handle)
@@ -2098,11 +2104,11 @@ again:
 		s_ctx->destroyDynamicIndexBuffer(_handle);
 	}
 
-	DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl)
+	DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl, uint8_t _compute)
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		BX_CHECK(0 != _decl.m_stride, "Invalid VertexDecl.");
-		return s_ctx->createDynamicVertexBuffer(_num, _decl);
+		return s_ctx->createDynamicVertexBuffer(_num, _decl, _compute);
 	}
 
 	DynamicVertexBufferHandle createDynamicVertexBuffer(const Memory* _mem, const VertexDecl& _decl)
@@ -2698,12 +2704,24 @@ again:
 		s_ctx->setVertexBuffer(_tvb, _tvb->startVertex + _startVertex, _numVertices);
 	}
 
-	void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num)
+	void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint32_t _num)
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		s_ctx->setInstanceDataBuffer(_idb, _num);
 	}
 
+	void setInstanceDataBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num)
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setInstanceDataBuffer(_handle, _startVertex, _num);
+	}
+
+	void setInstanceDataBuffer(DynamicVertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num)
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setInstanceDataBuffer(_handle, _startVertex, _num);
+	}
+
 	void setProgram(ProgramHandle _handle)
 	{
 		BGFX_CHECK_MAIN_THREAD();
@@ -2728,6 +2746,18 @@ again:
 		return s_ctx->submit(_id, _depth);
 	}
 
+	void setBuffer(uint8_t _stage, VertexBufferHandle _handle, Access::Enum _access)
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setBuffer(_stage, _handle, _access);
+	}
+
+	void setBuffer(uint8_t _stage, DynamicVertexBufferHandle _handle, Access::Enum _access)
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		s_ctx->setBuffer(_stage, _handle, _access);
+	}
+
 	void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access)
 	{
 		BGFX_CHECK_MAIN_THREAD();
@@ -2941,11 +2971,11 @@ BGFX_C_API void bgfx_destroy_index_buffer(bgfx_index_buffer_handle_t _handle)
 	bgfx::destroyIndexBuffer(handle.cpp);
 }
 
-BGFX_C_API bgfx_vertex_buffer_handle_t bgfx_create_vertex_buffer(const bgfx_memory_t* _mem, const bgfx_vertex_decl_t* _decl)
+BGFX_C_API bgfx_vertex_buffer_handle_t bgfx_create_vertex_buffer(const bgfx_memory_t* _mem, const bgfx_vertex_decl_t* _decl, uint8_t _flags)
 {
 	const bgfx::VertexDecl& decl = *(const bgfx::VertexDecl*)_decl;
 	union { bgfx_vertex_buffer_handle_t c; bgfx::VertexBufferHandle cpp; } handle;
-	handle.cpp = bgfx::createVertexBuffer( (const bgfx::Memory*)_mem, decl);
+	handle.cpp = bgfx::createVertexBuffer( (const bgfx::Memory*)_mem, decl, _flags);
 	return handle.c;
 }
 
@@ -2981,11 +3011,11 @@ BGFX_C_API void bgfx_destroy_dynamic_index_buffer(bgfx_dynamic_index_buffer_hand
 	bgfx::destroyDynamicIndexBuffer(handle.cpp);
 }
 
-BGFX_C_API bgfx_dynamic_vertex_buffer_handle_t bgfx_create_dynamic_vertex_buffer(uint16_t _num, const bgfx_vertex_decl_t* _decl)
+BGFX_C_API bgfx_dynamic_vertex_buffer_handle_t bgfx_create_dynamic_vertex_buffer(uint16_t _num, const bgfx_vertex_decl_t* _decl, uint8_t _flags)
 {
 	const bgfx::VertexDecl& decl = *(const bgfx::VertexDecl*)_decl;
 	union { bgfx_dynamic_vertex_buffer_handle_t c; bgfx::DynamicVertexBufferHandle cpp; } handle;
-	handle.cpp = bgfx::createDynamicVertexBuffer(_num, decl);
+	handle.cpp = bgfx::createDynamicVertexBuffer(_num, decl, _flags);
 	return handle.c;
 }
 
@@ -3317,7 +3347,7 @@ BGFX_C_API void bgfx_set_transient_vertex_buffer(const bgfx_transient_vertex_buf
 	bgfx::setVertexBuffer( (const bgfx::TransientVertexBuffer*)_tvb, _startVertex, _numVertices);
 }
 
-BGFX_C_API void bgfx_set_instance_data_buffer(const bgfx_instance_data_buffer_t* _idb, uint16_t _num)
+BGFX_C_API void bgfx_set_instance_data_buffer(const bgfx_instance_data_buffer_t* _idb, uint32_t _num)
 {
 	bgfx::setInstanceDataBuffer( (const bgfx::InstanceDataBuffer*)_idb, _num);
 }

+ 2 - 2
src/bgfx_compute.sh

@@ -14,8 +14,8 @@
 #define IMAGE2D_RW(_name, _reg) RWTexture2D<float4> _name : register(u[_reg])
 #define IMAGE2D_WR(_name, _reg) IMAGE2D_RW(_name, _reg)
 
-#define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct>   _name : register(b[_reg])
-#define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer<_struct> _name : register(b[_reg])
+#define BUFFER_RO(_name, _struct, _reg) Buffer<_struct> _name : register(b[_reg])
+#define BUFFER_RW(_name, _struct, _reg) Buffer<_struct> _name : register(u[_reg])
 #define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
 
 #define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)]

+ 121 - 28
src/bgfx_p.h

@@ -1148,6 +1148,11 @@ namespace bgfx
 		uint32_t m_flags;
 	};
 
+	struct VertexBuffer
+	{
+		uint16_t m_stride;
+	};
+
 	struct DynamicIndexBuffer
 	{
 		IndexBufferHandle m_handle;
@@ -1162,8 +1167,9 @@ namespace bgfx
 		uint32_t m_size;
 		uint32_t m_startVertex;
 		uint32_t m_numVertices;
-		uint32_t m_stride;
+		uint16_t m_stride;
 		VertexDeclHandle m_decl;
+		uint8_t m_flags;
 	};
 
 	BX_ALIGN_DECL_CACHE_LINE(struct) Frame
@@ -1336,15 +1342,23 @@ namespace bgfx
 			m_draw.m_vertexDecl   = _tvb->decl;
 		}
 
-		void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num)
+		void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint32_t _num)
 		{
  			m_draw.m_instanceDataOffset = _idb->offset;
 			m_draw.m_instanceDataStride = _idb->stride;
-			m_draw.m_numInstances       = bx::uint16_min( (uint16_t)_idb->num, _num);
+			m_draw.m_numInstances       = bx::uint32_min(_idb->num, _num);
 			m_draw.m_instanceDataBuffer = _idb->handle;
 			BX_FREE(g_allocator, const_cast<InstanceDataBuffer*>(_idb) );
 		}
 
+		void setInstanceDataBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num, uint16_t _stride)
+		{
+			m_draw.m_instanceDataOffset = _startVertex * _stride;
+			m_draw.m_instanceDataStride = _stride;
+			m_draw.m_numInstances       = _num;
+			m_draw.m_instanceDataBuffer = _handle;
+		}
+
 		void setProgram(ProgramHandle _handle)
 		{
 			BX_CHECK(isValid(_handle), "Can't set program with invalid handle.");
@@ -1365,6 +1379,16 @@ namespace bgfx
 			}
 		}
 
+		void setBuffer(uint8_t _stage, VertexBufferHandle _handle, Access::Enum _access)
+		{
+			ComputeBinding& bind = m_compute.m_bind[_stage];
+			bind.m_idx    = _handle.idx;
+			bind.m_format = 0;
+			bind.m_access = uint8_t(_access);
+			bind.m_mip    = 0;
+			bind.m_type   = uint8_t(ComputeBinding::Buffer);
+		}
+
 		void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access)
 		{
 			ComputeBinding& bind = m_compute.m_bind[_stage];
@@ -1666,7 +1690,7 @@ namespace bgfx
 					if (it->m_size != _size)
 					{
 						it->m_size -= _size;
-						it->m_ptr += _size;
+						it->m_ptr  += _size;
 					}
 					else
 					{
@@ -1745,12 +1769,12 @@ namespace bgfx
 		virtual void destroyIndexBuffer(IndexBufferHandle _handle) = 0;
 		virtual void createVertexDecl(VertexDeclHandle _handle, const VertexDecl& _decl) = 0;
 		virtual void destroyVertexDecl(VertexDeclHandle _handle) = 0;
-		virtual void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle) = 0;
+		virtual void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint8_t _flags) = 0;
 		virtual void destroyVertexBuffer(VertexBufferHandle _handle) = 0;
 		virtual void createDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _size) = 0;
 		virtual void updateDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) = 0;
 		virtual void destroyDynamicIndexBuffer(IndexBufferHandle _handle) = 0;
-		virtual void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size) = 0;
+		virtual void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint8_t _flags) = 0;
 		virtual void updateDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) = 0;
 		virtual void destroyDynamicVertexBuffer(VertexBufferHandle _handle) = 0;
 		virtual void createShader(ShaderHandle _handle, Memory* _mem) = 0;
@@ -1909,7 +1933,7 @@ namespace bgfx
 			return declHandle;
 		}
 
-		BGFX_API_FUNC(VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl) )
+		BGFX_API_FUNC(VertexBufferHandle createVertexBuffer(const Memory* _mem, const VertexDecl& _decl, uint8_t flags) )
 		{
 			VertexBufferHandle handle = { m_vertexBufferHandle.alloc() };
 
@@ -1919,10 +1943,13 @@ namespace bgfx
 				VertexDeclHandle declHandle = findVertexDecl(_decl);
 				m_declRef.add(handle, declHandle, _decl.m_hash);
 
+				m_vertexBuffers[handle.idx].m_stride = _decl.m_stride;
+
 				CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateVertexBuffer);
 				cmdbuf.write(handle);
 				cmdbuf.write(_mem);
 				cmdbuf.write(declHandle);
+				cmdbuf.write(flags);
 			}
 
 			return handle;
@@ -2016,27 +2043,53 @@ namespace bgfx
 			m_dynamicIndexBufferHandle.free(_handle.idx);
 		}
 
-		BGFX_API_FUNC(DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl) )
+		BGFX_API_FUNC(DynamicVertexBufferHandle createDynamicVertexBuffer(uint16_t _num, const VertexDecl& _decl, uint8_t _flags) )
 		{
 			DynamicVertexBufferHandle handle = BGFX_INVALID_HANDLE;
 			uint32_t size = strideAlign16(_num*_decl.m_stride, _decl.m_stride);
-			uint64_t ptr = m_dynamicVertexBufferAllocator.alloc(size);
-			if (ptr == NonLocalAllocator::invalidBlock)
-			{
-				VertexBufferHandle vertexBufferHandle = { m_vertexBufferHandle.alloc() };
 
-				BX_WARN(isValid(handle), "Failed to allocate dynamic vertex buffer handle.");
-				if (!isValid(vertexBufferHandle) )
+			uint64_t ptr = 0;
+			if (0 != (_flags & BGFX_BUFFER_COMPUTE_WRITE) )
+			{
+				ptr = m_gpuDvbAllocator.alloc(size);
+				if (ptr == NonLocalAllocator::invalidBlock)
 				{
-					return handle;
+					VertexBufferHandle vertexBufferHandle = { m_vertexBufferHandle.alloc() };
+					if (!isValid(vertexBufferHandle) )
+					{
+						return handle;
+					}
+
+					CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateDynamicVertexBuffer);
+					cmdbuf.write(vertexBufferHandle);
+					cmdbuf.write(BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
+					cmdbuf.write(_flags);
+
+					m_gpuDvbAllocator.add(uint64_t(vertexBufferHandle.idx)<<32, BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
+					ptr = m_gpuDvbAllocator.alloc(size);
 				}
+			}
+			else
+			{
+				ptr = m_cpuDvbAllocator.alloc(size);
+				if (ptr == NonLocalAllocator::invalidBlock)
+				{
+					VertexBufferHandle vertexBufferHandle = { m_vertexBufferHandle.alloc() };
 
-				CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateDynamicVertexBuffer);
-				cmdbuf.write(vertexBufferHandle);
-				cmdbuf.write(BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
+					BX_WARN(isValid(handle), "Failed to allocate dynamic vertex buffer handle.");
+					if (!isValid(vertexBufferHandle) )
+					{
+						return handle;
+					}
+
+					CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateDynamicVertexBuffer);
+					cmdbuf.write(vertexBufferHandle);
+					cmdbuf.write(BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
+					cmdbuf.write(_flags);
 
-				m_dynamicVertexBufferAllocator.add(uint64_t(vertexBufferHandle.idx)<<32, BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
-				ptr = m_dynamicVertexBufferAllocator.alloc(size);
+					m_cpuDvbAllocator.add(uint64_t(vertexBufferHandle.idx)<<32, BGFX_CONFIG_DYNAMIC_VERTEX_BUFFER_SIZE);
+					ptr = m_cpuDvbAllocator.alloc(size);
+				}
 			}
 
 			VertexDeclHandle declHandle = findVertexDecl(_decl);
@@ -2049,6 +2102,7 @@ namespace bgfx
 			dvb.m_startVertex = dvb.m_offset/_decl.m_stride;
 			dvb.m_numVertices = dvb.m_size/_decl.m_stride;
 			dvb.m_decl = declHandle;
+			dvb.m_flags = _flags;
 			m_declRef.add(dvb.m_handle, declHandle, _decl.m_hash);
 
 			return handle;
@@ -2058,7 +2112,7 @@ namespace bgfx
 		{
 			uint32_t numVertices = _mem->size/_decl.m_stride;
 			BX_CHECK(numVertices <= UINT16_MAX, "Num vertices exceeds maximum (num %d, max %d).", numVertices, UINT16_MAX);
-			DynamicVertexBufferHandle handle = createDynamicVertexBuffer(uint16_t(numVertices), _decl);
+			DynamicVertexBufferHandle handle = createDynamicVertexBuffer(uint16_t(numVertices), _decl, false);
 			if (isValid(handle) )
 			{
 				updateDynamicVertexBuffer(handle, _mem);
@@ -2069,6 +2123,7 @@ namespace bgfx
 		BGFX_API_FUNC(void updateDynamicVertexBuffer(DynamicVertexBufferHandle _handle, const Memory* _mem) )
 		{
 			DynamicVertexBuffer& dvb = m_dynamicVertexBuffers[_handle.idx];
+			BX_CHECK(!dvb.m_flags, "Can't update GPU buffer from CPU.");
 			CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::UpdateDynamicVertexBuffer);
 			cmdbuf.write(dvb.m_handle);
 			cmdbuf.write(dvb.m_offset);
@@ -2092,7 +2147,14 @@ namespace bgfx
 				cmdbuf.write(declHandle);
 			}
 
-			m_dynamicVertexBufferAllocator.free(uint64_t(dvb.m_handle.idx)<<32 | dvb.m_offset);
+			if (0 != (dvb.m_flags & BGFX_BUFFER_COMPUTE_WRITE) )
+			{
+				m_gpuDvbAllocator.free(uint64_t(dvb.m_handle.idx)<<32 | dvb.m_offset);
+			}
+			else
+			{
+				m_cpuDvbAllocator.free(uint64_t(dvb.m_handle.idx)<<32 | dvb.m_offset);
+			}
 			m_dynamicVertexBufferHandle.free(_handle.idx);
 		}
 
@@ -2171,6 +2233,7 @@ namespace bgfx
 				CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateDynamicVertexBuffer);
 				cmdbuf.write(handle);
 				cmdbuf.write(_size);
+				cmdbuf.write(false);
 
 				vb = (TransientVertexBuffer*)BX_ALLOC(g_allocator, sizeof(TransientVertexBuffer)+_size);
 				vb->data = (uint8_t*)&vb[1];
@@ -2216,7 +2279,7 @@ namespace bgfx
 			_tvb->startVertex = offset/_decl.m_stride;
 			_tvb->stride = _decl.m_stride;
 			_tvb->handle = dvb.handle;
-			_tvb->decl = declHandle;
+			_tvb->decl   = declHandle;
 		}
 
 		BGFX_API_FUNC(const InstanceDataBuffer* allocInstanceDataBuffer(uint32_t _num, uint16_t _stride) )
@@ -2228,11 +2291,11 @@ namespace bgfx
 
 			TransientVertexBuffer& dvb = *m_submit->m_transientVb;
 			InstanceDataBuffer* idb = (InstanceDataBuffer*)BX_ALLOC(g_allocator, sizeof(InstanceDataBuffer) );
-			idb->data = &dvb.data[offset];
-			idb->size = _num * stride;
+			idb->data   = &dvb.data[offset];
+			idb->size   = _num * stride;
 			idb->offset = offset;
+			idb->num    = _num;
 			idb->stride = stride;
-			idb->num = _num;
 			idb->handle = dvb.handle;
 
 			return idb;
@@ -2893,13 +2956,29 @@ namespace bgfx
 			m_submit->setVertexBuffer(_tvb, _startVertex, _numVertices);
 		}
 
-		BGFX_API_FUNC(void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num) )
+		BGFX_API_FUNC(void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint32_t _num) )
 		{
 			--m_instBufferCount;
 
 			m_submit->setInstanceDataBuffer(_idb, _num);
 		}
 
+		BGFX_API_FUNC(void setInstanceDataBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num) )
+		{
+			const VertexBuffer& vb = m_vertexBuffers[_handle.idx];
+			m_submit->setInstanceDataBuffer(_handle, _startVertex, _num, vb.m_stride);
+		}
+
+		BGFX_API_FUNC(void setInstanceDataBuffer(DynamicVertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num) )
+		{
+			const DynamicVertexBuffer& dvb = m_dynamicVertexBuffers[_handle.idx];
+			m_submit->setInstanceDataBuffer(dvb.m_handle
+				, dvb.m_startVertex + _startVertex
+				, _num
+				, dvb.m_stride
+				);
+		}
+
 		BGFX_API_FUNC(void setProgram(ProgramHandle _handle) )
 		{
 			m_submit->setProgram(_handle);
@@ -2930,6 +3009,17 @@ namespace bgfx
 			return m_submit->submit(_id, _depth);
 		}
 
+		BGFX_API_FUNC(void setBuffer(uint8_t _stage, VertexBufferHandle _handle, Access::Enum _access) )
+		{
+			m_submit->setBuffer(_stage, _handle, _access);
+		}
+
+		BGFX_API_FUNC(void setBuffer(uint8_t _stage, DynamicVertexBufferHandle _handle, Access::Enum _access) )
+		{
+			const DynamicVertexBuffer& dvb = m_dynamicVertexBuffers[_handle.idx];
+			m_submit->setBuffer(_stage, dvb.m_handle, _access);
+		}
+
 		BGFX_API_FUNC(void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) )
 		{
 			m_submit->setImage(_stage, _sampler, _handle, _mip, _format, _access);
@@ -3028,6 +3118,8 @@ namespace bgfx
 		uint64_t m_tempKeys[BGFX_CONFIG_MAX_DRAW_CALLS];
 		uint16_t m_tempValues[BGFX_CONFIG_MAX_DRAW_CALLS];
 
+		VertexBuffer m_vertexBuffers[BGFX_CONFIG_MAX_VERTEX_BUFFERS];
+
 		DynamicIndexBuffer m_dynamicIndexBuffers[BGFX_CONFIG_MAX_DYNAMIC_INDEX_BUFFERS];
 		DynamicVertexBuffer m_dynamicVertexBuffers[BGFX_CONFIG_MAX_DYNAMIC_VERTEX_BUFFERS];
 
@@ -3038,7 +3130,8 @@ namespace bgfx
 
 		NonLocalAllocator m_dynamicIndexBufferAllocator;
 		bx::HandleAllocT<BGFX_CONFIG_MAX_DYNAMIC_INDEX_BUFFERS> m_dynamicIndexBufferHandle;
-		NonLocalAllocator m_dynamicVertexBufferAllocator;
+		NonLocalAllocator m_cpuDvbAllocator;
+		NonLocalAllocator m_gpuDvbAllocator;
 		bx::HandleAllocT<BGFX_CONFIG_MAX_DYNAMIC_VERTEX_BUFFERS> m_dynamicVertexBufferHandle;
 
 		bx::HandleAllocT<BGFX_CONFIG_MAX_INDEX_BUFFERS> m_indexBufferHandle;

+ 87 - 23
src/renderer_d3d11.cpp

@@ -901,9 +901,9 @@ RENDERDOC_IMPORT
 		{
 		}
 
-		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle) BX_OVERRIDE
+		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint8_t _flags) BX_OVERRIDE
 		{
-			m_vertexBuffers[_handle.idx].create(_mem->size, _mem->data, _declHandle);
+			m_vertexBuffers[_handle.idx].create(_mem->size, _mem->data, _declHandle, _flags);
 		}
 
 		void destroyVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE
@@ -926,10 +926,10 @@ RENDERDOC_IMPORT
 			m_indexBuffers[_handle.idx].destroy();
 		}
 
-		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size) BX_OVERRIDE
+		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint8_t _flags) BX_OVERRIDE
 		{
 			VertexDeclHandle decl = BGFX_INVALID_HANDLE;
-			m_vertexBuffers[_handle.idx].create(_size, NULL, decl);
+			m_vertexBuffers[_handle.idx].create(_size, NULL, decl, _flags);
 		}
 
 		void updateDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE
@@ -1299,6 +1299,20 @@ RENDERDOC_IMPORT
 			m_samplerStateCache.invalidate();
 		}
 
+		void invalidateCompute()
+		{
+			m_deviceCtx->CSSetShader(NULL, NULL, 0);
+
+			ID3D11UnorderedAccessView* uav[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
+			m_deviceCtx->CSSetUnorderedAccessViews(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, uav, NULL);
+
+			ID3D11ShaderResourceView* srv[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
+			m_deviceCtx->CSSetShaderResources(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, srv);
+
+			ID3D11SamplerState* samplers[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
+			m_deviceCtx->CSSetSamplers(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, samplers);
+		}
+
 		void updateMsaa()
 		{
 			for (uint32_t ii = 1, last = 0; ii < BX_COUNTOF(s_msaa); ++ii)
@@ -2373,24 +2387,59 @@ RENDERDOC_IMPORT
 		deviceCtx->Unmap(m_ptr, 0);
 	}
 
-	void VertexBufferD3D11::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle)
+	void VertexBufferD3D11::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint8_t _flags)
 	{
+		m_uav = NULL;
 		m_size = _size;
 		m_decl = _declHandle;
-		m_dynamic = NULL == _data;
+
+		const bool needUav = 0 != (_flags & BGFX_BUFFER_COMPUTE_WRITE);
+		const bool needSrv = 0 != (_flags & BGFX_BUFFER_COMPUTE_READ);
+		m_dynamic = NULL == _data && !needUav;
 
 		D3D11_BUFFER_DESC desc;
 		desc.ByteWidth = _size;
-		desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
+		desc.BindFlags = 0
+			| D3D11_BIND_VERTEX_BUFFER
+			| (needUav ? D3D11_BIND_UNORDERED_ACCESS : 0)
+			| (needSrv ? D3D11_BIND_SHADER_RESOURCE  : 0)
+			;
 		desc.MiscFlags = 0;
+		desc.StructureByteStride = 0;
 
-		if (m_dynamic)
+		ID3D11Device* device = s_renderD3D11->m_device;
+
+		if (needUav)
+		{
+			desc.Usage = D3D11_USAGE_DEFAULT;
+			desc.CPUAccessFlags = 0;
+			desc.StructureByteStride = isValid(_declHandle) 
+				? s_renderD3D11->m_vertexDecls[_declHandle.idx].m_stride 
+				: 0
+				;
+
+			DX_CHECK(device->CreateBuffer(&desc
+				, NULL
+				, &m_ptr
+				) );
+
+			D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
+			uavd.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+			uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+			uavd.Buffer.FirstElement = 0;
+			uavd.Buffer.NumElements  = m_size/16;
+			uavd.Buffer.Flags = 0;
+			DX_CHECK(device->CreateUnorderedAccessView(m_ptr
+				, &uavd
+				, &m_uav
+				) );
+		}
+		else if (m_dynamic)
 		{
 			desc.Usage = D3D11_USAGE_DYNAMIC;
 			desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
-			desc.StructureByteStride = 0;
 
-			DX_CHECK(s_renderD3D11->m_device->CreateBuffer(&desc
+			DX_CHECK(device->CreateBuffer(&desc
 				, NULL
 				, &m_ptr
 				) );
@@ -2399,18 +2448,30 @@ RENDERDOC_IMPORT
 		{
 			desc.Usage = D3D11_USAGE_IMMUTABLE;
 			desc.CPUAccessFlags = 0;
-			desc.StructureByteStride = 0;
 
 			D3D11_SUBRESOURCE_DATA srd;
 			srd.pSysMem = _data;
 			srd.SysMemPitch = 0;
 			srd.SysMemSlicePitch = 0;
 
-			DX_CHECK(s_renderD3D11->m_device->CreateBuffer(&desc
+			DX_CHECK(device->CreateBuffer(&desc
 				, &srd
 				, &m_ptr
 				) );
 		}
+
+		if (needSrv)
+		{
+			D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
+			srvd.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+			srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
+			srvd.Buffer.FirstElement = 0;
+			srvd.Buffer.NumElements = m_size/16;
+			DX_CHECK(device->CreateShaderResourceView(m_ptr
+				, &srvd
+				, &m_srv
+				) );
+		}
 	}
 
 	void VertexBufferD3D11::update(uint32_t _offset, uint32_t _size, void* _data, bool _discard)
@@ -3312,7 +3373,14 @@ RENDERDOC_IMPORT
 							case ComputeBinding::Buffer:
 								{
 									const VertexBufferD3D11& vertexBuffer = m_vertexBuffers[bind.m_idx];
-									BX_UNUSED(vertexBuffer);
+									if (Access::Read != bind.m_access)
+									{
+										uav[ii] = vertexBuffer.m_uav;
+									}
+									else
+									{
+										srv[ii] = vertexBuffer.m_srv;
+									}
 								}
 								break;
 							}
@@ -3335,16 +3403,7 @@ RENDERDOC_IMPORT
 					programIdx = invalidHandle;
 					m_currentProgram = NULL;
 
-					deviceCtx->CSSetShader(NULL, NULL, 0);
-
-					ID3D11UnorderedAccessView* uav[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
-					deviceCtx->CSSetUnorderedAccessViews(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, uav, NULL);
-
-					ID3D11ShaderResourceView* srv[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
-					deviceCtx->CSSetShaderResources(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, srv);
-
-					ID3D11SamplerState* samplers[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = {};
-					m_deviceCtx->CSSetSamplers(0, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, samplers);
+					invalidateCompute();
 				}
 
 				const RenderDraw& draw = renderItem.draw;
@@ -3804,6 +3863,11 @@ RENDERDOC_IMPORT
 				}
 			}
 
+			if (wasCompute)
+			{
+				invalidateCompute();
+			}
+
 			if (0 < _render->m_num)
 			{
 				captureElapsed = -bx::getHPCounter();

+ 8 - 1
src/renderer_d3d11.h

@@ -78,11 +78,13 @@ namespace bgfx
 	{
 		VertexBufferD3D11()
 			: m_ptr(NULL)
+			, m_srv(NULL)
+			, m_uav(NULL)
 			, m_dynamic(false)
 		{
 		}
 
-		void create(uint32_t _size, void* _data, VertexDeclHandle _declHandle);
+		void create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint8_t _flags);
 		void update(uint32_t _offset, uint32_t _size, void* _data, bool _discard = false);
 
 		void destroy()
@@ -92,9 +94,14 @@ namespace bgfx
 				DX_RELEASE(m_ptr, 0);
 				m_dynamic = false;
 			}
+
+			DX_RELEASE(m_srv, 0);
+			DX_RELEASE(m_uav, 0);
 		}
 
 		ID3D11Buffer* m_ptr;
+		ID3D11ShaderResourceView* m_srv;
+		ID3D11UnorderedAccessView* m_uav;
 		uint32_t m_size;
 		VertexDeclHandle m_decl;
 		bool m_dynamic;

+ 2 - 2
src/renderer_d3d9.cpp

@@ -660,7 +660,7 @@ namespace bgfx
 			m_vertexDecls[_handle.idx].destroy();
 		}
 
-		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle) BX_OVERRIDE
+		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 			m_vertexBuffers[_handle.idx].create(_mem->size, _mem->data, _declHandle);
 		}
@@ -685,7 +685,7 @@ namespace bgfx
 			m_indexBuffers[_handle.idx].destroy();
 		}
 
-		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size) BX_OVERRIDE
+		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 			VertexDeclHandle decl = BGFX_INVALID_HANDLE;
 			m_vertexBuffers[_handle.idx].create(_size, NULL, decl);

+ 5 - 5
src/renderer_gl.cpp

@@ -1451,7 +1451,7 @@ namespace bgfx
 		{
 		}
 
-		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle) BX_OVERRIDE
+		void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 			m_vertexBuffers[_handle.idx].create(_mem->size, _mem->data, _declHandle);
 		}
@@ -1476,7 +1476,7 @@ namespace bgfx
 			m_indexBuffers[_handle.idx].destroy();
 		}
 
-		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size) BX_OVERRIDE
+		void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 			VertexDeclHandle decl = BGFX_INVALID_HANDLE;
 			m_vertexBuffers[_handle.idx].create(_size, NULL, decl);
@@ -4337,9 +4337,9 @@ namespace bgfx
 
 								case ComputeBinding::Buffer:
 									{
-// 										const VertexBufferGL& vertexBuffer = m_vertexBuffers[bind.m_idx];
-// 										GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ii, vertexBuffer.m_id) ); 
-// 										barrier |= GL_SHADER_STORAGE_BARRIER_BIT;
+										const VertexBufferGL& vertexBuffer = m_vertexBuffers[bind.m_idx];
+										GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ii, vertexBuffer.m_id) ); 
+										barrier |= GL_SHADER_STORAGE_BARRIER_BIT;
 									}
 									break;
 								}

+ 2 - 2
src/renderer_null.cpp

@@ -49,7 +49,7 @@ namespace bgfx
 		{
 		}
 
-		void createVertexBuffer(VertexBufferHandle /*_handle*/, Memory* /*_mem*/, VertexDeclHandle /*_declHandle*/) BX_OVERRIDE
+		void createVertexBuffer(VertexBufferHandle /*_handle*/, Memory* /*_mem*/, VertexDeclHandle /*_declHandle*/, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 		}
 
@@ -69,7 +69,7 @@ namespace bgfx
 		{
 		}
 
-		void createDynamicVertexBuffer(VertexBufferHandle /*_handle*/, uint32_t /*_size*/) BX_OVERRIDE
+		void createDynamicVertexBuffer(VertexBufferHandle /*_handle*/, uint32_t /*_size*/, uint8_t /*_flags*/) BX_OVERRIDE
 		{
 		}