Browse Source

Added indirect compute dispatch.

Branimir Karadžić 10 years ago
parent
commit
3b66dfa5e8
8 changed files with 204 additions and 71 deletions
  1. 11 8
      include/bgfx.h
  2. 19 13
      src/bgfx.cpp
  3. 28 0
      src/bgfx_compute.sh
  4. 47 24
      src/bgfx_p.h
  5. 2 0
      src/config.h
  6. 35 11
      src/renderer_d3d11.cpp
  7. 58 15
      src/renderer_gl.cpp
  8. 4 0
      src/renderer_gl.h

+ 11 - 8
include/bgfx.h

@@ -206,7 +206,7 @@ namespace bgfx
 
 	static const uint16_t invalidHandle = UINT16_MAX;
 
-	BGFX_HANDLE(DrawIndirectBufferHandle);
+	BGFX_HANDLE(IndirectBufferHandle);
 	BGFX_HANDLE(DynamicIndexBufferHandle);
 	BGFX_HANDLE(DynamicVertexBufferHandle);
 	BGFX_HANDLE(FrameBufferHandle);
@@ -871,10 +871,10 @@ namespace bgfx
 	const InstanceDataBuffer* allocInstanceDataBuffer(uint32_t _num, uint16_t _stride);
 
 	///
-	DrawIndirectBufferHandle createDrawIndirectBuffer(uint32_t _num);
+	IndirectBufferHandle createIndirectBuffer(uint32_t _num);
 
 	///
-	void destroyDrawIndirectBuffer(DrawIndirectBufferHandle _handle);
+	void destroyIndirectBuffer(IndirectBufferHandle _handle);
 
 	/// Create shader from memory buffer.
 	ShaderHandle createShader(const Memory* _mem);
@@ -1351,9 +1351,6 @@ namespace bgfx
 	/// Set instance data buffer for draw primitive.
 	void setInstanceDataBuffer(DynamicVertexBufferHandle _handle, uint32_t _startVertex, uint32_t _num);
 
-	///
-	void setDrawIndirectBuffer(DrawIndirectBufferHandle _handle, uint16_t _start = 0, uint16_t _num = UINT16_MAX);
-
 	/// Set program for draw primitive.
 	void setProgram(ProgramHandle _handle);
 
@@ -1397,6 +1394,9 @@ namespace bgfx
 	///
 	uint32_t submit(uint8_t _id, int32_t _depth = 0);
 
+	/// Submit primitive for rendering with draw arguments read from `_indirectHandle`: `_num` commands starting at command index `_start`.
+	uint32_t submit(uint8_t _id, IndirectBufferHandle _indirectHandle, uint16_t _start = 0, uint16_t _num = 1, int32_t _depth = 0);
+
 	///
 	void setBuffer(uint8_t _stage, IndexBufferHandle _handle, Access::Enum _access);
 
@@ -1410,7 +1410,7 @@ namespace bgfx
 	void setBuffer(uint8_t _stage, DynamicVertexBufferHandle _handle, Access::Enum _access);
 
 	///
-	void setBuffer(uint8_t _stage, DrawIndirectBufferHandle _handle, Access::Enum _access);
+	void setBuffer(uint8_t _stage, IndirectBufferHandle _handle, Access::Enum _access);
 
 	///
 	void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, Access::Enum _access, TextureFormat::Enum _format = TextureFormat::Count);
@@ -1419,7 +1419,10 @@ namespace bgfx
 	void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, Access::Enum _access, TextureFormat::Enum _format = TextureFormat::Count);
 
 	/// Dispatch compute.
-	void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX = 1, uint16_t _numY = 1, uint16_t _numZ = 1, uint8_t _flags = BGFX_SUBMIT_EYE_FIRST);
+	uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX = 1, uint16_t _numY = 1, uint16_t _numZ = 1, uint8_t _flags = BGFX_SUBMIT_EYE_FIRST);
+
+	/// Dispatch compute with thread group counts read from `_indirectHandle`: `_num` dispatches starting at command index `_start`.
+	uint32_t dispatch(uint8_t _id, ProgramHandle _handle, IndirectBufferHandle _indirectHandle, uint16_t _start = 0, uint16_t _num = 1, uint8_t _flags = BGFX_SUBMIT_EYE_FIRST);
 
 	/// Discard all previously set state for draw or compute call.
 	void discard();

+ 19 - 13
src/bgfx.cpp

@@ -2309,16 +2309,16 @@ again:
 		return s_ctx->allocInstanceDataBuffer(_num, _stride);
 	}
 
-	DrawIndirectBufferHandle createDrawIndirectBuffer(uint32_t _num)
+	IndirectBufferHandle createIndirectBuffer(uint32_t _num)
 	{
 		BGFX_CHECK_MAIN_THREAD();
-		return s_ctx->createDrawIndirectBuffer(_num);
+		return s_ctx->createIndirectBuffer(_num);
 	}
 
-	void destroyDrawIndirectBuffer(DrawIndirectBufferHandle _handle)
+	void destroyIndirectBuffer(IndirectBufferHandle _handle)
 	{
 		BGFX_CHECK_MAIN_THREAD();
-		s_ctx->destroyDrawIndirectBuffer(_handle);
+		s_ctx->destroyIndirectBuffer(_handle);
 	}
 
 	ShaderHandle createShader(const Memory* _mem)
@@ -2922,12 +2922,6 @@ again:
 		s_ctx->setInstanceDataBuffer(_handle, _startVertex, _num);
 	}
 
-	void setDrawIndirectBuffer(DrawIndirectBufferHandle _handle, uint16_t _start, uint16_t _num)
-	{
-		BGFX_CHECK_MAIN_THREAD();
-		s_ctx->setDrawIndirectBuffer(_handle, _start, _num);
-	}
-
 	void setProgram(ProgramHandle _handle)
 	{
 		BGFX_CHECK_MAIN_THREAD();
@@ -2952,6 +2946,12 @@ again:
 		return s_ctx->submit(_id, _depth);
 	}
 
+	uint32_t submit(uint8_t _id, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, int32_t _depth) // Public indirect-draw submit; thin forwarder to the context.
+	{
+		BGFX_CHECK_MAIN_THREAD(); // macro defined elsewhere; presumably verifies the calling thread
+		return s_ctx->submit(_id, _indirectHandle, _start, _num, _depth); // result of the context call is returned unchanged
+	}
+
 	void setBuffer(uint8_t _stage, IndexBufferHandle _handle, Access::Enum _access)
 	{
 		BGFX_CHECK_MAIN_THREAD();
@@ -2976,7 +2976,7 @@ again:
 		s_ctx->setBuffer(_stage, _handle, _access);
 	}
 
-	void setBuffer(uint8_t _stage, DrawIndirectBufferHandle _handle, Access::Enum _access)
+	void setBuffer(uint8_t _stage, IndirectBufferHandle _handle, Access::Enum _access)
 	{
 		BGFX_CHECK_MAIN_THREAD();
 		s_ctx->setBuffer(_stage, _handle, _access);
@@ -2994,10 +2994,16 @@ again:
 		s_ctx->setImage(_stage, _sampler, _handle, _attachment, _access, _format);
 	}
 
-	void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ, uint8_t _flags)
+	uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ, uint8_t _flags)
+	{
+		BGFX_CHECK_MAIN_THREAD();
+		return s_ctx->dispatch(_id, _handle, _numX, _numY, _numZ, _flags);
+	}
+
+	uint32_t dispatch(uint8_t _id, ProgramHandle _handle, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, uint8_t _flags)
 	{
 		BGFX_CHECK_MAIN_THREAD();
-		s_ctx->dispatch(_id, _handle, _numX, _numY, _numZ, _flags);
+		return s_ctx->dispatch(_id, _handle, _indirectHandle, _start, _num, _flags);
 	}
 
 	void discard()

+ 28 - 0
src/bgfx_compute.sh

@@ -194,6 +194,34 @@ uint atomicCompSwap(uint _mem, uint _compare, uint _data)
 
 #endif // BGFX_SHADER_LANGUAGE_HLSL
 
+// Store compute dispatch arguments in an indirect buffer.
+// _buffer is indexed in uvec4 units and every indirect entry occupies two
+// consecutive uvec4, so entry _offset starts at element (_offset)*2.
+// _offset is parenthesized so that expression arguments (for example
+// base + ii) select the intended entry.
+#define dispatchIndirect(_buffer, _offset, _numX, _numY, _numZ) \
+			_buffer[(_offset)*2+0] = uvec4(_numX, _numY, _numZ, 0u)
+
+// Store non-indexed indirect draw arguments in entry _offset of _buffer.
+// The four values fill the first uvec4 of the two-uvec4 entry in the
+// order given: vertex count, instance count, first vertex, first
+// instance. The second uvec4 of the entry is left untouched.
+// _offset is parenthesized so that expression arguments index correctly.
+#define drawIndirect(_buffer, _offset, _numVertices, _numInstances \
+			, _startVertex, _startInstance) \
+			_buffer[(_offset)*2+0] = uvec4(_numVertices, _numInstances, _startVertex, _startInstance)
+
+// Store indexed indirect draw arguments in entry _offset of _buffer.
+// Five values are needed, so they span both uvec4 of the entry:
+//   [2*_offset+0] = (index count, instance count, first index, first vertex)
+//   [2*_offset+1] = (first instance, 0, 0, 0)
+// Expands to two statements: do not use it as the sole body of an
+// unbraced if/for.
+#define drawIndexedIndirect(_buffer, _offset, _numIndices, _numInstances \
+			, _startIndex, _startVertex, _startInstance) \
+			_buffer[(_offset)*2+0] = uvec4(_numIndices, _numInstances, _startIndex, _startVertex); \
+			_buffer[(_offset)*2+1] = uvec4(_startInstance, 0u, 0u, 0u)
+
 #endif // __cplusplus
 
 #endif // BGFX_COMPUTE_H_HEADER_GUARD

+ 47 - 24
src/bgfx_p.h

@@ -1072,8 +1072,8 @@ namespace bgfx
 			m_instanceDataOffset = 0;
 			m_instanceDataStride = 0;
 			m_numInstances       = 1;
-			m_startDrawIndirect  = 0;
-			m_numDrawIndirect    = UINT16_MAX;
+			m_startIndirect  = 0;
+			m_numIndirect    = UINT16_MAX;
 			m_num     = 1;
 			m_flags   = BGFX_SUBMIT_EYE_FIRST;
 			m_scissor = UINT16_MAX;
@@ -1081,7 +1081,7 @@ namespace bgfx
 			m_vertexDecl.idx         = invalidHandle;
 			m_indexBuffer.idx        = invalidHandle;
 			m_instanceDataBuffer.idx = invalidHandle;
-			m_drawIndirectBuffer.idx = invalidHandle;
+			m_indirectBuffer.idx = invalidHandle;
 
 			for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++ii)
 			{
@@ -1103,8 +1103,8 @@ namespace bgfx
 		uint32_t m_instanceDataOffset;
 		uint16_t m_instanceDataStride;
 		uint16_t m_numInstances;
-		uint16_t m_startDrawIndirect;
-		uint16_t m_numDrawIndirect;
+		uint16_t m_startIndirect;
+		uint16_t m_numIndirect;
 		uint16_t m_num;
 		uint16_t m_scissor;
 		uint8_t  m_submitFlags;
@@ -1113,7 +1113,7 @@ namespace bgfx
 		VertexDeclHandle   m_vertexDecl;
 		IndexBufferHandle  m_indexBuffer;
 		VertexBufferHandle m_instanceDataBuffer;
-		DrawIndirectBufferHandle m_drawIndirectBuffer;
+		IndirectBufferHandle m_indirectBuffer;
 	};
 
 	struct RenderCompute
@@ -1129,6 +1129,10 @@ namespace bgfx
 			m_num         = 0;
 			m_submitFlags = BGFX_SUBMIT_EYE_FIRST;
 
+			m_indirectBuffer.idx = invalidHandle;
+			m_startIndirect      = 0;
+			m_numIndirect        = UINT16_MAX;
+
 			for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii)
 			{
 				m_bind[ii].m_idx = invalidHandle;
@@ -1139,10 +1143,13 @@ namespace bgfx
 		uint32_t m_constBegin;
 		uint32_t m_constEnd;
 		uint32_t m_matrix;
+		IndirectBufferHandle m_indirectBuffer;
 
 		uint16_t m_numX;
 		uint16_t m_numY;
 		uint16_t m_numZ;
+		uint16_t m_startIndirect;
+		uint16_t m_numIndirect;
 		uint16_t m_num;
 		uint8_t  m_submitFlags;
 	};
@@ -1387,13 +1394,6 @@ namespace bgfx
 			m_draw.m_instanceDataBuffer = _handle;
 		}
 
-		void setDrawIndirectBuffer(DrawIndirectBufferHandle _handle, uint16_t _start, uint16_t _num)
-		{
-			m_draw.m_startDrawIndirect  = _start;
-			m_draw.m_numDrawIndirect    = _num;
-			m_draw.m_drawIndirectBuffer = _handle;
-		}
-
 		void setProgram(ProgramHandle _handle)
 		{
 			m_key.m_program = _handle.idx;
@@ -1462,7 +1462,25 @@ namespace bgfx
 		}
 
 		uint32_t submit(uint8_t _id, int32_t _depth);
+
+		uint32_t submit(uint8_t _id, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, int32_t _depth) // Indirect draw: records the argument buffer on the pending draw, then submits it.
+		{
+			m_draw.m_startIndirect  = _start; // index of the first command; renderers multiply it by BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+			m_draw.m_numIndirect    = _num; // UINT16_MAX makes the renderers derive the count from the buffer size
+			m_draw.m_indirectBuffer = _indirectHandle; // a valid handle here makes the renderers take the indirect draw path
+			return submit(_id, _depth); // forwards to the non-indirect overload and returns its result
+		}
+
 		uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _ngx, uint16_t _ngy, uint16_t _ngz, uint8_t _flags);
+
+		uint32_t dispatch(uint8_t _id, ProgramHandle _handle, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, uint8_t _flags) // Indirect compute dispatch: records the argument buffer on the pending compute item, then dispatches it.
+		{
+			m_compute.m_indirectBuffer = _indirectHandle; // a valid handle here makes the renderers take the indirect dispatch path
+			m_compute.m_startIndirect  = _start; // index of the first command; renderers multiply it by BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+			m_compute.m_numIndirect    = _num; // UINT16_MAX makes the renderers derive the count from the buffer size
+			return dispatch(_id, _handle, 0, 0, 0, _flags); // direct group counts are passed as zero; the visible renderer code does not read them when the indirect buffer is valid
+		}
+
 		void sort();
 
 		bool checkAvailTransientIndexBuffer(uint32_t _num)
@@ -2509,15 +2527,15 @@ namespace bgfx
 			return idb;
 		}
 
-		DrawIndirectBufferHandle createDrawIndirectBuffer(uint32_t _num)
+		IndirectBufferHandle createIndirectBuffer(uint32_t _num)
 		{
 			BX_UNUSED(_num);
-			DrawIndirectBufferHandle handle = { m_vertexBufferHandle.alloc() };
+			IndirectBufferHandle handle = { m_vertexBufferHandle.alloc() };
 
 			BX_WARN(isValid(handle), "Failed to allocate draw indirect buffer handle.");
 			if (isValid(handle) )
 			{
-				uint32_t size = _num * sizeof(uint32_t) * 5;
+				uint32_t size = _num * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
 				uint8_t flags = BGFX_BUFFER_DRAW_INDIRECT;
 
 				CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateDynamicVertexBuffer);
@@ -2529,7 +2547,7 @@ namespace bgfx
 			return handle;
 		}
 
-		void destroyDrawIndirectBuffer(DrawIndirectBufferHandle _handle)
+		void destroyIndirectBuffer(IndirectBufferHandle _handle)
 		{
 			VertexBufferHandle handle = { _handle.idx };
 			BGFX_CHECK_HANDLE("destroyDrawIndirectBuffer", m_vertexBufferHandle, handle);
@@ -3299,12 +3317,6 @@ namespace bgfx
 				);
 		}
 
-		BGFX_API_FUNC(void setDrawIndirectBuffer(DrawIndirectBufferHandle _handle, uint16_t _start, uint16_t _num) )
-		{
-			BGFX_CHECK_HANDLE("setDrawIndirectBuffer", m_vertexBufferHandle, _handle);
-			m_submit->setDrawIndirectBuffer(_handle, _start, _num);
-		}
-
 		BGFX_API_FUNC(void setProgram(ProgramHandle _handle) )
 		{
 			BGFX_CHECK_HANDLE("setProgram", m_programHandle, _handle);
@@ -3338,6 +3350,12 @@ namespace bgfx
 			return m_submit->submit(_id, _depth);
 		}
 
+		BGFX_API_FUNC(uint32_t submit(uint8_t _id, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, int32_t _depth) ) // Context-level indirect submit: checks the handle, then queues the draw on m_submit.
+		{
+			BGFX_CHECK_HANDLE("submit", m_vertexBufferHandle, _indirectHandle); // indirect buffers are allocated from m_vertexBufferHandle, so they are checked against it
+			return m_submit->submit(_id, _indirectHandle, _start, _num, _depth);
+		}
+
 		BGFX_API_FUNC(void setBuffer(uint8_t _stage, IndexBufferHandle _handle, Access::Enum _access) )
 		{
 			BGFX_CHECK_HANDLE("setBuffer", m_indexBufferHandle, _handle);
@@ -3364,7 +3382,7 @@ namespace bgfx
 			m_submit->setBuffer(_stage, dvb.m_handle, _access);
 		}
 
-		BGFX_API_FUNC(void setBuffer(uint8_t _stage, DrawIndirectBufferHandle _handle, Access::Enum _access) )
+		BGFX_API_FUNC(void setBuffer(uint8_t _stage, IndirectBufferHandle _handle, Access::Enum _access) )
 		{
 			BGFX_CHECK_HANDLE("setBuffer", m_vertexBufferHandle, _handle);
 			VertexBufferHandle handle = { _handle.idx };
@@ -3400,6 +3418,11 @@ namespace bgfx
 			return m_submit->dispatch(_id, _handle, _numX, _numY, _numZ, _flags);
 		}
 
+		BGFX_API_FUNC(uint32_t dispatch(uint8_t _id, ProgramHandle _handle, IndirectBufferHandle _indirectHandle, uint16_t _start, uint16_t _num, uint8_t _flags) ) // Context-level indirect dispatch; group counts come from _indirectHandle.
+		{
+			// Check the indirect buffer handle the same way the indirect submit()
+			// overload does; indirect buffers are allocated from m_vertexBufferHandle.
+			BGFX_CHECK_HANDLE("dispatch", m_vertexBufferHandle, _indirectHandle);
+			return m_submit->dispatch(_id, _handle, _indirectHandle, _start, _num, _flags);
+		}
+
 		BGFX_API_FUNC(void discard() )
 		{
 			m_submit->discard();

+ 2 - 0
src/config.h

@@ -267,4 +267,6 @@
 #	define BGFX_CONFIG_MAX_CLEAR_COLOR_PALETTE 16
 #endif // BGFX_CONFIG_MAX_CLEAR_COLOR_PALETTE
 
+#define BGFX_CONFIG_DRAW_INDIRECT_STRIDE 32 // bytes per indirect command slot; equals the two uvec4 per entry that the bgfx_compute.sh indirect macros index with _offset*2
+
 #endif // BGFX_CONFIG_H_HEADER_GUARD

+ 35 - 11
src/renderer_d3d11.cpp

@@ -3691,7 +3691,27 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					deviceCtx->CSSetShaderResources(0, BX_COUNTOF(srv), srv);
 					deviceCtx->CSSetSamplers(0, BX_COUNTOF(sampler), sampler);
 
-					deviceCtx->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ);
+					if (isValid(compute.m_indirectBuffer) )
+					{
+						const VertexBufferD3D11& vb = m_vertexBuffers[compute.m_indirectBuffer.idx];
+						ID3D11Buffer* ptr = vb.m_ptr;
+
+						uint32_t numDrawIndirect = UINT16_MAX == compute.m_numIndirect
+							? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+							: compute.m_numIndirect
+							;
+
+						uint32_t args = compute.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+						for (uint32_t ii = 0; ii < numDrawIndirect; ++ii)
+						{
+							deviceCtx->DispatchIndirect(ptr, args);
+							args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+						}
+					}
+					else
+					{
+						deviceCtx->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ);
+					}
 
 					continue;
 				}
@@ -4005,33 +4025,37 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					uint32_t numPrimsRendered  = 0;
 					uint32_t numDrawIndirect   = 0;
 
-					if (isValid(draw.m_drawIndirectBuffer) )
+					if (isValid(draw.m_indirectBuffer) )
 					{
-						const VertexBufferD3D11& vb = m_vertexBuffers[draw.m_drawIndirectBuffer.idx];
+						const VertexBufferD3D11& vb = m_vertexBuffers[draw.m_indirectBuffer.idx];
 						ID3D11Buffer* ptr = vb.m_ptr;
 
 						if (isValid(draw.m_indexBuffer) )
 						{
-							const uint32_t commandSize = 5 * sizeof(uint32_t);
-							numDrawIndirect = UINT16_MAX == draw.m_numDrawIndirect ? vb.m_size/commandSize : draw.m_numDrawIndirect;
+							numDrawIndirect = UINT16_MAX == draw.m_numIndirect
+								? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+								: draw.m_numIndirect
+								;
 
-							uint32_t args = draw.m_startDrawIndirect * commandSize;
+							uint32_t args = draw.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
 							for (uint32_t ii = 0; ii < numDrawIndirect; ++ii)
 							{
 								deviceCtx->DrawIndexedInstancedIndirect(ptr, args);
-								args += commandSize;
+								args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
 							}
 						}
 						else
 						{
-							const uint32_t commandSize = 4 * sizeof(uint32_t);
-							numDrawIndirect = UINT16_MAX == draw.m_numDrawIndirect ? vb.m_size/commandSize : draw.m_numDrawIndirect;
+							numDrawIndirect = UINT16_MAX == draw.m_numIndirect
+								? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+								: draw.m_numIndirect
+								;
 
-							uint32_t args = draw.m_startDrawIndirect * commandSize;
+							uint32_t args = draw.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
 							for (uint32_t ii = 0; ii < numDrawIndirect; ++ii)
 							{
 								deviceCtx->DrawInstancedIndirect(ptr, args);
-								args += commandSize;
+								args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
 							}
 						}
 					}

+ 58 - 15
src/renderer_gl.cpp

@@ -4876,7 +4876,38 @@ namespace bgfx { namespace gl
 
 							viewState.setPredefined<1>(this, view, eye, program, _render, compute);
 
-							GL_CHECK(glDispatchCompute(compute.m_numX, compute.m_numY, compute.m_numZ) );
+							if (isValid(compute.m_indirectBuffer) )
+							{
+								const VertexBufferGL& vb = m_vertexBuffers[compute.m_indirectBuffer.idx];
+								if (currentState.m_indirectBuffer.idx != compute.m_indirectBuffer.idx)
+								{
+									currentState.m_indirectBuffer = compute.m_indirectBuffer;
+									GL_CHECK(glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, vb.m_id) );
+								}
+
+								uint32_t numDrawIndirect = UINT16_MAX == compute.m_numIndirect
+									? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+									: compute.m_numIndirect
+									;
+
+								uintptr_t args = compute.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+								for (uint32_t ii = 0; ii < numDrawIndirect; ++ii)
+								{
+									GL_CHECK(glDispatchComputeIndirect(args) );
+									args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+								}
+							}
+							else
+							{
+								if (isValid(currentState.m_indirectBuffer) )
+								{
+									currentState.m_indirectBuffer.idx = invalidHandle;
+									GL_CHECK(glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0) );
+								}
+
+								GL_CHECK(glDispatchCompute(compute.m_numX, compute.m_numY, compute.m_numZ) );
+							}
+
 							GL_CHECK(glMemoryBarrier(barrier) );
 						}
 					}
@@ -5412,12 +5443,12 @@ namespace bgfx { namespace gl
 						uint32_t numPrimsRendered  = 0;
 						uint32_t numDrawIndirect   = 0;
 
-						if (isValid(draw.m_drawIndirectBuffer) )
+						if (isValid(draw.m_indirectBuffer) )
 						{
-							const VertexBufferGL& vb = m_vertexBuffers[draw.m_drawIndirectBuffer.idx];
-							if (currentState.m_drawIndirectBuffer.idx != draw.m_drawIndirectBuffer.idx)
+							const VertexBufferGL& vb = m_vertexBuffers[draw.m_indirectBuffer.idx];
+							if (currentState.m_indirectBuffer.idx != draw.m_indirectBuffer.idx)
 							{
-								currentState.m_drawIndirectBuffer = draw.m_drawIndirectBuffer;
+								currentState.m_indirectBuffer = draw.m_indirectBuffer;
 								GL_CHECK(glBindBuffer(GL_DRAW_INDIRECT_BUFFER, vb.m_id) );
 							}
 
@@ -5430,26 +5461,38 @@ namespace bgfx { namespace gl
 									: GL_UNSIGNED_INT
 									;
 
-								const uint32_t commandSize = 5 * sizeof(uint32_t);
-								numDrawIndirect = UINT16_MAX == draw.m_numDrawIndirect ? vb.m_size/commandSize : draw.m_numDrawIndirect;
+								numDrawIndirect = UINT16_MAX == draw.m_numIndirect
+									? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+									: draw.m_numIndirect
+									;
 
-								uintptr_t args = draw.m_startDrawIndirect * commandSize;
-								GL_CHECK(glMultiDrawElementsIndirect(prim.m_type, indexFormat, (void*)args, numDrawIndirect, commandSize) );
+								uintptr_t args = draw.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+								GL_CHECK(glMultiDrawElementsIndirect(prim.m_type, indexFormat
+									, (void*)args
+									, numDrawIndirect
+									, BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+									) );
 							}
 							else
 							{
-								const uint32_t commandSize = 4 * sizeof(uint32_t);
-								numDrawIndirect = UINT16_MAX == draw.m_numDrawIndirect ? vb.m_size/commandSize : draw.m_numDrawIndirect;
+								numDrawIndirect = UINT16_MAX == draw.m_numIndirect
+									? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+									: draw.m_numIndirect
+									;
 
-								uintptr_t args = draw.m_startDrawIndirect * commandSize;
-								GL_CHECK(glMultiDrawArraysIndirect(prim.m_type, (void*)args, numDrawIndirect, commandSize) );
+								uintptr_t args = draw.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE;
+								GL_CHECK(glMultiDrawArraysIndirect(prim.m_type
+									, (void*)args
+									, numDrawIndirect
+									, BGFX_CONFIG_DRAW_INDIRECT_STRIDE
+									) );
 							}
 						}
 						else
 						{
-							if (isValid(currentState.m_drawIndirectBuffer) )
+							if (isValid(currentState.m_indirectBuffer) )
 							{
-								currentState.m_drawIndirectBuffer.idx = invalidHandle;
+								currentState.m_indirectBuffer.idx = invalidHandle;
 								GL_CHECK(glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0) );
 							}
 

+ 4 - 0
src/renderer_gl.h

@@ -625,6 +625,10 @@ typedef uint64_t GLuint64;
 #	define GL_DRAW_INDIRECT_BUFFER 0x8F3F
 #endif // GL_DRAW_INDIRECT_BUFFER
 
+#ifndef GL_DISPATCH_INDIRECT_BUFFER
+#	define GL_DISPATCH_INDIRECT_BUFFER 0x90EE // fallback for GL headers that lack it; buffer target the GL renderer binds before glDispatchComputeIndirect
+#endif // GL_DISPATCH_INDIRECT_BUFFER
+
 #if BX_PLATFORM_NACL
 #	include "glcontext_ppapi.h"
 #elif BX_PLATFORM_WINDOWS