Explorar o código

Adding geometry instancing support.

bkaradzic hai 13 anos
pai
achega
1e7b94e82a
Modificáronse 9 ficheiros con 361 adicións e 111 borrados
  1. 16 0
      include/bgfx.h
  2. 10 0
      src/bgfx.cpp
  3. 41 0
      src/bgfx_p.h
  4. 4 0
      src/config.h
  5. 4 0
      src/glimports.h
  6. 142 57
      src/renderer_d3d9.cpp
  7. 63 20
      src/renderer_d3d9.h
  8. 77 32
      src/renderer_gl.cpp
  9. 4 2
      src/renderer_gl.h

+ 16 - 0
include/bgfx.h

@@ -253,6 +253,16 @@ namespace bgfx
 		VertexDeclHandle decl;
 	};
 
+	struct InstanceDataBuffer // Describes a block of per-instance vertex data handed out by allocInstanceDataBuffer.
+	{
+		uint8_t* data; // Write pointer into the backing transient vertex buffer, already advanced by `offset`.
+		uint32_t size; // Total size in bytes (num * stride).
+		uint32_t offset; // Byte offset of this block inside the backing vertex buffer.
+		uint16_t stride; // Bytes per instance, after the allocator's 16-byte alignment.
+		uint16_t num; // Number of instances.
+		VertexBufferHandle handle; // Handle of the vertex buffer that backs `data`.
+	};
+
 	struct ConstantType
 	{
 		enum Enum
@@ -369,6 +379,9 @@ namespace bgfx
 	///
 	const TransientVertexBuffer* allocTransientVertexBuffer(uint16_t _num, const VertexDecl& _decl);
 
+	/// Allocates per-instance data for `_num` instances of `_stride` bytes each (stride is aligned with BX_ALIGN_16). Returns NULL when built with BGFX_CONFIG_RENDERER_OPENGLES. The returned buffer is released by setInstanceDataBuffer.
+	const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride);
+
 	///
 	VertexShaderHandle createVertexShader(const Memory* _mem);
 
@@ -471,6 +484,9 @@ namespace bgfx
 	///
 	void setVertexBuffer(const TransientVertexBuffer* _vb);
 
+	/// Sets the instance data buffer for the current draw state. The InstanceDataBuffer descriptor is freed by this call, so `_idb` must not be used afterwards. No-op when built with BGFX_CONFIG_RENDERER_OPENGLES.
+	void setInstanceDataBuffer(const InstanceDataBuffer* _idb);
+
 	///
 	void setMaterial(MaterialHandle _handle);
 

+ 10 - 0
src/bgfx.cpp

@@ -845,6 +845,11 @@ namespace bgfx
 		return s_ctx.allocTransientVertexBuffer(_num, _decl);
 	}
 
+	const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride) // Public API entry point; forwards to the global context.
+	{
+		return s_ctx.allocInstanceDataBuffer(_num, _stride);
+	}
+
 	VertexShaderHandle createVertexShader(const Memory* _mem)
 	{
 		return s_ctx.createVertexShader(_mem);
@@ -1021,6 +1026,11 @@ namespace bgfx
 		s_ctx.m_submit->setVertexBuffer(_vb);
 	}
 
+	void setInstanceDataBuffer(const InstanceDataBuffer* _idb) // Public API entry point; forwards to s_ctx.m_submit.
+	{
+		s_ctx.m_submit->setInstanceDataBuffer(_idb);
+	}
+
 	void setMaterial(MaterialHandle _handle)
 	{
 		s_ctx.m_submit->setMaterial(_handle);

+ 41 - 0
src/bgfx_p.h

@@ -862,10 +862,14 @@ namespace bgfx
 			m_numIndices = 0;
 			m_startVertex = 0;
 			m_numVertices = UINT32_C(0xffffffff);
+			m_instanceDataOffset = 0;
+			m_instanceDataStride = 0;
+			m_numInstances = 1;
 			m_num = 1;
 			m_vertexBuffer.idx = bgfx::invalidHandle;
 			m_vertexDecl.idx = bgfx::invalidHandle;
 			m_indexBuffer.idx = bgfx::invalidHandle;
+			m_instanceDataBuffer.idx = bgfx::invalidHandle;
 			
 			for (uint32_t ii = 0; ii < BGFX_STATE_TEX_COUNT; ++ii)
 			{
@@ -882,11 +886,15 @@ namespace bgfx
 		uint32_t m_numIndices;
 		uint32_t m_startVertex;
 		uint32_t m_numVertices;
+		uint32_t m_instanceDataOffset;
+		uint16_t m_instanceDataStride;
+		uint16_t m_numInstances;
 		uint16_t m_num;
 
 		VertexBufferHandle m_vertexBuffer;
 		VertexDeclHandle m_vertexDecl;
 		IndexBufferHandle m_indexBuffer;
+		VertexBufferHandle m_instanceDataBuffer;
 		Sampler m_sampler[BGFX_STATE_TEX_COUNT];
 	};
 
@@ -1079,6 +1087,18 @@ namespace bgfx
 			g_free(const_cast<TransientVertexBuffer*>(_vb) );
 		}
 
+		void setInstanceDataBuffer(const InstanceDataBuffer* _idb) // Copies the instancing parameters from _idb into m_state, then frees the descriptor.
+		{
+#if BGFX_CONFIG_RENDERER_OPENGLES // Instancing is compiled out for GLES: nothing is recorded and _idb is not touched.
+#else
+ 			m_state.m_instanceDataOffset = _idb->offset; // NOTE(review): _idb is dereferenced without a NULL check - confirm callers never pass NULL here.
+			m_state.m_instanceDataStride = _idb->stride;
+			m_state.m_numInstances = _idb->num;
+			m_state.m_instanceDataBuffer = _idb->handle;
+			g_free(const_cast<InstanceDataBuffer*>(_idb) ); // Only the descriptor struct is freed; the vertex data it pointed at is not released here.
+#endif // BGFX_CONFIG_RENDERER_OPENGLES
+		}
+
 		void setMaterial(MaterialHandle _handle)
 		{
 			BX_CHECK(invalidHandle != _handle.idx, "Can't set material with invalid handle.");
@@ -1857,6 +1877,27 @@ namespace bgfx
 			return vb;
 		}
 
+		/// Reserves room for `_num` instances of `_stride` bytes each in the frame's
+		/// transient vertex buffer and returns a heap-allocated descriptor for it.
+		/// The stride is passed through BX_ALIGN_16 before use. Returns NULL when
+		/// built with BGFX_CONFIG_RENDERER_OPENGLES.
+		const InstanceDataBuffer* allocInstanceDataBuffer(uint16_t _num, uint16_t _stride)
+		{
+#if BGFX_CONFIG_RENDERER_OPENGLES
+			return NULL;
+#else
+			uint16_t stride = BX_ALIGN_16(_stride);
+			uint32_t offset = m_submit->allocTransientVertexBuffer(_num, stride);
+
+			TransientVertexBuffer& dvb = *m_submit->m_transientVb;
+			InstanceDataBuffer* idb = (InstanceDataBuffer*)g_realloc(NULL, sizeof(InstanceDataBuffer) );
+			idb->data = &dvb.data[offset];
+			// Widen before multiplying: uint16_t * uint16_t is evaluated as int and
+			// can overflow for large instance counts and strides.
+			idb->size = uint32_t(_num) * stride;
+			idb->offset = offset;
+			idb->stride = stride;
+			idb->num = _num;
+			idb->handle = dvb.handle;
+
+			return idb;
+#endif // BGFX_CONFIG_RENDERER_OPENGLES
+		}
+
 		VertexShaderHandle createVertexShader(const Memory* _mem)
 		{
 			VertexShaderHandle handle = { m_vertexShaderHandle.alloc() };

+ 4 - 0
src/config.h

@@ -128,4 +128,8 @@
 #	define BGFX_CONFIG_USE_TINYSTL 0
 #endif // BGFX_CONFIG_USE_TINYSTL
 
+#ifndef BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT // Maximum number of float4 instance-data attributes per draw; sizes the per-renderer instance attribute tables.
+#	define BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT 5
+#endif // BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT
+
 #endif // __CONFIG_H__

+ 4 - 0
src/glimports.h

@@ -117,6 +117,10 @@ GL_IMPORT(true,  PFNGLGETQUERYOBJECTI64VEXTPROC,          glGetQueryObjecti64vEX
 GL_IMPORT(true,  PFNGLGETQUERYOBJECTUI64VEXTPROC,         glGetQueryObjectui64vEXT);
 													
 GL_IMPORT(true,  PFNGLSAMPLECOVERAGEARBPROC,              glSampleCoverageARB);
+
+GL_IMPORT(true,  PFNGLDRAWARRAYSINSTANCEDARBPROC,         glDrawArraysInstanced);
+GL_IMPORT(true,  PFNGLDRAWELEMENTSINSTANCEDARBPROC,       glDrawElementsInstanced);
+GL_IMPORT(true,  PFNGLVERTEXATTRIBDIVISORARBPROC,         glVertexAttribDivisor);
 													
 #if BGFX_CONFIG_DEBUG_GREMEDY						
 GL_IMPORT(true,  PFNGLSTRINGMARKERGREMEDYPROC,            glStringMarkerGREMEDY);

+ 142 - 57
src/renderer_d3d9.cpp

@@ -132,16 +132,27 @@ namespace bgfx
 		{ D3DFMT_A16B16G16R16, 8 },
 	};
 
+	static ExtendedFormat s_extendedFormats[ExtendedFormat::Count] = // Indexed by ExtendedFormat::Enum - row order must match the enum. Columns: FOURCC format, usage, resource type, supported.
+	{
+		{ D3DFMT_ATI1, 0,                     D3DRTYPE_TEXTURE, false },
+		{ D3DFMT_ATI2, 0,                     D3DRTYPE_TEXTURE, false },
+		{ D3DFMT_DF16, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false },
+		{ D3DFMT_DF24, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false },
+		{ D3DFMT_INST, 0,                     D3DRTYPE_SURFACE, false }, // Row read through ExtendedFormat::Inst when deciding whether instancing is available.
+		{ D3DFMT_INTZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false },
+		{ D3DFMT_NULL, D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, false },
+		{ D3DFMT_RESZ, D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, false },
+		{ D3DFMT_RAWZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false },
+	}; // The last column (m_supported) is overwritten at runtime with the result of CheckDeviceFormat.
+
 	struct RendererContext
 	{
 		RendererContext()
 			: m_flags(BGFX_RESET_NONE)
 			, m_initialized(false)
-			, m_fmtNULL(false)
-			, m_fmtDF16(false)
-			, m_fmtDF24(false)
-			, m_fmtINTZ(false)
-			, m_fmtRAWZ(false)
+			, m_amd(false)
+			, m_nvidia(false)
+			, m_instancing(false)
 			, m_rtMsaa(false)
 		{
 			m_rt.idx = invalidHandle;
@@ -222,6 +233,11 @@ namespace bgfx
 #endif // BGFX_CONFIG_DEBUG_PERFHUD
 			}
 
+			D3DADAPTER_IDENTIFIER9 identifier;
+			DX_CHECK(m_d3d9->GetAdapterIdentifier(m_adapter, 0, &identifier) );
+			m_amd = identifier.VendorId == 0x1002;
+			m_nvidia = identifier.VendorId == 0x10de;
+
 			uint32_t behaviorFlags[] =
 			{
 				D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_PUREDEVICE,
@@ -277,11 +293,24 @@ namespace bgfx
 			BX_TRACE("Max fragment shader 2.0 instr. slots: %d", m_caps.PS20Caps.NumInstructionSlots);
 			BX_TRACE("Max fragment shader 3.0 instr. slots: %d", m_caps.MaxPixelShader30InstructionSlots);
 
-			m_fmtNULL = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_NULL) );
-			m_fmtDF16 = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_DF16) );
-			m_fmtDF24 = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_DF24) );
-			m_fmtINTZ = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_INTZ) );
-			m_fmtRAWZ = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, D3DFMT_RAWZ) );
+			BX_TRACE("Extended formats:");
+			for (uint32_t ii = 0; ii < ExtendedFormat::Count; ++ii)
+			{
+				ExtendedFormat& fmt = s_extendedFormats[ii];
+				fmt.m_supported = SUCCEEDED(m_d3d9->CheckDeviceFormat(m_adapter, m_deviceType, adapterFormat, fmt.m_usage, fmt.m_type, fmt.m_fmt) );
+				const char* fourcc = (const char*)&fmt.m_fmt;
+				BX_TRACE("\t%2d: %c%c%c%c %s", ii, fourcc[0], fourcc[1], fourcc[2], fourcc[3], fmt.m_supported ? "supported" : "");
+			}
+
+			m_instancing = false
+				|| s_extendedFormats[ExtendedFormat::Inst].m_supported
+				|| (m_caps.VertexShaderVersion >= D3DVS_VERSION(3, 0) )
+				;
+
+			if (m_instancing)
+			{
+				m_device->SetRenderState(D3DRS_POINTSIZE, D3DFMT_INST);
+			}
 
 			uint32_t index = 1;
 			for (const D3DFORMAT* fmt = &s_checkColorFormats[index]; *fmt != D3DFMT_UNKNOWN; ++fmt, ++index)
@@ -674,6 +703,7 @@ namespace bgfx
 
 		IDirect3DSurface9* m_backBufferColor;
 		IDirect3DSurface9* m_backBufferDepthStencil;
+		IDirect3DVertexDeclaration9* m_instanceDataDecls[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT];
 
 		HMODULE m_d3d9dll;
 		uint32_t m_adapter;
@@ -682,11 +712,9 @@ namespace bgfx
 		uint32_t m_flags;
 
 		bool m_initialized;
-		bool m_fmtNULL;
-		bool m_fmtDF16;
-		bool m_fmtDF24;
-		bool m_fmtINTZ;
-		bool m_fmtRAWZ;
+		bool m_amd;
+		bool m_nvidia;
+		bool m_instancing;
 
 		D3DFORMAT m_fmtDepth;
 
@@ -811,30 +839,26 @@ namespace bgfx
 
 	static const D3DVERTEXELEMENT9 s_attrib[Attrib::Count+1] =
 	{
-		{0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,     0},
-		{0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL,       0},
-		{0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,        0},
-		{0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,        1},
-		{0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDINDICES, 0},
-		{0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDWEIGHT,  0},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     0},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     1},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     2},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     3},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     4},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     5},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     6},
-		{0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     7},
+		{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,     0 },
+		{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL,       0 },
+		{ 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,        0 },
+		{ 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,        1 },
+		{ 0, 0, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDINDICES, 0 },
+		{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_BLENDWEIGHT,  0 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     0 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     1 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     2 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     3 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     4 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     5 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     6 },
+		{ 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,     7 },
 		D3DDECL_END()
 	};
 
-	void VertexDeclaration::create(const VertexDecl& _decl)
+	static D3DVERTEXELEMENT9* fillVertexDecl(D3DVERTEXELEMENT9* _out, uint32_t _count, const VertexDecl& _decl)
 	{
-		memcpy(&m_decl, &_decl, sizeof(VertexDecl) );
-		dump(m_decl);
-
-		D3DVERTEXELEMENT9 vertexElements[Attrib::Count+1];
-		D3DVERTEXELEMENT9* elem = vertexElements;
+		D3DVERTEXELEMENT9* elem = _out;
 
 		for (uint32_t attr = 0; attr < Attrib::Count; ++attr)
 		{
@@ -903,7 +927,7 @@ namespace bgfx
 					case 2:
 						declType = D3DDECLTYPE_FLOAT2;
 						break;
-						
+
 					default:
 					case 3:
 						declType = D3DDECLTYPE_FLOAT3;
@@ -925,19 +949,46 @@ namespace bgfx
 				elem->Offset = _decl.m_offset[attr];
 				++elem;
 
-				BX_TRACE("\tattr %d, num %d, type %d, norm %d, offset %d"
-					, attr
-					, num
-					, type
-					, normalized
-					, _decl.m_offset[attr]
-				);
+// 				BX_TRACE("\tattr %d, num %d, type %d, norm %d, offset %d"
+// 					, attr
+// 					, num
+// 					, type
+// 					, normalized
+// 					, _decl.m_offset[attr]
+// 				);
 			}
 		}
 
+		return elem;
+	}
+
+	/// Creates a D3D9 vertex declaration for `_decl` (stream 0) followed by
+	/// `_numInstanceData` float4 instance elements on stream 1. The instance
+	/// elements use TEXCOORD usage indices 8-_numInstanceData .. 7 and are laid
+	/// out 16 bytes apart. The returned declaration holds a reference; the code
+	/// that receives it is responsible for releasing it.
+	static IDirect3DVertexDeclaration9* createVertexDecl(const VertexDecl& _decl, uint8_t _numInstanceData)
+	{
+		// vertexElements only has room for BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT
+		// instance elements; clamp so an oversized instance stride cannot write
+		// past the end of the stack array.
+		const uint32_t numInstanceData = _numInstanceData < BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT
+			? _numInstanceData
+			: BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT
+			;
+
+		D3DVERTEXELEMENT9 vertexElements[Attrib::Count+1+BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT];
+		D3DVERTEXELEMENT9* elem = fillVertexDecl(vertexElements, Attrib::Count, _decl);
+
+		const D3DVERTEXELEMENT9 inst = { 1, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 };
+
+		for (uint32_t ii = 0; ii < numInstanceData; ++ii)
+		{
+			memcpy(elem, &inst, sizeof(D3DVERTEXELEMENT9) );
+			elem->UsageIndex = 8-numInstanceData+ii;
+			elem->Offset = ii*16;
+			++elem;
+		}
+
+		// Terminate the element list with the D3DDECL_END() entry stored at the end of s_attrib.
+		memcpy(elem, &s_attrib[Attrib::Count], sizeof(D3DVERTEXELEMENT9) );
+
+		// Start from NULL so a failed CreateVertexDeclaration does not return an indeterminate pointer.
+		IDirect3DVertexDeclaration9* ptr = NULL;
+		DX_CHECK(s_renderCtx.m_device->CreateVertexDeclaration(vertexElements, &ptr) );
+		return ptr;
+	}
+
+	void VertexDeclaration::create(const VertexDecl& _decl) // Stores a copy of _decl and builds the matching D3D9 declaration without instance elements.
+	{
+		memcpy(&m_decl, &_decl, sizeof(VertexDecl) );
+		dump(m_decl);
+		m_ptr = createVertexDecl(_decl, 0); // 0 = no per-instance elements appended.
 	}
 
 	void Shader::create(bool _fragment, const Memory* _mem)
@@ -1816,8 +1867,10 @@ namespace bgfx
 		D3DPRIMITIVETYPE primType = D3DPT_TRIANGLELIST;
 		uint32_t primNumVerts = 3;
 
-		uint32_t statsNumPrims = 0;
+		uint32_t statsNumPrimsSubmitted = 0;
 		uint32_t statsNumIndices = 0;
+		uint32_t statsNumInstances = 0;
+		uint32_t statsNumPrimsRendered = 0;
 
 		int64_t elapsed = -bx::getHPCounter();
 
@@ -2182,16 +2235,34 @@ namespace bgfx
 					uint16_t handle = state.m_vertexBuffer.idx;
 					if (bgfx::invalidHandle != handle)
 					{
-						VertexBuffer& vb = s_renderCtx.m_vertexBuffers[handle];
+						const VertexBuffer& vb = s_renderCtx.m_vertexBuffers[handle];
 
 						uint16_t decl = vb.m_decl.idx == bgfx::invalidHandle ? state.m_vertexDecl.idx : vb.m_decl.idx;
-						VertexDeclaration& vertexDecl = s_renderCtx.m_vertexDecls[decl];
+						const VertexDeclaration& vertexDecl = s_renderCtx.m_vertexDecls[decl];
 						DX_CHECK(s_renderCtx.m_device->SetStreamSource(0, vb.m_ptr, 0, vertexDecl.m_decl.m_stride) );
-						DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(vertexDecl.m_ptr) );
+
+						if (invalidHandle != state.m_instanceDataBuffer.idx)
+						{
+							const VertexBuffer& inst = s_renderCtx.m_vertexBuffers[state.m_instanceDataBuffer.idx];
+							DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|state.m_numInstances) );
+							DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(1, D3DSTREAMSOURCE_INSTANCEDATA|1) );
+							DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, inst.m_ptr, state.m_instanceDataOffset, state.m_instanceDataStride) );
+
+							IDirect3DVertexDeclaration9* ptr = createVertexDecl(vertexDecl.m_decl, state.m_instanceDataStride/16);
+							DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(ptr) );
+							DX_RELEASE(ptr, 0);
+						}
+						else
+						{
+							DX_CHECK(s_renderCtx.m_device->SetStreamSourceFreq(0, 1) );
+							DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, NULL, 0, 0) );
+							DX_CHECK(s_renderCtx.m_device->SetVertexDeclaration(vertexDecl.m_ptr) );
+						}
 					}
 					else
 					{
 						DX_CHECK(s_renderCtx.m_device->SetStreamSource(0, NULL, 0, 0) );
+						DX_CHECK(s_renderCtx.m_device->SetStreamSource(1, NULL, 0, 0) );
 					}
 				}
 
@@ -2223,48 +2294,58 @@ namespace bgfx
 					}
 
 					uint32_t numIndices = 0;
-					uint32_t numPrims = 0;
+					uint32_t numPrimsSubmitted = 0;
+					uint32_t numInstances = 0;
+					uint32_t numPrimsRendered = 0;
 
 					if (bgfx::invalidHandle != state.m_indexBuffer.idx)
 					{
 						if (BGFX_DRAW_WHOLE_INDEX_BUFFER == state.m_startIndex)
 						{
 							numIndices = s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2;
-							numPrims = numIndices/primNumVerts;
+							numPrimsSubmitted = numIndices/primNumVerts;
+							numInstances = state.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 
 							DX_CHECK(s_renderCtx.m_device->DrawIndexedPrimitive(primType
 								, state.m_startVertex
 								, 0
 								, numVertices
 								, 0
-								, numPrims
+								, numPrimsSubmitted
 								) );
 						}
 						else if (primNumVerts <= state.m_numIndices)
 						{
 							numIndices = state.m_numIndices;
-							numPrims = numIndices/primNumVerts;
+							numPrimsSubmitted = numIndices/primNumVerts;
+							numInstances = state.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 
 							DX_CHECK(s_renderCtx.m_device->DrawIndexedPrimitive(primType
 								, state.m_startVertex
 								, 0
 								, numVertices
 								, state.m_startIndex
-								, numPrims
+								, numPrimsSubmitted
 								) );
 						}
 					}
 					else
 					{
-						numPrims = numVertices/primNumVerts;
+						numPrimsSubmitted = numVertices/primNumVerts;
+						numInstances = state.m_numInstances;
+						numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 						DX_CHECK(s_renderCtx.m_device->DrawPrimitive(primType
 							, state.m_startVertex
-							, numPrims
+							, numPrimsSubmitted
 							) );
 					}
 
-					statsNumPrims += numPrims;
+					statsNumPrimsSubmitted += numPrimsSubmitted;
 					statsNumIndices += numIndices;
+					statsNumInstances += numInstances;
+					statsNumPrimsRendered += numPrimsRendered;
 				}
 			}
 
@@ -2300,7 +2381,11 @@ namespace bgfx
 					, m_render->m_num
 					, elapsedCpuMs
 					);
-				tvm.printf(10, pos++, 0x8e, "      Prims: %7d", statsNumPrims);
+				tvm.printf(10, pos++, 0x8e, "      Prims: %7d (#inst: %5d), submitted: %7d"
+					, statsNumPrimsRendered
+					, statsNumInstances
+					, statsNumPrimsSubmitted
+					);
 				tvm.printf(10, pos++, 0x8e, "    Indices: %7d", statsNumIndices);
 				tvm.printf(10, pos++, 0x8e, "   DVB size: %7d", m_render->m_vboffset);
 				tvm.printf(10, pos++, 0x8e, "   DIB size: %7d", m_render->m_iboffset);

+ 63 - 20
src/renderer_d3d9.h

@@ -14,26 +14,6 @@
 #	endif // !BGFX_CONFIG_RENDERER_DIRECT3D_EX
 #	include <d3d9.h>
 
-#	ifndef D3DFMT_NULL
-#		define D3DFMT_NULL ( (D3DFORMAT)MAKEFOURCC('N','U','L','L') )
-#	endif // D3DFMT_NULL
-
-#	ifndef D3DFMT_DF16
-#		define D3DFMT_DF16 ( (D3DFORMAT)MAKEFOURCC('D','F','1','6') )
-#	endif // D3DFMT_DF16
-
-#	ifndef D3DFMT_DF24
-#		define D3DFMT_DF24 ( (D3DFORMAT)MAKEFOURCC('D','F','2','4') )
-#	endif // D3DFMT_DF24
-
-#	ifndef D3DFMT_INTZ
-#		define D3DFMT_INTZ ( (D3DFORMAT)MAKEFOURCC('I','N','T','Z') )
-#	endif // D3DFMT_INTZ
-
-#	ifndef D3DFMT_RAWZ
-#		define D3DFMT_RAWZ ( (D3DFORMAT)MAKEFOURCC('R','A','W','Z') )
-#	endif // D3DFMT_RAWZ
-
 #	if BGFX_CONFIG_RENDERER_DIRECT3D_EX
 typedef HRESULT (WINAPI *Direct3DCreate9ExFunc)(UINT SDKVersion, IDirect3D9Ex**);
 #	else
@@ -113,6 +93,69 @@ namespace bgfx
 					} while (0)
 #endif // BGFX_CONFIG_DEBUG
 
+#	ifndef D3DFMT_ATI1
+#		define D3DFMT_ATI1 ( (D3DFORMAT)MAKEFOURCC('A','T','I','1') )
+#	endif // D3DFMT_ATI1
+
+#	ifndef D3DFMT_ATI2
+#		define D3DFMT_ATI2 ( (D3DFORMAT)MAKEFOURCC('A','T','I','2') )
+#	endif // D3DFMT_ATI2
+
+#	ifndef D3DFMT_ATOC
+#		define D3DFMT_ATOC ( (D3DFORMAT)MAKEFOURCC('A','T','O','C') )
+#	endif // D3DFMT_ATOC
+
+#	ifndef D3DFMT_DF16
+#		define D3DFMT_DF16 ( (D3DFORMAT)MAKEFOURCC('D','F','1','6') )
+#	endif // D3DFMT_DF16
+
+#	ifndef D3DFMT_DF24
+#		define D3DFMT_DF24 ( (D3DFORMAT)MAKEFOURCC('D','F','2','4') )
+#	endif // D3DFMT_DF24
+
+#	ifndef D3DFMT_INST
+#		define D3DFMT_INST ( (D3DFORMAT)MAKEFOURCC('I','N','S','T') )
+#	endif // D3DFMT_INST
+
+#	ifndef D3DFMT_INTZ
+#		define D3DFMT_INTZ ( (D3DFORMAT)MAKEFOURCC('I','N','T','Z') )
+#	endif // D3DFMT_INTZ
+
+#	ifndef D3DFMT_NULL
+#		define D3DFMT_NULL ( (D3DFORMAT)MAKEFOURCC('N','U','L','L') )
+#	endif // D3DFMT_NULL
+
+#	ifndef D3DFMT_RESZ
+#		define D3DFMT_RESZ ( (D3DFORMAT)MAKEFOURCC('R','E','S','Z') )
+#	endif // D3DFMT_RESZ
+
+#	ifndef D3DFMT_RAWZ
+#		define D3DFMT_RAWZ ( (D3DFORMAT)MAKEFOURCC('R','A','W','Z') )
+#	endif // D3DFMT_RAWZ
+
+	struct ExtendedFormat // One entry per FOURCC "extended" D3D9 format that the renderer probes for.
+	{
+		enum Enum // Index into the extended-format table; order must match the table's rows.
+		{
+			Ati1,
+			Ati2,
+			Df16,
+			Df24,
+			Inst,
+			Intz,
+			Null,
+			Resz,
+			Rawz,
+
+			Count,
+		};
+
+		D3DFORMAT m_fmt; // FOURCC format code (one of the D3DFMT_* macros defined above).
+		DWORD m_usage; // Usage flags passed to CheckDeviceFormat.
+		D3DRESOURCETYPE m_type; // Resource type passed to CheckDeviceFormat.
+		bool m_supported; // Result of the CheckDeviceFormat probe.
+	};
+
 	struct Msaa
 	{
 		D3DMULTISAMPLE_TYPE m_type;

+ 77 - 32
src/renderer_gl.cpp

@@ -532,6 +532,7 @@ namespace bgfx
 			ARB_multisample,
 			CHROMIUM_framebuffer_multisample,
 			ANGLE_translated_shader_source,
+			ARB_instanced_arrays,
 			ANGLE_instanced_arrays,
 			OES_texture_float,
 			OES_texture_float_linear,
@@ -568,6 +569,7 @@ namespace bgfx
 		{ "GL_ARB_multisample",                   false, true },
 		{ "GL_CHROMIUM_framebuffer_multisample",  false, true },
 		{ "GL_ANGLE_translated_shader_source",    false, true },
+		{ "GL_ARB_instanced_arrays",              false, true },
 		{ "GL_ANGLE_instanced_arrays",            false, true },
 		{ "GL_OES_texture_float",                 false, true },
 		{ "GL_OES_texture_float_linear",          false, true },
@@ -608,6 +610,15 @@ namespace bgfx
 		"a_texcoord7",
 	};
 
+	static const char* s_instanceDataName[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT] = // Shader attribute names looked up with glGetAttribLocation for per-instance data.
+	{
+		"i_data0",
+		"i_data1",
+		"i_data2",
+		"i_data3",
+		"i_data4",
+	}; // Exactly five names are listed; keep this list in sync if BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT is changed from 5.
+
 	static const GLenum s_attribType[AttribType::Count] =
 	{
 		GL_UNSIGNED_BYTE,
@@ -978,7 +989,7 @@ namespace bgfx
 		for (uint32_t ii = 0; ii < Attrib::Count; ++ii)
 		{
 			GLuint loc = glGetAttribLocation(m_id, s_attribName[ii]);
-			if ( GLuint(-1) != loc )
+			if (GLuint(-1) != loc )
 			{
 				BX_TRACE("attr %s: %d", s_attribName[ii], loc);
 				m_attributes[ii] = loc;
@@ -986,9 +997,21 @@ namespace bgfx
 			}
 		}
 		m_used[used] = Attrib::Count;
+
+		used = 0;
+		for (uint32_t ii = 0; ii < countof(s_instanceDataName); ++ii)
+		{
+			GLuint loc = glGetAttribLocation(m_id, s_instanceDataName[ii]);
+			if (GLuint(-1) != loc )
+			{
+				BX_TRACE("instance data %s: %d", s_instanceDataName[ii], loc);
+				m_instanceData[used++] = loc;
+			}
+		}
+		m_instanceData[used] = 0xffff;
 	}
 
-	void Material::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex)
+	void Material::bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex) const
 	{
 		uint32_t enabled = 0;
 		for (uint32_t ii = 0; Attrib::Count != m_used[ii]; ++ii)
@@ -1007,6 +1030,8 @@ namespace bgfx
 				GL_CHECK(glEnableVertexAttribArray(loc) );
 				enabled |= 1<<attr;
 
+				GL_CHECK(glVertexAttribDivisor(loc, 0) );
+
 				uint32_t baseVertex = _baseVertex*_vertexDecl.m_stride + _vertexDecl.m_offset[attr];
 				GL_CHECK(glVertexAttribPointer(loc, num, s_attribType[type], normalized, _vertexDecl.m_stride, (void*)(uintptr_t)baseVertex) );
 			}
@@ -1038,25 +1063,19 @@ namespace bgfx
 				}
 			}
 		}
-// 
-// 		uint32_t changed = enabled^m_enabled;
-// 		m_enabled = enabled;
-// 
-// 		if (0 != changed)
-// 		{
-// 			uint32_t test = 1;
-// 			for (uint32_t attr = 0; attr != Attrib::Count; ++attr)
-// 			{
-// 				if ( (changed & test)
-// 				&&   !(enabled & test) )
-// 				{
-// 					GLuint loc = m_attributes[attr];
-// 					GL_CHECK(glDisableVertexAttribArray(loc) );
-// 				}
-// 
-// 				test <<= 1;
-// 			}
-// 		}
+	}
+
+	// Binds every instance-data attribute this material uses to the currently
+	// bound array buffer. Each attribute is one float4, so consecutive attributes
+	// sit 16 bytes apart; _baseVertex is used as the starting byte offset and
+	// _stride as the distance between instances. The divisor of 1 advances the
+	// attribute once per instance.
+	void Material::bindInstanceData(uint32_t _stride, uint32_t _baseVertex) const
+	{
+		uint32_t byteOffset = _baseVertex;
+		for (const uint16_t* slot = m_instanceData; 0xffff != *slot; ++slot, byteOffset += 16)
+		{
+			const GLuint loc = *slot;
+			GL_CHECK(glEnableVertexAttribArray(loc) );
+			GL_CHECK(glVertexAttribPointer(loc, 4, GL_FLOAT, GL_FALSE, _stride, (void*)(uintptr_t)byteOffset) );
+			GL_CHECK(glVertexAttribDivisor(loc, 1) );
+		}
 	}
 
 	void Texture::create(const Memory* _mem, uint32_t _flags)
@@ -1899,8 +1918,10 @@ namespace bgfx
 
 		GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, 0) );
 
-		uint32_t statsNumPrims = 0;
+		uint32_t statsNumPrimsSubmitted = 0;
 		uint32_t statsNumIndices = 0;
+		uint32_t statsNumInstances = 0;
+		uint32_t statsNumPrimsRendered = 0;
 
 		if (0 == (m_render->m_debug&BGFX_DEBUG_IFH) )
 		{
@@ -2336,49 +2357,69 @@ namespace bgfx
 							baseVertex = state.m_startVertex;
 							VertexBuffer& vb = s_renderCtx.m_vertexBuffers[state.m_vertexBuffer.idx];
 							uint16_t decl = vb.m_decl.idx == bgfx::invalidHandle ? state.m_vertexDecl.idx : vb.m_decl.idx;
-							s_renderCtx.m_materials[materialIdx].bindAttributes(s_renderCtx.m_vertexDecls[decl], state.m_startVertex);
+							const Material& material = s_renderCtx.m_materials[materialIdx];
+							material.bindAttributes(s_renderCtx.m_vertexDecls[decl], state.m_startVertex);
+							
+							if (invalidHandle != state.m_instanceDataBuffer.idx)
+							{
+								GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, s_renderCtx.m_vertexBuffers[state.m_instanceDataBuffer.idx].m_id) );
+								material.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset);
+							}
 						}
 
 						uint32_t numIndices = 0;
-						uint32_t numPrims = 0;
+						uint32_t numPrimsSubmitted = 0;
+						uint32_t numInstances = 0;
+						uint32_t numPrimsRendered = 0;
 
 						if (bgfx::invalidHandle != state.m_indexBuffer.idx)
 						{
 							if (BGFX_DRAW_WHOLE_INDEX_BUFFER == state.m_startIndex)
 							{
 								numIndices = s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2;
-								numPrims = numIndices/primNumVerts;
+								numPrimsSubmitted = numIndices/primNumVerts;
+								numInstances = state.m_numInstances;
+								numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 
-								GL_CHECK(glDrawElements(primType
+								GL_CHECK(glDrawElementsInstanced(primType
 									, s_renderCtx.m_indexBuffers[state.m_indexBuffer.idx].m_size/2
 									, GL_UNSIGNED_SHORT
 									, (void*)0
+									, state.m_numInstances
 									) );
 							}
 							else if (primNumVerts <= state.m_numIndices)
 							{
 								numIndices = state.m_numIndices;
-								numPrims = numIndices/primNumVerts;
+								numPrimsSubmitted = numIndices/primNumVerts;
+								numInstances = state.m_numInstances;
+								numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 
-								GL_CHECK(glDrawElements(primType
+								GL_CHECK(glDrawElementsInstanced(primType
 									, numIndices
 									, GL_UNSIGNED_SHORT
 									, (void*)(uintptr_t)(state.m_startIndex*2)
+									, state.m_numInstances
 									) );
 							}
 						}
 						else
 						{
-							numPrims = state.m_numVertices/primNumVerts;
+							numPrimsSubmitted = state.m_numVertices/primNumVerts;
+							numInstances = state.m_numInstances;
+							numPrimsRendered = numPrimsSubmitted*state.m_numInstances;
 
-							GL_CHECK(glDrawArrays(primType
+							GL_CHECK(glDrawArraysInstanced(primType
 								, 0
 								, state.m_numVertices
+								, state.m_numInstances
 								) );
 						}
 
-						statsNumPrims += numPrims;
+						statsNumPrimsSubmitted += numPrimsSubmitted;
 						statsNumIndices += numIndices;
+						statsNumInstances += numInstances;
+						statsNumPrimsRendered += numPrimsRendered;
 					}
 				}
 			}
@@ -2420,7 +2461,11 @@ namespace bgfx
 					, elapsedCpuMs > elapsedGpuMs ? '>' : '<'
 					, elapsedGpuMs
 					);
-				tvm.printf(10, pos++, 0x8e, "      Prims: %7d", statsNumPrims);
+				tvm.printf(10, pos++, 0x8e, "      Prims: %7d (#inst: %5d), submitted: %7d"
+					, statsNumPrimsRendered
+					, statsNumInstances
+					, statsNumPrimsSubmitted
+					);
 				tvm.printf(10, pos++, 0x8e, "    Indices: %7d", statsNumIndices);
 				tvm.printf(10, pos++, 0x8e, "   DVB size: %7d", m_render->m_vboffset);
 				tvm.printf(10, pos++, 0x8e, "   DIB size: %7d", m_render->m_iboffset);

+ 4 - 2
src/renderer_gl.h

@@ -140,7 +140,7 @@ namespace bgfx
 					BX_CHECK(0 == err, #_call "; glError 0x%x %d", err, err); \
 				} while (0)
 
-#if 0 // BGFX_CONFIG_DEBUG
+#if BGFX_CONFIG_DEBUG
 #	define GL_CHECK(_call) _GL_CHECK(_call)
 #else
 #	define GL_CHECK(_call) _call
@@ -334,12 +334,14 @@ namespace bgfx
 		void create(const Shader& _vsh, const Shader& _fsh);
 		void destroy();
  		void init();
- 		void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0);
+ 		void bindAttributes(const VertexDecl& _vertexDecl, uint32_t _baseVertex = 0) const;
+		void bindInstanceData(uint32_t _stride, uint32_t _baseVertex = 0) const;
  
 		GLuint m_id;
 
 		uint8_t m_used[Attrib::Count+1]; // dense
 		uint16_t m_attributes[Attrib::Count]; // sparse
+		uint16_t m_instanceData[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT+1]; // dense, 0xffff terminated; +1 so the terminator fits when every instance attribute is used
 		uint32_t m_enabled;
 
  		GLuint m_sampler[BGFX_CONFIG_MAX_TEXTURES];