Branimir Karadžić 10 лет назад
Родитель
Сommit
7d50012dbe
5 измененных файлов с 278 добавлено и 6 удалено
  1. 116 4
      src/renderer_d3d11.cpp
  2. 27 0
      src/renderer_d3d11.h
  3. 106 2
      src/renderer_d3d9.cpp
  4. 28 0
      src/renderer_d3d9.h
  5. 1 0
      src/renderer_gl.cpp

+ 116 - 4
src/renderer_d3d11.cpp

@@ -1468,6 +1468,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		{
 			ovrPreReset();
 
+			m_gpuTimer.destroy();
+
 			if (NULL == g_platformData.backBufferDS)
 			{
 				DX_RELEASE(m_backBufferDepthStencil, 0);
@@ -1510,6 +1512,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 				DX_RELEASE(color, 0);
 			}
 
+			m_gpuTimer.create();
+
 			ovrPostReset();
 
 			// If OVR doesn't create separate depth stencil view, create default one.
@@ -2608,9 +2612,10 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		uint16_t m_numWindows;
 		FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
 
-		ID3D11Device*           m_device;
-		ID3D11DeviceContext*    m_deviceCtx;
-		ID3D11InfoQueue*        m_infoQueue;
+		ID3D11Device*        m_device;
+		ID3D11DeviceContext* m_deviceCtx;
+		ID3D11InfoQueue*     m_infoQueue;
+		TimerQueryD3D11      m_gpuTimer;
 
 		ID3D11RenderTargetView* m_backBufferColor;
 		ID3D11DepthStencilView* m_backBufferDepthStencil;
@@ -3471,6 +3476,88 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		}
 	}
 
+	// Creates the three D3D11 queries (disjoint, start timestamp, end timestamp)
+	// for every slot of m_frame and resets the cached measurement.
+	void TimerQueryD3D11::create()
+	{
+		ID3D11Device* const dev = s_renderD3D11->m_device;
+
+		// One descriptor per query kind; MiscFlags is unused by both.
+		D3D11_QUERY_DESC disjointDesc;
+		disjointDesc.Query     = D3D11_QUERY_TIMESTAMP_DISJOINT;
+		disjointDesc.MiscFlags = 0;
+
+		D3D11_QUERY_DESC timestampDesc;
+		timestampDesc.Query     = D3D11_QUERY_TIMESTAMP;
+		timestampDesc.MiscFlags = 0;
+
+		for (Frame* slot = m_frame; slot != m_frame + BX_COUNTOF(m_frame); ++slot)
+		{
+			DX_CHECK(dev->CreateQuery(&disjointDesc,  &slot->m_disjoint) );
+			DX_CHECK(dev->CreateQuery(&timestampDesc, &slot->m_start) );
+			DX_CHECK(dev->CreateQuery(&timestampDesc, &slot->m_end) );
+		}
+
+		// Frequency starts at 1 rather than 0; callers divide by it.
+		m_frequency = 1;
+		m_elapsed   = 0;
+	}
+
+	// Releases the disjoint/start/end query of every slot in m_frame.
+	void TimerQueryD3D11::destroy()
+	{
+		const uint32_t numSlots = BX_COUNTOF(m_frame);
+
+		uint32_t idx = 0;
+		while (idx < numSlots)
+		{
+			Frame& slot = m_frame[idx];
+			DX_RELEASE(slot.m_disjoint, 0);
+			DX_RELEASE(slot.m_start, 0);
+			DX_RELEASE(slot.m_end, 0);
+			++idx;
+		}
+	}
+
+	// Opens a timing bracket: reserves a slot in the ring, begins its disjoint
+	// query and records the start timestamp.
+	void TimerQueryD3D11::begin()
+	{
+		// As long as reserve(1) reports 0, poll get() to retire an older slot.
+		for (;;)
+		{
+			if (0 != m_control.reserve(1) )
+			{
+				break;
+			}
+
+			get();
+		}
+
+		ID3D11DeviceContext* ctx = s_renderD3D11->m_deviceCtx;
+		Frame& slot = m_frame[m_control.m_current];
+
+		ctx->Begin(slot.m_disjoint);
+		ctx->End(slot.m_start);
+	}
+
+	// Closes the timing bracket opened by begin(): records the end timestamp,
+	// ends the disjoint query and commits the slot to the ring.
+	void TimerQueryD3D11::end()
+	{
+		Frame& slot = m_frame[m_control.m_current];
+		ID3D11DeviceContext* ctx = s_renderD3D11->m_deviceCtx;
+
+		ctx->End(slot.m_end);
+		ctx->End(slot.m_disjoint);
+
+		m_control.commit(1);
+	}
+
+	// Tries to retire the oldest pending slot.
+	//
+	// Returns true when the end timestamp of slot m_read was available and the
+	// slot was consumed; m_elapsed / m_frequency are refreshed when the start
+	// timestamp and disjoint data could be read as well. Returns false when
+	// nothing is pending or the GPU has not produced the result yet.
+	bool TimerQueryD3D11::get()
+	{
+		// Without this guard a slot that was already retired could be polled
+		// again; its query may still report S_OK and callers looping on get()
+		// would keep re-reading stale data.
+		if (0 == m_control.available() )
+		{
+			return false;
+		}
+
+		ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
+		Frame& frame = m_frame[m_control.m_read];
+
+		uint64_t end;
+		HRESULT hr = deviceCtx->GetData(frame.m_end, &end, sizeof(end), 0);
+		if (S_OK == hr)
+		{
+			m_control.consume(1);
+
+			D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
+			const HRESULT hrDisjoint = deviceCtx->GetData(frame.m_disjoint, &disjoint, sizeof(disjoint), 0);
+
+			uint64_t start;
+			const HRESULT hrStart = deviceCtx->GetData(frame.m_start, &start, sizeof(start), 0);
+
+			// Only publish values that were actually written by GetData; on any
+			// other result the locals above are indeterminate, so the previous
+			// measurement is kept instead.
+			// NOTE(review): disjoint.Disjoint is not inspected here - confirm
+			// whether unreliable samples should be dropped.
+			if (S_OK == hrDisjoint
+			&&  S_OK == hrStart)
+			{
+				m_frequency = disjoint.Frequency;
+				m_elapsed   = end - start;
+			}
+
+			return true;
+		}
+
+		return false;
+	}
+
 	void RendererContextD3D11::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
 	{
 		PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit");
@@ -3482,6 +3569,11 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		int64_t elapsed = -bx::getHPCounter();
 		int64_t captureElapsed = 0;
 
+		if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
+		{
+			m_gpuTimer.begin();
+		}
+
 		if (0 < _render->m_iboffset)
 		{
 			TransientIndexBuffer* ib = _render->m_transientIb;
@@ -4256,6 +4348,20 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		{
 			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
 
+			static uint32_t maxGpuLatency = 0;
+			static double   maxGpuElapsed = 0.0f;
+			double elapsedGpuMs = 0.0;
+
+			m_gpuTimer.end();
+
+			while (m_gpuTimer.get() )
+			{
+				double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
+				elapsedGpuMs   = m_gpuTimer.m_elapsed * toGpuMs;
+				maxGpuElapsed  = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
+			}
+			maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
+
 			TextVideoMem& tvm = m_textVideoMem;
 
 			static int64_t next = now;
@@ -4314,12 +4420,18 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 					);
 
 				double elapsedCpuMs = double(elapsed)*toMs;
-				tvm.printf(10, pos++, 0x8e, "   Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
+				tvm.printf(10, pos++, 0x8e, "   Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
 					, _render->m_num
 					, statsKeyType[0]
 					, statsKeyType[1]
 					, elapsedCpuMs
+					, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
+					, maxGpuElapsed
+					, maxGpuLatency
 					);
+				maxGpuLatency = 0;
+				maxGpuElapsed = 0.0;
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, "   %9s: %7d (#inst: %5d), submitted: %7d, indirect %7d"

+ 27 - 0
src/renderer_d3d11.h

@@ -297,6 +297,33 @@ namespace bgfx { namespace d3d11
 		TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
 	};
 
+	// GPU timer for the D3D11 renderer. Each Frame slot holds one disjoint
+	// query plus a start and an end timestamp query; m_control tracks which
+	// slots are reserved, committed and consumed.
+	struct TimerQueryD3D11
+	{
+		TimerQueryD3D11()
+			: m_elapsed(0)
+			, m_frequency(1)
+			, m_control(BX_COUNTOF(m_frame) )
+		{
+			// Give every slot a defined (null) state so the query pointers are
+			// never indeterminate before create() has run.
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+			{
+				m_frame[ii].m_disjoint = NULL;
+				m_frame[ii].m_start    = NULL;
+				m_frame[ii].m_end      = NULL;
+			}
+		}
+
+		void create();  // Creates the query objects for all slots.
+		void destroy(); // Releases the query objects of all slots.
+		void begin();   // Reserves a slot and records the start timestamp.
+		void end();     // Records the end timestamp and commits the slot.
+		bool get();     // Retires the oldest slot; true when a result was read.
+
+		struct Frame
+		{
+			ID3D11Query* m_disjoint;
+			ID3D11Query* m_start;
+			ID3D11Query* m_end;
+		};
+
+		uint64_t m_elapsed;   // end - start of the last retired slot, in GPU ticks.
+		uint64_t m_frequency; // Frequency reported with the last retired slot.
+
+		Frame m_frame[4];
+		bx::RingBufferControl m_control;
+	};
+
 } /*  namespace d3d11 */ } // namespace bgfx
 
 #endif // BGFX_RENDERER_D3D11_H_HEADER_GUARD

+ 106 - 2
src/renderer_d3d9.cpp

@@ -1239,6 +1239,8 @@ namespace bgfx { namespace d3d9
 
 			capturePreReset();
 
+			m_gpuTimer.destroy();
+
 			for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
 			{
 				m_indexBuffers[ii].preReset();
@@ -1266,6 +1268,8 @@ namespace bgfx { namespace d3d9
 			DX_CHECK(m_swapChain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &m_backBufferColor) );
 			DX_CHECK(m_device->GetDepthStencilSurface(&m_backBufferDepthStencil) );
 
+			m_gpuTimer.create();
+
 			capturePostReset();
 
 			for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
@@ -1719,8 +1723,9 @@ namespace bgfx { namespace d3d9
 		IDirect3DDevice9Ex* m_deviceEx;
 #endif // BGFX_CONFIG_RENDERER_DIRECT3D9EX
 
-		IDirect3D9* m_d3d9;
+		IDirect3D9*       m_d3d9;
 		IDirect3DDevice9* m_device;
+		TimerQueryD3D9    m_gpuTimer;
 		D3DPOOL m_pool;
 
 		IDirect3DSwapChain9* m_swapChain;
@@ -2892,6 +2897,80 @@ namespace bgfx { namespace d3d9
 			) );
 	}
 
+	// Creates the four D3D9 queries (disjoint, start, end, frequency) for every
+	// slot of m_frame and resets the cached measurement.
+	void TimerQueryD3D9::create()
+	{
+		IDirect3DDevice9* const dev = s_renderD3D9->m_device;
+
+		for (Frame* slot = m_frame; slot != m_frame + BX_COUNTOF(m_frame); ++slot)
+		{
+			DX_CHECK(dev->CreateQuery(D3DQUERYTYPE_TIMESTAMPDISJOINT, &slot->m_disjoint) );
+			DX_CHECK(dev->CreateQuery(D3DQUERYTYPE_TIMESTAMP,         &slot->m_start) );
+			DX_CHECK(dev->CreateQuery(D3DQUERYTYPE_TIMESTAMP,         &slot->m_end) );
+			DX_CHECK(dev->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ,     &slot->m_freq) );
+		}
+
+		// Frequency starts at 1 rather than 0; callers divide by it.
+		m_frequency = 1;
+		m_elapsed   = 0;
+	}
+
+	// Releases the disjoint/start/end/frequency query of every slot in m_frame.
+	void TimerQueryD3D9::destroy()
+	{
+		const uint32_t numSlots = BX_COUNTOF(m_frame);
+
+		uint32_t idx = 0;
+		while (idx < numSlots)
+		{
+			Frame& slot = m_frame[idx];
+			DX_RELEASE(slot.m_disjoint, 0);
+			DX_RELEASE(slot.m_start, 0);
+			DX_RELEASE(slot.m_end, 0);
+			DX_RELEASE(slot.m_freq, 0);
+			++idx;
+		}
+	}
+
+	// Opens a timing bracket: reserves a slot in the ring, begins its disjoint
+	// query and issues the start timestamp.
+	void TimerQueryD3D9::begin()
+	{
+		// As long as reserve(1) reports 0, poll get() to retire an older slot.
+		for (;;)
+		{
+			if (0 != m_control.reserve(1) )
+			{
+				break;
+			}
+
+			get();
+		}
+
+		Frame& slot = m_frame[m_control.m_current];
+		slot.m_disjoint->Issue(D3DISSUE_BEGIN);
+		slot.m_start->Issue(D3DISSUE_END);
+	}
+
+	// Closes the timing bracket opened by begin(): issues the end timestamp,
+	// ends the disjoint query, issues the frequency query and commits the slot.
+	void TimerQueryD3D9::end()
+	{
+		Frame& frame = m_frame[m_control.m_current];
+		frame.m_end->Issue(D3DISSUE_END);
+
+		// begin() issues D3DISSUE_BEGIN on the disjoint query; close it here so
+		// the query is not left open when the slot is reused.
+		frame.m_disjoint->Issue(D3DISSUE_END);
+
+		// Keep the frequency query last: get() polls it to decide whether the
+		// whole slot can be read.
+		frame.m_freq->Issue(D3DISSUE_END);
+		m_control.commit(1);
+	}
+
+	// Tries to retire the oldest pending slot.
+	//
+	// Returns true when the frequency query of slot m_read was available; the
+	// slot is then consumed and m_elapsed / m_frequency are refreshed from it.
+	// Returns false when nothing is pending or the result is not ready yet.
+	bool TimerQueryD3D9::get()
+	{
+		// Without this guard a slot that was already retired could be polled
+		// again; its query may still report S_OK and callers looping on get()
+		// would keep re-reading stale data.
+		if (0 == m_control.available() )
+		{
+			return false;
+		}
+
+		Frame& frame = m_frame[m_control.m_read];
+
+		uint64_t freq;
+		HRESULT hr = frame.m_freq->GetData(&freq, sizeof(freq), 0);
+		if (S_OK == hr)
+		{
+			m_control.consume(1);
+
+			uint64_t start;
+			DX_CHECK(frame.m_start->GetData(&start, sizeof(start), 0) );
+
+			uint64_t end;
+			DX_CHECK(frame.m_end->GetData(&end, sizeof(end), 0) );
+
+			m_frequency = freq;
+			m_elapsed   = end - start;
+
+			return true;
+		}
+
+		return false;
+	}
+
 	void RendererContextD3D9::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
 	{
 		IDirect3DDevice9* device = m_device;
@@ -2905,6 +2984,11 @@ namespace bgfx { namespace d3d9
 
 		device->BeginScene();
 
+		if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
+		{
+			m_gpuTimer.begin();
+		}
+
 		if (0 < _render->m_iboffset)
 		{
 			TransientIndexBuffer* ib = _render->m_transientIb;
@@ -3453,6 +3537,20 @@ namespace bgfx { namespace d3d9
 		{
 			PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
 
+			static uint32_t maxGpuLatency = 0;
+			static double   maxGpuElapsed = 0.0f;
+			double elapsedGpuMs = 0.0;
+
+			m_gpuTimer.end();
+
+			while (m_gpuTimer.get() )
+			{
+				double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
+				elapsedGpuMs   = m_gpuTimer.m_elapsed * toGpuMs;
+				maxGpuElapsed  = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
+			}
+			maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
+
 			TextVideoMem& tvm = m_textVideoMem;
 
 			static int64_t next = now;
@@ -3490,12 +3588,18 @@ namespace bgfx { namespace d3d9
 					);
 
 				double elapsedCpuMs = double(elapsed)*toMs;
-				tvm.printf(10, pos++, 0x8e, "   Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
+				tvm.printf(10, pos++, 0x8e, "   Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
 					, _render->m_num
 					, statsKeyType[0]
 					, statsKeyType[1]
 					, elapsedCpuMs
+					, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
+					, maxGpuElapsed
+					, maxGpuLatency
 					);
+				maxGpuLatency = 0;
+				maxGpuElapsed = 0.0;
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, "   %9s: %7d (#inst: %5d), submitted: %7d"

+ 28 - 0
src/renderer_d3d9.h

@@ -389,6 +389,34 @@ namespace bgfx { namespace d3d9
 		bool m_needResolve;
 	};
 
+	// GPU timer for the D3D9 renderer. Each Frame slot holds a disjoint query,
+	// a start and an end timestamp query and a timestamp-frequency query;
+	// m_control tracks which slots are reserved, committed and consumed.
+	struct TimerQueryD3D9
+	{
+		TimerQueryD3D9()
+			: m_elapsed(0)
+			, m_frequency(1)
+			, m_control(BX_COUNTOF(m_frame) )
+		{
+			// Give every slot a defined (null) state so the query pointers are
+			// never indeterminate before create() has run.
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+			{
+				m_frame[ii].m_disjoint = NULL;
+				m_frame[ii].m_start    = NULL;
+				m_frame[ii].m_end      = NULL;
+				m_frame[ii].m_freq     = NULL;
+			}
+		}
+
+		void create();  // Creates the query objects for all slots.
+		void destroy(); // Releases the query objects of all slots.
+		void begin();   // Reserves a slot and issues the start timestamp.
+		void end();     // Issues the end timestamp and commits the slot.
+		bool get();     // Retires the oldest slot; true when a result was read.
+
+		struct Frame
+		{
+			IDirect3DQuery9* m_disjoint;
+			IDirect3DQuery9* m_start;
+			IDirect3DQuery9* m_end;
+			IDirect3DQuery9* m_freq;
+		};
+
+		uint64_t m_elapsed;   // end - start of the last retired slot, in GPU ticks.
+		uint64_t m_frequency; // Frequency reported with the last retired slot.
+
+		Frame m_frame[4];
+		bx::RingBufferControl m_control;
+	};
+
 } /* namespace d3d9 */ } // namespace bgfx
 
 #endif // BGFX_RENDERER_D3D9_H_HEADER_GUARD

+ 1 - 0
src/renderer_gl.cpp

@@ -5661,6 +5661,7 @@ namespace bgfx { namespace gl
 					, elapsedCpuMs > elapsedGpuMs ? '>' : '<'
 					, elapsedGpuMs
 					);
+
 				for (uint32_t ii = 0; ii < BX_COUNTOF(s_primInfo); ++ii)
 				{
 					tvm.printf(10, pos++, 0x8e, "   %9s: %7d (#inst: %5d), submitted: %7d"