Browse Source

Replaced GL internal profiler.

Branimir Karadžić 8 years ago
parent
commit
21ade78454
5 changed files with 176 additions and 157 deletions
  1. 2 2
      examples/common/example-glue.cpp
  2. 61 0
      src/renderer.h
  3. 6 53
      src/renderer_d3d11.cpp
  4. 25 48
      src/renderer_gl.cpp
  5. 82 54
      src/renderer_gl.h

+ 2 - 2
examples/common/example-glue.cpp

@@ -202,9 +202,9 @@ void showExampleDialog(entry::AppI* _app, const char* _errorText)
 				}
 
 				ImGui::PopFont();
-
-				ImGui::EndChild();
 			}
+
+			ImGui::EndChild();
 		}
 	}
 

+ 61 - 0
src/renderer.h

@@ -495,6 +495,67 @@ namespace bgfx
 		return false;
 	}
 
+	template<typename Ty> // Ty: GPU timer type; must provide begin(view), end(idx), m_result[] and a nested Result with m_begin/m_end.
+	struct Profiler // Records per-view CPU and GPU elapsed time into Frame::m_perfStats.viewStats.
+	{
+		Profiler(Frame* _frame, Ty& _gpuTimer, const char (*_viewName)[BGFX_CONFIG_MAX_VIEW_NAME], bool _enabled = true)
+			: m_viewName(_viewName)
+			, m_frame(_frame)
+			, m_gpuTimer(_gpuTimer)
+			, m_queryIdx(UINT32_MAX) // UINT32_MAX marks "no query in flight"; end() tests for it.
+			, m_numViews(0)
+			, m_enabled(_enabled)
+		{
+		}
+
+		~Profiler()
+		{
+			m_frame->m_perfStats.numViews = m_numViews; // Publish the number of completed view entries (written even when disabled, as 0).
+		}
+
+		void begin(uint16_t _view) // Starts timing _view; does nothing when the profiler is disabled.
+		{
+			if (m_enabled)
+			{
+				ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews];
+				viewStats.cpuTimeElapsed = -bx::getHPCounter(); // Store the negated start time; end() adds the stop time to form the delta.
+
+				m_queryIdx = m_gpuTimer.begin(_view);
+
+				viewStats.view = uint8_t(_view); // NOTE(review): narrows the view id to 8 bits and end() indexes m_result with it -- confirm view ids fit.
+				bx::strCopy(viewStats.name
+					, BGFX_CONFIG_MAX_VIEW_NAME
+					, &m_viewName[_view][BGFX_CONFIG_MAX_VIEW_NAME_RESERVED] // Copy starts at the RESERVED offset inside the view's name buffer.
+					);
+			}
+		}
+
+		void end() // Finishes the view started by begin(); ignored when disabled or when no begin() is pending.
+		{
+			if (m_enabled
+			&&  UINT32_MAX != m_queryIdx)
+			{
+				m_gpuTimer.end(m_queryIdx);
+
+				ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews];
+				const typename Ty::Result& result = m_gpuTimer.m_result[viewStats.view]; // Result slot is looked up by view id, not by query index.
+
+				viewStats.cpuTimeElapsed += bx::getHPCounter();
+				viewStats.gpuTimeElapsed = result.m_end - result.m_begin; // NOTE(review): reads whatever the timer currently holds for this view, possibly from an earlier query -- confirm.
+
+				++m_numViews; // Advance to the next viewStats entry.
+				m_queryIdx = UINT32_MAX; // Mark no query pending so a repeated end() does nothing.
+			}
+		}
+
+		const char (*m_viewName)[BGFX_CONFIG_MAX_VIEW_NAME]; // Table of view name buffers, indexed by view id.
+		Frame*   m_frame; // Frame whose m_perfStats receives the measurements.
+		Ty&      m_gpuTimer; // GPU timer used to issue and read the per-view queries.
+		uint32_t m_queryIdx; // Index returned by m_gpuTimer.begin(), or UINT32_MAX when idle.
+		uint16_t m_numViews; // Number of viewStats entries completed so far.
+		bool     m_enabled; // When false, begin() and end() do nothing.
+	};
+
 } // namespace bgfx
 
 #endif // BGFX_RENDERER_H_HEADER_GUARD

+ 6 - 53
src/renderer_d3d11.cpp

@@ -5301,58 +5301,6 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		return false;
 	}
 
-	struct Profiler
-	{
-		Profiler(Frame* _frame, TimerQueryD3D11& _gpuTimer, bool _enabled = false)
-			: m_frame(_frame)
-			, m_gpuTimer(_gpuTimer)
-			, m_numViews(0)
-			, m_enabled(_enabled)
-		{
-		}
-
-		~Profiler()
-		{
-			m_frame->m_perfStats.numViews = m_numViews;
-		}
-
-		void begin(uint16_t _view)
-		{
-			if (m_enabled)
-			{
-				ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews];
-				viewStats.cpuTimeElapsed = -bx::getHPCounter();
-
-				m_queryIdx = m_gpuTimer.begin(_view);
-
-				viewStats.view = uint8_t(_view);
-				bx::strCopy(viewStats.name, BGFX_CONFIG_MAX_VIEW_NAME, &s_viewName[_view][BGFX_CONFIG_MAX_VIEW_NAME_RESERVED]);
-			}
-		}
-
-		void end()
-		{
-			if (m_enabled)
-			{
-				m_gpuTimer.end(m_queryIdx);
-
-				ViewStats& viewStats = m_frame->m_perfStats.viewStats[m_numViews];
-				const TimerQueryD3D11::Result& result = m_gpuTimer.m_result[viewStats.view];
-
-				viewStats.cpuTimeElapsed += bx::getHPCounter();
-				viewStats.gpuTimeElapsed = result.m_end - result.m_begin;
-
-				++m_numViews;
-			}
-		}
-
-		Frame* m_frame;
-		TimerQueryD3D11& m_gpuTimer;
-		uint32_t m_queryIdx;
-		uint16_t m_numViews;
-		bool     m_enabled;
-	};
-
 	void OcclusionQueryD3D11::postReset()
 	{
 		ID3D11Device* device = s_renderD3D11->m_device;
@@ -5597,7 +5545,12 @@ BX_PRAGMA_DIAGNOSTIC_POP();
 		uint32_t statsNumIndices = 0;
 		uint32_t statsKeyType[2] = {};
 
-		Profiler profiler(_render, m_gpuTimer);
+		Profiler<TimerQueryD3D11> profiler(
+			  _render
+			, m_gpuTimer
+			, s_viewName
+			, false //m_timerQuerySupport
+			);
 
 		m_occlusionQuery.resolve(_render);
 

+ 25 - 48
src/renderer_gl.cpp

@@ -11,20 +11,6 @@
 #	include <bx/uint32_t.h>
 #	include "hmd_ovr.h"
 
-#if BGFX_CONFIG_PROFILER_REMOTERY
-#	define BGFX_GPU_PROFILER_BIND() rmt_BindOpenGL()
-#	define BGFX_GPU_PROFILER_UNBIND() rmt_UnbindOpenGL()
-#	define BGFX_GPU_PROFILER_BEGIN(_group, _name, _color) rmt_BeginOpenGLSample(_group##_##_name)
-#	define BGFX_GPU_PROFILER_BEGIN_DYNAMIC(_namestr) rmt_BeginOpenGLSampleDynamic(_namestr)
-#	define BGFX_GPU_PROFILER_END() rmt_EndOpenGLSample()
-#else
-#	define BGFX_GPU_PROFILER_BIND() BX_NOOP()
-#	define BGFX_GPU_PROFILER_UNBIND() BX_NOOP()
-#	define BGFX_GPU_PROFILER_BEGIN(_group, _name, _color) BX_NOOP()
-#	define BGFX_GPU_PROFILER_BEGIN_DYNAMIC(_namestr) BX_NOOP()
-#	define BGFX_GPU_PROFILER_END() BX_NOOP()
-#endif // BGFX_CONFIG_PROFILER_REMOTERY
-
 namespace bgfx { namespace gl
 {
 	static char s_viewName[BGFX_CONFIG_MAX_VIEWS][BGFX_CONFIG_MAX_VIEW_NAME];
@@ -2561,8 +2547,6 @@ namespace bgfx { namespace gl
 				m_needPresent = false;
 			}
 
-			BGFX_GPU_PROFILER_BIND();
-
 			return true;
 
 		error:
@@ -2583,8 +2567,6 @@ namespace bgfx { namespace gl
 
 		void shutdown()
 		{
-			BGFX_GPU_PROFILER_UNBIND();
-
 			ovrPreReset();
 			m_ovr.shutdown();
 
@@ -6491,8 +6473,6 @@ namespace bgfx { namespace gl
 
 	void RendererContextGL::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
 	{
-		BGFX_GPU_PROFILER_BEGIN_DYNAMIC("rendererSubmit");
-
 		if (_render->m_capture)
 		{
 			renderDocTriggerCapture();
@@ -6522,9 +6502,11 @@ namespace bgfx { namespace gl
 		int64_t elapsed = -bx::getHPCounter();
 		int64_t captureElapsed = 0;
 
+		uint32_t frameQueryIdx = UINT32_MAX;
+
 		if (m_timerQuerySupport)
 		{
-			m_gpuTimer.begin();
+			frameQueryIdx = m_gpuTimer.begin(BGFX_CONFIG_MAX_VIEWS);
 		}
 
 		if (0 < _render->m_iboffset)
@@ -6598,6 +6580,13 @@ namespace bgfx { namespace gl
 		uint32_t statsNumIndices = 0;
 		uint32_t statsKeyType[2] = {};
 
+		Profiler<TimerQueryGL> profiler(
+			  _render
+			, m_gpuTimer
+			, s_viewName
+			, false //m_timerQuerySupport && !BX_ENABLED(BX_PLATFORM_OSX)
+			);
+
 		if (m_occlusionQuerySupport)
 		{
 			m_occlusionQuery.resolve(_render);
@@ -6674,11 +6663,10 @@ namespace bgfx { namespace gl
 
 					if (item > 1)
 					{
-						BGFX_GPU_PROFILER_END();
-						BGFX_PROFILER_END();
+						profiler.end();
 					}
-					BGFX_PROFILER_BEGIN_DYNAMIC(s_viewName[view]);
-					BGFX_GPU_PROFILER_BEGIN_DYNAMIC(s_viewName[view]);
+
+					profiler.begin(view);
 
 					viewState.m_rect = _render->m_rect[view];
 					if (viewRestart)
@@ -7574,13 +7562,10 @@ namespace bgfx { namespace gl
 				capture();
 				captureElapsed += bx::getHPCounter();
 
-				BGFX_GPU_PROFILER_END();
-				BGFX_PROFILER_END();
+				profiler.end();
 			}
 		}
 
-		BGFX_GPU_PROFILER_END();
-
 		m_glctx.makeCurrent(NULL);
 		int64_t now = bx::getHPCounter();
 		elapsed += now;
@@ -7601,28 +7586,26 @@ namespace bgfx { namespace gl
 		static uint32_t maxGpuLatency = 0;
 		static double   maxGpuElapsed = 0.0f;
 		double elapsedGpuMs = 0.0;
-		uint64_t elapsedGl  = 0;
 
-		if (m_timerQuerySupport)
+		if (UINT32_MAX != frameQueryIdx)
 		{
-			m_gpuTimer.end();
-			do
-			{
-				elapsedGl     = m_gpuTimer.m_elapsed;
-				elapsedGpuMs  = double(elapsedGl)/1e6;
-				maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
-			}
-			while (m_gpuTimer.get() );
+			m_gpuTimer.end(frameQueryIdx);
+
+			const TimerQueryGL::Result& result = m_gpuTimer.m_result[BGFX_CONFIG_MAX_VIEWS];
+			double toGpuMs = 1000.0 / 1e6;
+			elapsedGpuMs   = (result.m_end - result.m_begin) * toGpuMs;
+			maxGpuElapsed  = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
 
-			maxGpuLatency = bx::uint32_imax(maxGpuLatency, m_gpuTimer.m_control.available()-1);
+			maxGpuLatency = bx::uint32_imax(maxGpuLatency, result.m_pending-1);
 		}
 
 		const int64_t timerFreq = bx::getHPFrequency();
 
 		perfStats.cpuTimeEnd    = now;
 		perfStats.cpuTimerFreq  = timerFreq;
-		perfStats.gpuTimeBegin  = m_gpuTimer.m_begin;
-		perfStats.gpuTimeEnd    = m_gpuTimer.m_end;
+		const TimerQueryGL::Result& result = m_gpuTimer.m_result[BGFX_CONFIG_MAX_VIEWS];
+		perfStats.gpuTimeBegin  = result.m_begin;
+		perfStats.gpuTimeEnd    = result.m_end;
 		perfStats.gpuTimerFreq  = 1000000000;
 		perfStats.numDraw       = statsKeyType[0];
 		perfStats.numCompute    = statsKeyType[1];
@@ -7811,12 +7794,6 @@ namespace bgfx { namespace gl
 	}
 } } // namespace bgfx
 
-#undef BGFX_GPU_PROFILER_BIND
-#undef BGFX_GPU_PROFILER_UNBIND
-#undef BGFX_GPU_PROFILER_BEGIN
-#undef BGFX_GPU_PROFILER_BEGIN_DYNAMIC
-#undef BGFX_GPU_PROFILER_END
-
 #else
 
 namespace bgfx { namespace gl

+ 82 - 54
src/renderer_gl.h

@@ -1293,88 +1293,102 @@ namespace bgfx { namespace gl
 	struct TimerQueryGL
 	{
 		TimerQueryGL()
-			: m_control(BX_COUNTOF(m_frame) )
+			: m_control(BX_COUNTOF(m_query) ) // Size the ring-buffer control to the number of query slots.
 		{
 		}
 
 		void create()
 		{
-			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_query); ++ii) // Generate a begin/end GL query object pair for every slot.
 			{
-				Frame& frame = m_frame[ii];
-				GL_CHECK(glGenQueries(1, &frame.m_begin) );
-				GL_CHECK(glGenQueries(1, &frame.m_elapsed) );
+				Query& query = m_query[ii];
+				GL_CHECK(glGenQueries(1, &query.m_begin) );
+				GL_CHECK(glGenQueries(1, &query.m_end) );
 			}
 		}
 
 		void destroy()
 		{
-			for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_query); ++ii) // Delete the begin/end GL query object pair of every slot.
 			{
-				Frame& frame = m_frame[ii];
-				GL_CHECK(glDeleteQueries(1, &frame.m_begin) );
-				GL_CHECK(glDeleteQueries(1, &frame.m_elapsed) );
+				Query& query = m_query[ii];
+				GL_CHECK(glDeleteQueries(1, &query.m_begin) );
+				GL_CHECK(glDeleteQueries(1, &query.m_end) );
 			}
 		}
 
-		void begin()
+		uint32_t begin(uint32_t _resultIdx) // Issues a begin timestamp for result slot _resultIdx; returns the query slot index to pass to end().
 		{
 			while (0 == m_control.reserve(1) )
 			{
-				get();
+				update(); // No slot could be reserved: try to retire the oldest query, then retry.
 			}
 
-			Frame& frame = m_frame[m_control.m_current];
-			if (!BX_ENABLED(BX_PLATFORM_OSX) )
-			{
-				GL_CHECK(glQueryCounter(frame.m_begin
-						, GL_TIMESTAMP
-						) );
-			}
+			Result& result = m_result[_resultIdx];
+			++result.m_pending; // One more query in flight for this result slot; decremented in update().
 
-			GL_CHECK(glBeginQuery(GL_TIME_ELAPSED
-					, frame.m_elapsed
-					) );
+			const uint32_t idx = m_control.m_current;
+			Query& query = m_query[idx];
+			query.m_resultIdx = _resultIdx;
+			query.m_ready     = false; // end() has not been issued yet, so update() must not read this slot.
+
+			GL_CHECK(glQueryCounter(query.m_begin
+				, GL_TIMESTAMP
+				) );
+
+			m_control.commit(1);
+
+			return idx;
 		}
 
-		void end()
+		void end(uint32_t _idx) // Issues the end timestamp for query slot _idx (the value returned by begin()).
 		{
-			GL_CHECK(glEndQuery(GL_TIME_ELAPSED) );
-			m_control.commit(1);
+			Query& query = m_query[_idx];
+			query.m_ready = true; // Both timestamps are now issued; update() may poll this slot.
+
+			GL_CHECK(glQueryCounter(query.m_end
+				, GL_TIMESTAMP
+				) );
+
+			while (update() ) // Keep retiring queries for as long as update() reports one consumed.
+			{
+			}
 		}
 
-		bool get()
+		bool update() // Tries to retire the oldest outstanding query; returns true if one was consumed.
 		{
 			if (0 != m_control.available() )
 			{
-				Frame& frame = m_frame[m_control.m_read];
+				Query& query = m_query[m_control.m_read];
+
+				if (!query.m_ready) // end() has not been issued for the oldest query yet; nothing to read.
+				{
+					return false;
+				}
 
 				GLint available;
-				GL_CHECK(glGetQueryObjectiv(frame.m_elapsed
-						, GL_QUERY_RESULT_AVAILABLE
-						, &available
-						) );
+				GL_CHECK(glGetQueryObjectiv(query.m_end // Ask whether the end timestamp result is available.
+					, GL_QUERY_RESULT_AVAILABLE
+					, &available
+					) );
 
 				if (available)
 				{
-					if (!BX_ENABLED(BX_PLATFORM_OSX) )
-					{
-						GL_CHECK(glGetQueryObjectui64v(frame.m_begin
-								, GL_QUERY_RESULT
-								, &m_begin
-								) );
-					}
-					else
-					{
-						m_begin = 0;
-					}
-
-					GL_CHECK(glGetQueryObjectui64v(frame.m_elapsed
-							, GL_QUERY_RESULT
-							, &m_elapsed
-							) );
-					m_end = m_begin + m_elapsed;
 					m_control.consume(1);
+
+					Result& result = m_result[query.m_resultIdx]; // Results are stored per result slot, not per query slot.
+					--result.m_pending; // Matches the increment done in begin().
+
+					GL_CHECK(glGetQueryObjectui64v(query.m_begin
+						, GL_QUERY_RESULT
+						, &result.m_begin
+						) );
+
+					GL_CHECK(glGetQueryObjectui64v(query.m_end
+						, GL_QUERY_RESULT
+						, &result.m_end
+						) );
+
 					return true;
 				}
 			}
@@ -1382,17 +1396,31 @@ namespace bgfx { namespace gl
 			return false;
 		}
 
-		uint64_t m_begin;
-		uint64_t m_end;
-		uint64_t m_elapsed;
+		struct Result // Most recently read-back timestamps for one result slot, plus its in-flight query count.
+		{
+			void reset() // Zeroes the timestamps and the pending counter. NOTE(review): no call to reset() is visible in this diff -- confirm m_result is initialized before first use.
+			{
+				m_begin   = 0;
+				m_end     = 0;
+				m_pending = 0;
+			}
+
+			uint64_t m_begin; // Value read back from the begin timestamp query.
+			uint64_t m_end; // Value read back from the end timestamp query.
+			uint32_t m_pending; // Queries issued for this slot that update() has not yet retired.
+		};
 
-		struct Frame
+		struct Query // One ring-buffer slot: a begin/end GL query object pair plus bookkeeping.
 		{
-			GLuint m_begin;
-			GLuint m_elapsed;
+			GLuint   m_begin; // GL query object that receives the begin timestamp.
+			GLuint   m_end; // GL query object that receives the end timestamp.
+			uint32_t m_resultIdx; // Index into m_result that this query reports to.
+			bool     m_ready; // True once end() has issued the end timestamp.
 		};
 
-		Frame m_frame[4];
+		Result m_result[BGFX_CONFIG_MAX_VIEWS+1];
+
+		Query m_query[BGFX_CONFIG_MAX_VIEWS*4];
 		bx::RingBufferControl m_control;
 	};