Browse Source

display: Add per-shader PStats timer for compute dispatches

Also add a total work group counter

Fixes #1721
rdb 10 months ago
parent
commit
a7f8c57681

+ 3 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -100,6 +100,7 @@ PStatCollector GraphicsStateGuardian::_draw_primitive_pcollector("Draw:Primitive
 PStatCollector GraphicsStateGuardian::_draw_set_state_pcollector("Draw:Set State");
 PStatCollector GraphicsStateGuardian::_flush_pcollector("Draw:Flush");
 PStatCollector GraphicsStateGuardian::_compute_dispatch_pcollector("Draw:Compute dispatch");
+PStatCollector GraphicsStateGuardian::_compute_work_groups_pcollector("Compute work groups");
 
 PStatCollector GraphicsStateGuardian::_wait_occlusion_pcollector("Wait:Occlusion");
 PStatCollector GraphicsStateGuardian::_wait_timer_pcollector("Wait:Timer Queries");
@@ -2780,6 +2781,7 @@ end_frame(Thread *current_thread) {
   _vertices_tri_pcollector.flush_level();
   _vertices_patch_pcollector.flush_level();
   _vertices_other_pcollector.flush_level();
+  _compute_work_groups_pcollector.flush_level();
 
   _state_pcollector.flush_level();
   _texture_state_pcollector.flush_level();
@@ -3428,6 +3430,7 @@ init_frame_pstats() {
     _vertices_tri_pcollector.clear_level();
     _vertices_patch_pcollector.clear_level();
     _vertices_other_pcollector.clear_level();
+    _compute_work_groups_pcollector.clear_level();
 
     _state_pcollector.clear_level();
     _transform_state_pcollector.clear_level();

+ 1 - 0
panda/src/display/graphicsStateGuardian.h

@@ -706,6 +706,7 @@ public:
   static PStatCollector _draw_set_state_pcollector;
   static PStatCollector _flush_pcollector;
   static PStatCollector _compute_dispatch_pcollector;
+  static PStatCollector _compute_work_groups_pcollector;
   static PStatCollector _wait_occlusion_pcollector;
   static PStatCollector _wait_timer_pcollector;
   static PStatCollector _timer_queries_pcollector;

+ 7 - 2
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -7916,11 +7916,16 @@ void CLP(GraphicsStateGuardian)::
 dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z) {
   maybe_gl_finish();
 
-  PStatGPUTimer timer(this, _compute_dispatch_pcollector);
-  nassertv(_supports_compute_shaders);
+  nassertv(get_supports_compute_shaders());
   nassertv(_current_shader_context != nullptr);
   CLP(ShaderContext) *gsc;
   DCAST_INTO_V(gsc, _current_shader_context);
+
+#ifdef DO_PSTATS
+  _compute_work_groups_pcollector.add_level(num_groups_x * num_groups_y * num_groups_z);
+  PStatGPUTimer timer(this, gsc->_compute_dispatch_pcollector);
+#endif
+
   gsc->issue_memory_barriers();
 
   _glDispatchCompute(num_groups_x, num_groups_y, num_groups_z);

+ 4 - 0
panda/src/glstuff/glShaderContext_src.cxx

@@ -421,6 +421,10 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext
 
   _mat_part_cache = new LVecBase4f[_shader->cp_get_mat_cache_size()];
   _mat_scratch_space = new LVecBase4f[_shader->cp_get_mat_scratch_size()];
+
+#ifdef DO_PSTATS
+  _compute_dispatch_pcollector = PStatCollector(glgsg->_compute_dispatch_pcollector, s->get_debug_name());
+#endif
 }
 
 /**

+ 4 - 0
panda/src/glstuff/glShaderContext_src.h

@@ -120,6 +120,10 @@ private:
 
   bool _uses_standard_vertex_arrays;
 
+#ifdef DO_PSTATS
+  PStatCollector _compute_dispatch_pcollector;
+#endif
+
   void glsl_report_shader_errors(GLuint shader, Shader::ShaderType type, bool fatal);
   void glsl_report_program_errors(GLuint program, bool fatal);
   bool glsl_compile_shader(Shader::ShaderType type);