Browse Source

display: Add queries for max work group count/size/invocations

Fixes #1713
rdb 10 months ago
parent
commit
c729c75b29

+ 40 - 1
panda/src/display/graphicsStateGuardian.I

@@ -596,7 +596,7 @@ get_supports_tessellation_shaders() const {
  */
 INLINE bool GraphicsStateGuardian::
 get_supports_compute_shaders() const {
-  return _supports_compute_shaders;
+  return _max_compute_work_group_invocations > 0;
 }
 
 /**
@@ -709,6 +709,45 @@ get_supports_dual_source_blending() const {
   return _supports_dual_source_blending;
 }
 
+/**
+ * Returns the maximum number of work groups that can be submitted in a single
+ * compute dispatch, given as a separate limit for the x, y and z dimensions.
+ *
+ * If compute shaders are supported, every component will be at least 65535.
+ * Otherwise, the result will be (0, 0, 0).
+ */
+INLINE LVecBase3i GraphicsStateGuardian::
+get_max_compute_work_group_count() const {
+  return _max_compute_work_group_count;
+}
+
+/**
+ * Returns the maximum size of a single work group, given as a separate limit
+ * on the number of invocations along the x, y and z dimensions.  This limit
+ * applies in addition to the limit on the total number of invocations, which
+ * is returned by get_max_compute_work_group_invocations().
+ *
+ * If compute shaders are supported, this will be at least 128x128x64.
+ * Otherwise, it will be (0, 0, 0).
+ */
+INLINE LVecBase3i GraphicsStateGuardian::
+get_max_compute_work_group_size() const {
+  return _max_compute_work_group_size;
+}
+
+/**
+ * Returns the maximum total number of invocations in a single work group,
+ * that is, the limit on the product of its x, y and z sizes.  This applies in
+ * addition to the per-dimension limits of get_max_compute_work_group_size().
+ *
+ * If compute shaders are supported, this will be at least 128.  Otherwise, it
+ * will be zero.  get_supports_compute_shaders() checks that this is positive.
+ */
+INLINE int GraphicsStateGuardian::
+get_max_compute_work_group_invocations() const {
+  return _max_compute_work_group_invocations;
+}
+
 /**
  * Deprecated.  Use get_max_color_targets() instead, which returns the exact
  * same value.

+ 4 - 1
panda/src/display/graphicsStateGuardian.cxx

@@ -244,10 +244,13 @@ GraphicsStateGuardian(CoordinateSystem internal_coordinate_system,
   _supports_basic_shaders = false;
   _supports_geometry_shaders = false;
   _supports_tessellation_shaders = false;
-  _supports_compute_shaders = false;
   _supports_glsl = false;
   _supports_hlsl = false;
 
+  _max_compute_work_group_count = LVecBase3i(0, 0, 0);
+  _max_compute_work_group_size = LVecBase3i(0, 0, 0);
+  _max_compute_work_group_invocations = 0;
+
   _supports_stencil = false;
   _supports_stencil_wrap = false;
   _supports_two_sided_stencil = false;

+ 13 - 1
panda/src/display/graphicsStateGuardian.h

@@ -176,6 +176,12 @@ PUBLISHED:
   INLINE int get_maximum_simultaneous_render_targets() const;
   INLINE bool get_supports_dual_source_blending() const;
 
+public:
+  INLINE LVecBase3i get_max_compute_work_group_count() const;
+  INLINE LVecBase3i get_max_compute_work_group_size() const;
+  INLINE int get_max_compute_work_group_invocations() const;
+
+PUBLISHED:
   MAKE_PROPERTY(max_vertices_per_array, get_max_vertices_per_array);
   MAKE_PROPERTY(max_vertices_per_primitive, get_max_vertices_per_primitive);
   MAKE_PROPERTY(max_texture_stages, get_max_texture_stages);
@@ -222,6 +228,9 @@ PUBLISHED:
   MAKE_PROPERTY(timer_queries_active, get_timer_queries_active);
   MAKE_PROPERTY(max_color_targets, get_max_color_targets);
   MAKE_PROPERTY(supports_dual_source_blending, get_supports_dual_source_blending);
+  MAKE_PROPERTY(max_compute_work_group_count, get_max_compute_work_group_count);
+  MAKE_PROPERTY(max_compute_work_group_size, get_max_compute_work_group_size);
+  MAKE_PROPERTY(max_compute_work_group_invocations, get_max_compute_work_group_invocations);
 
   INLINE ShaderModel get_shader_model() const;
   INLINE void set_shader_model(ShaderModel shader_model);
@@ -620,12 +629,15 @@ protected:
   bool _supports_basic_shaders;
   bool _supports_geometry_shaders;
   bool _supports_tessellation_shaders;
-  bool _supports_compute_shaders;
   bool _supports_glsl;
   bool _supports_hlsl;
   bool _supports_framebuffer_multisample;
   bool _supports_framebuffer_blit;
 
+  LVecBase3i _max_compute_work_group_count;
+  LVecBase3i _max_compute_work_group_size;
+  int _max_compute_work_group_invocations;
+
   bool _supports_stencil;
   bool _supports_stencil_wrap;
   bool _supports_two_sided_stencil;

+ 3 - 0
panda/src/gles2gsg/gles2gsg.h

@@ -278,6 +278,7 @@ typedef char GLchar;
 #define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
 #define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
 #define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
 #define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
 #define GL_UNSIGNALED 0x9118
 #define GL_SIGNALED 0x9119
@@ -285,6 +286,8 @@ typedef char GLchar;
 #define GL_TIMEOUT_EXPIRED 0x911B
 #define GL_CONDITION_SATISFIED 0x911C
 #define GL_COMPUTE_SHADER 0x91B9
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
 #define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310
 #define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311
 #define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313

+ 29 - 2
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -936,6 +936,15 @@ reset() {
   // Print out a list of all extensions.
   report_extensions();
 
+#ifndef OPENGLES_1
+  if (_gl_version_major >= 3) {
+    _glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)
+      get_extension_func("glGetIntegeri_v");
+  } else {
+    _glGetIntegeri_v = nullptr;
+  }
+#endif
+
   // Check if we are running under a profiling tool such as apitrace.
 #if !defined(NDEBUG) && !defined(OPENGLES_1)
   if (has_extension("GL_EXT_debug_marker")) {
@@ -2116,7 +2125,6 @@ reset() {
   }
 #endif  // HAVE_CG
 
-  _supports_compute_shaders = false;
 #ifndef OPENGLES_1
 #ifdef OPENGLES
   if (is_at_least_gles_version(3, 1)) {
@@ -2127,7 +2135,26 @@ reset() {
       get_extension_func("glDispatchCompute");
 
     if (_glDispatchCompute != nullptr) {
-      _supports_compute_shaders = true;
+      glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &_max_compute_work_group_invocations);
+
+      if (_max_compute_work_group_invocations > 0) {
+        // Initialize to spec-mandated minima
+        _max_compute_work_group_count.fill(65535);
+#ifdef OPENGLES
+        _max_compute_work_group_size.set(128, 128, 64);
+#else
+        _max_compute_work_group_size.set(1024, 1024, 64);
+#endif
+
+        if (_glGetIntegeri_v != nullptr) {
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &_max_compute_work_group_count[0]);
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &_max_compute_work_group_count[1]);
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &_max_compute_work_group_count[2]);
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &_max_compute_work_group_size[0]);
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &_max_compute_work_group_size[1]);
+          _glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &_max_compute_work_group_size[2]);
+        }
+      }
     }
   }
 #endif  // !OPENGLES_1

+ 5 - 0
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -168,6 +168,7 @@ typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei buf
 typedef void (APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenum pname, GLint *params);
 typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
 typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC) (GLenum target, GLuint index, GLint *data);
 typedef void (APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
 typedef void (APIENTRYP PFNGLSHADERSOURCEPROC_P) (GLuint shader, GLsizei count, const GLchar* const *string, const GLint *length);
 typedef void (APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program);
@@ -823,6 +824,10 @@ protected:
 #endif
 
 public:
+#ifndef OPENGLES_1
+  PFNGLGETINTEGERI_VPROC _glGetIntegeri_v;
+#endif
+
 #ifndef OPENGLES_1
   bool _use_depth_zero_to_one;
   bool _use_remapped_depth_range;