Explorar o código

Compute shader support under OpenGL :-)

rdb hai 11 anos
pai
achega
39a321d0ba

+ 11 - 0
panda/src/display/graphicsStateGuardian.I

@@ -655,6 +655,17 @@ get_supports_tessellation_shaders() const {
   return _supports_tessellation_shaders;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::get_supports_compute_shaders
+//       Access: Published
+//  Description: Returns the _supports_compute_shaders flag: true if
+//               this particular GSG supports compute shaders.
+////////////////////////////////////////////////////////////////////
+INLINE bool GraphicsStateGuardian::
+get_supports_compute_shaders() const {
+  return _supports_compute_shaders;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_supports_glsl
 //       Access: Published

+ 11 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -754,6 +754,17 @@ end_occlusion_query() {
   return result;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::dispatch_compute
+//       Access: Public, Virtual
+//  Description: Dispatches a currently bound compute shader using the
+//               given work group counts.  This base version only asserts.
+////////////////////////////////////////////////////////////////////
+void GraphicsStateGuardian::
+dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z) {
+  nassertv(false /* Compute shaders not supported by GSG */);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_geom_munger
 //       Access: Public, Virtual

+ 5 - 1
panda/src/display/graphicsStateGuardian.h

@@ -145,6 +145,7 @@ PUBLISHED:
   INLINE bool get_supports_basic_shaders() const;
   INLINE bool get_supports_geometry_shaders() const;
   INLINE bool get_supports_tessellation_shaders() const;
+  INLINE bool get_supports_compute_shaders() const;
   INLINE bool get_supports_glsl() const;
   INLINE bool get_supports_stencil() const;
   INLINE bool get_supports_two_sided_stencil() const;
@@ -225,6 +226,8 @@ public:
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual void dispatch_compute(int size_x, int size_y, int size_z);
+
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,
                                          Thread *current_thread);
   virtual PT(GeomMunger) make_geom_munger(const RenderState *state,
@@ -487,10 +490,11 @@ protected:
   bool _supports_basic_shaders;
   bool _supports_geometry_shaders;
   bool _supports_tessellation_shaders;
+  bool _supports_compute_shaders;
   bool _supports_glsl;
   bool _supports_framebuffer_multisample;
   bool _supports_framebuffer_blit;
-  
+
   bool _supports_stencil;
   bool _supports_stencil_wrap;
   bool _supports_two_sided_stencil;

+ 27 - 0
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -993,10 +993,12 @@ reset() {
 
 #ifdef OPENGLES_2
   _supports_glsl = true;
+  _supports_geometry_shaders = false;
   _supports_tessellation_shaders = false;
 #else
   #ifdef OPENGLES_1
     _supports_glsl = false;
+    _supports_geometry_shaders = false;
     _supports_tessellation_shaders = false;
   #else
     _supports_glsl = is_at_least_gl_version(2, 0) || has_extension("GL_ARB_shading_language_100");
@@ -1006,6 +1008,18 @@ reset() {
 #endif
   _shader_caps._supports_glsl = _supports_glsl;
 
+  _supports_compute_shaders = false;
+#ifndef OPENGLES
+  if (is_at_least_gl_version(4, 3) || has_extension("GL_ARB_compute_shader")) {
+    _glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)
+      get_extension_func("glDispatchCompute");
+
+    if (_glDispatchCompute != NULL) {
+      _supports_compute_shaders = true;
+    }
+  }
+#endif
+
 #ifndef OPENGLES
   if (_supports_glsl) {
     _glAttachShader = (PFNGLATTACHSHADERPROC)
@@ -4218,6 +4232,19 @@ end_occlusion_query() {
 #endif  // OPENGLES
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GLGraphicsStateGuardian::dispatch_compute
+//       Access: Public, Virtual
+//  Description: Dispatches a currently bound compute shader using
+//               the given work group counts.
+////////////////////////////////////////////////////////////////////
+void CLP(GraphicsStateGuardian)::
+dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z) {
+  nassertv(_supports_compute_shaders);
+  nassertv(_current_shader_context != NULL);
+  _glDispatchCompute(num_groups_x, num_groups_y, num_groups_z);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GLGraphicsStateGuardian::make_geom_munger
 //       Access: Public, Virtual

+ 4 - 0
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -172,6 +172,7 @@ typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first,
 typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures);
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
 #endif  // OPENGLES
 #endif  // __EDG__
 
@@ -262,6 +263,8 @@ public:
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual void dispatch_compute(int size_x, int size_y, int size_z);
+
   virtual PT(GeomMunger) make_geom_munger(const RenderState *state,
                                           Thread *current_thread);
 
@@ -693,6 +696,7 @@ public:
   PFNGLDRAWELEMENTSINSTANCEDPROC _glDrawElementsInstanced;
   PFNGLBINDIMAGETEXTUREPROC _glBindImageTexture;
   PFNGLBINDIMAGETEXTURESPROC _glBindImageTextures;
+  PFNGLDISPATCHCOMPUTEPROC _glDispatchCompute;
 #endif  // OPENGLES
 
   GLenum _edge_clamp;

+ 36 - 1
panda/src/glstuff/glShaderContext_src.cxx

@@ -217,6 +217,7 @@ CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
   _glsl_gshader = 0;
   _glsl_tcshader = 0;
   _glsl_teshader = 0;
+  _glsl_cshader = 0;
   _uses_standard_vertex_arrays = false;
 
 #if defined(HAVE_CG) && !defined(OPENGLES)
@@ -880,6 +881,9 @@ release_resources(GSG *gsg) {
     if (_glsl_teshader != 0) {
       gsg->_glDetachShader(_glsl_program, _glsl_teshader);
     }
+    if (_glsl_cshader != 0) {
+      gsg->_glDetachShader(_glsl_program, _glsl_cshader);
+    }
     gsg->_glDeleteProgram(_glsl_program);
     _glsl_program = 0;
   }
@@ -903,6 +907,10 @@ release_resources(GSG *gsg) {
     gsg->_glDeleteShader(_glsl_teshader);
     _glsl_teshader = 0;
   }
+  if (_glsl_cshader != 0) {
+    gsg->_glDeleteShader(_glsl_cshader);
+    _glsl_cshader = 0;
+  }
   
   gsg->report_my_gl_errors();
 }
@@ -1391,6 +1399,22 @@ disable_shader_texture_bindings(GSG *gsg) {
   cg_report_errors();
 #endif
 
+#ifndef OPENGLES
+  // Now unbind all the image units.  Not sure if we *have* to do this.
+  int num_image_units = min(_glsl_img_inputs.size(), (size_t)gsg->_max_image_units);
+
+  if (num_image_units > 0 && _shader->get_language() == Shader::SL_GLSL) {
+    if (gsg->_supports_multi_bind) {
+      gsg->_glBindImageTextures(0, num_image_units, NULL);
+
+    } else {
+      for (int i = 0; i < num_image_units; ++i) {
+        gsg->_glBindImageTexture(i, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
+      }
+    }
+  }
+#endif
+
   gsg->report_my_gl_errors();
 }
 
@@ -1623,6 +1647,11 @@ glsl_compile_entry_point(GSG *gsg, Shader::ShaderType type) {
         handle = gsg->_glCreateShader(GL_TESS_EVALUATION_SHADER);
       }
       break;
+    case Shader::ST_compute:
+      if (gsg->get_supports_compute_shaders()) {
+        handle = gsg->_glCreateShader(GL_COMPUTE_SHADER);
+      }
+      break;
 #endif
   }
   if (!handle) {
@@ -1701,7 +1730,13 @@ glsl_compile_shader(GSG *gsg) {
     if (!_glsl_teshader) return false;
     gsg->_glAttachShader(_glsl_program, _glsl_teshader);
   }
-  
+
+  if (!_shader->get_text(Shader::ST_compute).empty()) {
+    _glsl_cshader = glsl_compile_entry_point(gsg, Shader::ST_compute);
+    if (!_glsl_cshader) return false;
+    gsg->_glAttachShader(_glsl_program, _glsl_cshader);
+  }
+
   // There might be warnings. Only report them for one shader program.
   if (_glsl_vshader != 0) {
     glsl_report_shader_errors(gsg, _glsl_vshader);

+ 1 - 0
panda/src/glstuff/glShaderContext_src.h

@@ -74,6 +74,7 @@ private:
   GLuint _glsl_gshader;
   GLuint _glsl_tcshader;
   GLuint _glsl_teshader;
+  GLuint _glsl_cshader;
 
   pvector <GLint> _glsl_parameter_map;
 

+ 31 - 0
panda/src/glstuff/panda_glext.h

@@ -2398,6 +2398,27 @@ extern "C" {
 #define GL_TEXTURE_IMMUTABLE_FORMAT       0x912F
 #endif
 
+#ifndef GL_ARB_compute_shader
+#define GL_COMPUTE_SHADER                 0x91B9
+#define GL_MAX_COMPUTE_UNIFORM_BLOCKS     0x91BB
+#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
+#define GL_MAX_COMPUTE_IMAGE_UNIFORMS     0x91BD
+#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
+#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
+#define GL_MAX_COMPUTE_ATOMIC_COUNTERS    0x8265
+#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
+#define GL_MAX_COMPUTE_LOCAL_INVOCATIONS  0x90EB
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT   0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE    0x91BF
+#define GL_COMPUTE_LOCAL_WORK_SIZE        0x8267
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
+#define GL_DISPATCH_INDIRECT_BUFFER       0x90EE
+#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
+#define GL_COMPUTE_SHADER_BIT             0x00000020
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_ABGR_EXT                       0x8000
 #endif
@@ -7882,6 +7903,16 @@ typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum ta
 typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
 #endif
 
+#ifndef GL_ARB_compute_shader
+#define GL_ARB_compute_shader 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDispatchCompute (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+GLAPI void APIENTRY glDispatchComputeIndirect (GLintptr indirect);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect);
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_EXT_abgr 1
 #endif

+ 26 - 8
panda/src/gobj/shader.I

@@ -32,6 +32,15 @@ get_filename(const ShaderType &type) const {
       case ST_geometry:
         return _filename->_geometry;
         break;
+      // These cases must read from _filename, like the cases around them;
+      // _text holds the shader source rather than the file names.
+      case ST_tess_control:
+        return _filename->_tess_control;
+        break;
+      case ST_tess_evaluation:
+        return _filename->_tess_evaluation;
+        break;
+      case ST_compute:
+        return _filename->_compute;
+        break;
       default:
         return _filename->_shared;
     }
@@ -65,6 +74,9 @@ get_text(const ShaderType &type) const {
       case ST_tess_evaluation:
         return _text->_tess_evaluation;
         break;
+      case ST_compute:
+        return _text->_compute;
+        break;
       default:
         return _text->_shared;
     }
@@ -636,14 +648,16 @@ ShaderFile(const string &vertex,
 ////////////////////////////////////////////////////////////////////
 INLINE void Shader::ShaderFile::
 write_datagram(Datagram &dg) const {
-  dg.add_bool(_separate);
   if (_separate) {
+    dg.add_uint8(6);  // number of per-stage strings written below
     dg.add_string(_vertex);
     dg.add_string(_fragment);
     dg.add_string(_geometry);
     dg.add_string(_tess_control);
     dg.add_string(_tess_evaluation);
+    dg.add_string(_compute);
   } else {
+    dg.add_uint8(0);  // zero count: only the shared string follows
     dg.add_string(_shared);
   }
 }
@@ -655,13 +669,17 @@ write_datagram(Datagram &dg) const {
 ////////////////////////////////////////////////////////////////////
 INLINE void Shader::ShaderFile::
 read_datagram(DatagramIterator &scan) {
-  _separate = scan.get_bool();
-  if (_separate) {
-    _vertex = scan.get_string();
-    _fragment = scan.get_string();
-    _geometry = scan.get_string();
-    _tess_control = scan.get_string();
-    _tess_evaluation = scan.get_string();
+  // The leading byte is the number of per-stage strings that follow; zero
+  // means a single shared string was written instead.  Record which of the
+  // two layouts was read, so that _separate agrees with the fields that
+  // are filled in below.
+  short count = scan.get_uint8();
+  _separate = (count > 0);
+  if (_separate) {
+    if (count-- > 0) _vertex = scan.get_string();
+    if (count-- > 0) _fragment = scan.get_string();
+    if (count-- > 0) _geometry = scan.get_string();
+    if (count-- > 0) _tess_control = scan.get_string();
+    if (count-- > 0) _tess_evaluation = scan.get_string();
+    if (count-- > 0) _compute = scan.get_string();
+    // Read and discard any strings beyond the ones known here, so the
+    // iterator ends up past everything the count announced.
+    while (count-- > 0) {
+      scan.get_string();
+    }
   } else {
     _shared = scan.get_string();
   }

+ 58 - 2
panda/src/gobj/shader.cxx

@@ -20,8 +20,6 @@
 
 #ifdef HAVE_CG
 #include <Cg/cg.h>
-#define JCG_PROFILE_GLSLV ((CGprofile)7007)
-#define JCG_PROFILE_GLSLF ((CGprofile)7008)
 #endif
 
 TypeHandle Shader::_type_handle;
@@ -1428,6 +1426,14 @@ cg_compile_entry_point(const char *entry, const ShaderCaps &caps, ShaderType typ
     compiler_args[nargs++] = "-po";
     compiler_args[nargs++] = "ATI_draw_buffers";
   }
+
+  char version_arg[16];
+  if (!cg_glsl_version.empty() && cgGetProfileProperty((CGprofile) active, CG_IS_GLSL_PROFILE)) {
+    snprintf(version_arg, 16, "version=%s", cg_glsl_version.c_str());
+    compiler_args[nargs++] = "-po";
+    compiler_args[nargs++] = version_arg;
+  }
+
   compiler_args[nargs] = 0;
 
   if ((active != (int)CG_PROFILE_UNKNOWN) && (active != ultimate)) {
@@ -2243,6 +2249,34 @@ load(const ShaderLanguage &lang, const Filename &vertex,
   return result;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: Shader::load_compute
+//       Access: Published, Static
+//  Description: Loads a compute shader from the given file.  If a
+//               shader was already loaded under this filename and
+//               its language matches (or lang is SL_none), that
+//               shader is returned instead.  Returns NULL if the
+//               file could not be read.
+////////////////////////////////////////////////////////////////////
+PT(Shader) Shader::
+load_compute(const ShaderLanguage &lang, const Filename &fn) {
+  // The filename serves as the key into the table of loaded shaders.
+  PT(ShaderFile) sfile = new ShaderFile(fn);
+  ShaderTable::const_iterator cached = _load_table.find(sfile);
+  if (cached != _load_table.end()) {
+    if (lang == SL_none || lang == cached->second->_language) {
+      return cached->second;
+    }
+  }
+
+  // Not cached (or cached under a different language): read the source
+  // into the compute slot of a separate-style shader body.
+  PT(ShaderFile) sbody = new ShaderFile;
+  sbody->_separate = true;
+
+  VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
+  const bool read_ok = vfs->read_file(fn, sbody->_compute, true);
+  if (!read_ok) {
+    gobj_cat.error()
+      << "Could not read compute shader file: " << fn << "\n";
+    return NULL;
+  }
+
+  PT(Shader) shader = new Shader(sfile, sbody, lang);
+  shader->_loaded = true;
+  _load_table[sfile] = shader;
+  return shader;
+}
+
 //////////////////////////////////////////////////////////////////////
 //     Function: Shader::make
 //       Access: Published, Static
@@ -2294,6 +2328,28 @@ make(const ShaderLanguage &lang, const string &vertex, const string &fragment,
   return result;
 }
 
+//////////////////////////////////////////////////////////////////////
+//     Function: Shader::make_compute
+//       Access: Published, Static
+//  Description: Creates a compute shader from the given source
+//               string.  If a shader was already made from an equal
+//               body and its language matches (or lang is SL_none),
+//               that shader is returned instead.
+//////////////////////////////////////////////////////////////////////
+PT(Shader) Shader::
+make_compute(const ShaderLanguage &lang, const string &body) {
+  // The shader body itself serves as the key into the table of made
+  // shaders.
+  PT(ShaderFile) sbody = new ShaderFile;
+  sbody->_separate = true;
+  sbody->_compute = body;
+
+  ShaderTable::const_iterator cached = _make_table.find(sbody);
+  if (cached != _make_table.end()) {
+    if (lang == SL_none || lang == cached->second->_language) {
+      return cached->second;
+    }
+  }
+
+  // There is no file behind this shader, so it gets a placeholder name.
+  PT(ShaderFile) sfile = new ShaderFile("created-shader");
+  PT(Shader) shader = new Shader(sfile, sbody, lang);
+  _make_table[sbody] = shader;
+  return shader;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: Shader::parse_init
 //       Access: Public

+ 4 - 0
panda/src/gobj/shader.h

@@ -59,6 +59,7 @@ PUBLISHED:
     ST_geometry,
     ST_tess_control,
     ST_tess_evaluation,
+    ST_compute,
   };
 
   enum AutoShaderSwitch {
@@ -84,11 +85,13 @@ PUBLISHED:
                          const Filename &geometry = "",
                          const Filename &tess_control = "",
                          const Filename &tess_evaluation = "");
+  static PT(Shader) load_compute(const ShaderLanguage &lang, const Filename &fn);
   static PT(Shader) make(const ShaderLanguage &lang, 
                          const string &vertex, const string &fragment, 
                          const string &geometry = "",
                          const string &tess_control = "",
                          const string &tess_evaluation = "");
+  static PT(Shader) make_compute(const ShaderLanguage &lang, const string &body);
 
   INLINE const Filename get_filename(const ShaderType &type = ST_none) const;
   INLINE const string &get_text(const ShaderType &type = ST_none) const;
@@ -397,6 +400,7 @@ public:
     string _geometry;
     string _tess_control;
     string _tess_evaluation;
+    string _compute;
   };
 
 public:

+ 2 - 0
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -163,6 +163,8 @@ public:
   virtual void begin_occlusion_query()=0;
   virtual PT(OcclusionQueryContext) end_occlusion_query()=0;
 
+  virtual void dispatch_compute(int size_x, int size_y, int size_z)=0;
+
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,
                                          Thread *current_thread)=0;
 

+ 87 - 0
panda/src/pgraphnodes/computeNode.I

@@ -0,0 +1,87 @@
+// Filename: computeNode.I
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_dispatch
+//       Access: Published
+//  Description: Appends a dispatch command to this node.  The vector
+//               holds the number of work groups in the X, Y, and Z
+//               dimensions; set a component to 1 if the respective
+//               dimension should not be used.
+////////////////////////////////////////////////////////////////////
+INLINE void ComputeNode::
+add_dispatch(const LVecBase3i &num_groups) {
+  Dispatcher::CDWriter writer(_dispatcher->_cycler);
+  writer->_dispatches.push_back(num_groups);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_dispatch
+//       Access: Published
+//  Description: Convenience overload that packs the three work group
+//               counts into an LVecBase3i and appends it as a
+//               dispatch command.  Any of the counts may be set to 1
+//               if the respective dimension should not be used.
+////////////////////////////////////////////////////////////////////
+INLINE void ComputeNode::
+add_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) {
+  add_dispatch(LVecBase3i(num_groups_x, num_groups_y, num_groups_z));
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::get_num_dispatches
+//       Access: Published
+//  Description: Returns the number of dispatch commands currently
+//               stored on this object.
+////////////////////////////////////////////////////////////////////
+INLINE int ComputeNode::
+get_num_dispatches() const {
+  Dispatcher::CDReader reader(_dispatcher->_cycler);
+  return (int)reader->_dispatches.size();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::get_dispatch
+//       Access: Published
+//  Description: Returns the group counts of the nth dispatch of this
+//               object.  Asserts that n is a valid dispatch index.
+////////////////////////////////////////////////////////////////////
+INLINE const LVecBase3i &ComputeNode::
+get_dispatch(int n) const {
+  Dispatcher::CDReader cdata(_dispatcher->_cycler);
+  nassertr(n >= 0 && n < cdata->_dispatches.size(), LVecBase3i::zero());
+  return cdata->_dispatches[n];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::Constructor
+//       Access: Public
+//  Description: Creates the cycle data; the body does no explicit setup.
+////////////////////////////////////////////////////////////////////
+INLINE ComputeNode::Dispatcher::CData::
+CData() {
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::Copy Constructor
+//       Access: Public
+//  Description: Copies the list of dispatches from the given CData.
+////////////////////////////////////////////////////////////////////
+INLINE ComputeNode::Dispatcher::CData::
+CData(const ComputeNode::Dispatcher::CData &copy) :
+  _dispatches(copy._dispatches)
+{
+}

+ 274 - 0
panda/src/pgraphnodes/computeNode.cxx

@@ -0,0 +1,274 @@
+// Filename: computeNode.cxx
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pandabase.h"
+#include "computeNode.h"
+#include "cullTraverser.h"
+#include "cullableObject.h"
+#include "cullHandler.h"
+#include "geomDrawCallbackData.h"
+#include "omniBoundingVolume.h"
+#include "config_pgraph.h"
+
+TypeHandle ComputeNode::_type_handle;
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Constructor
+//       Access: Published
+//  Description: Creates a ComputeNode with the given name.  Use
+//               add_dispatch to add dispatch commands, and also
+//               assign a shader using a ShaderAttrib.
+////////////////////////////////////////////////////////////////////
+ComputeNode::
+ComputeNode(const string &name) :
+  PandaNode(name),
+  _dispatcher(new ComputeNode::Dispatcher)
+{
+  set_internal_bounds(new OmniBoundingVolume);  // NOTE(review): presumably so bounds never cull this node - confirm
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Copy Constructor
+//       Access: Protected
+//  Description: Copies the node, giving it a new copy of the Dispatcher.
+////////////////////////////////////////////////////////////////////
+ComputeNode::
+ComputeNode(const ComputeNode &copy) :
+  PandaNode(copy),
+  _dispatcher(new ComputeNode::Dispatcher(*copy._dispatcher))
+{
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::make_copy
+//       Access: Public, Virtual
+//  Description: Returns a newly-allocated ComputeNode that is built
+//               from this one using the copy constructor.  It will
+//               be a different Node pointer, handed back to the
+//               caller as a plain PandaNode pointer.
+////////////////////////////////////////////////////////////////////
+PandaNode *ComputeNode::
+make_copy() const {
+  return new ComputeNode(*this);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::safe_to_combine
+//       Access: Public, Virtual
+//  Description: Returns true if it is generally safe to combine this
+//               particular kind of PandaNode with other kinds of
+//               PandaNodes of compatible type, adding children or
+//               whatever.  A ComputeNode always returns false here,
+//               so it reports itself as unsafe to combine with any
+//               other node.
+////////////////////////////////////////////////////////////////////
+bool ComputeNode::
+safe_to_combine() const {
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::is_renderable
+//       Access: Public, Virtual
+//  Description: Returns true if there is some value to visiting this
+//               particular node during the cull traversal for any
+//               camera, false otherwise.  A ComputeNode always
+//               returns true: its dispatch callback is only handed
+//               to the cull handler from add_for_draw(), during the
+//               cull traversal.
+////////////////////////////////////////////////////////////////////
+bool ComputeNode::
+is_renderable() const {
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_for_draw
+//       Access: Public, Virtual
+//  Description: Called during the cull traversal to add this node's
+//               contents to the CullResult being built up.  For a
+//               ComputeNode this records a geometry-less
+//               CullableObject whose draw callback is the node's
+//               Dispatcher.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+add_for_draw(CullTraverser *trav, CullTraverserData &data) {
+  if (pgraph_cat.is_spam()) {
+    pgraph_cat.spam()
+      << "Found " << *this << " in state " << *data._state
+      << " draw_mask = " << data._draw_mask << "\n";
+  }
+
+  // There is no geometry to draw: the CullableObject is created with a
+  // NULL Geom and exists only to carry the Dispatcher as its draw
+  // callback.
+  CullableObject *dispatch_object =
+    new CullableObject(NULL, data._state,
+                       data.get_net_transform(trav),
+                       data.get_modelview_transform(trav),
+                       trav->get_scene());
+  dispatch_object->set_draw_callback(_dispatcher);
+
+  trav->get_cull_handler()->record_object(dispatch_object, trav);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::output
+//       Access: Public, Virtual
+//  Description: Writes a brief description of the node to the
+//               indicated output stream.  This is invoked by the <<
+//               operator.  ComputeNode adds nothing of its own and
+//               simply defers to PandaNode::output().
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+output(ostream &out) const {
+  PandaNode::output(out);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::Constructor
+//       Access: Public
+//  Description: Creates a Dispatcher; the body does no explicit setup.
+////////////////////////////////////////////////////////////////////
+ComputeNode::Dispatcher::
+Dispatcher() {
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::Copy Constructor
+//       Access: Public
+//  Description: Creates a Dispatcher whose cycler is copied from the other.
+////////////////////////////////////////////////////////////////////
+ComputeNode::Dispatcher::
+Dispatcher(const Dispatcher &copy) :
+  _cycler(copy._cycler)
+{
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::do_callback
+//       Access: Public, Virtual
+//  Description: Asks the GSG taken from the callback data to
+//               dispatch the compute shader once for every stored
+//               dispatch command, in the order they are stored.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::
+do_callback(CallbackData *cbdata) {
+  GeomDrawCallbackData *draw_data = (GeomDrawCallbackData *)cbdata;
+  GraphicsStateGuardianBase *gsg = draw_data->get_gsg();
+
+  CDReader cdata(_cycler);
+  const Dispatches &dispatches = cdata->_dispatches;
+
+  for (Dispatches::const_iterator di = dispatches.begin();
+       di != dispatches.end();
+       ++di) {
+    const LVecBase3i &num_groups = *di;
+    gsg->dispatch_compute(num_groups.get_x(),
+                          num_groups.get_y(),
+                          num_groups.get_z());
+  }
+
+  // Deliberately no upcall: there is no geometry attached to this
+  // callback that would need drawing.
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::register_with_read_factory
+//       Access: Public, Static
+//  Description: Registers make_from_bam with the BamReader factory
+//               as the creator for objects of type ComputeNode.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::write_datagram
+//       Access: Public, Virtual
+//  Description: Writes the PandaNode data to the datagram for a Bam
+//               file, followed by the Dispatcher's cycle data.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  PandaNode::write_datagram(manager, dg);
+  manager->write_cdata(dg, _dispatcher->_cycler);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::make_from_bam
+//       Access: Protected, Static
+//  Description: Factory function invoked by the BamReader when an
+//               object of type ComputeNode is encountered in the
+//               Bam file.  Creates an unnamed ComputeNode and fills
+//               it in from the datagram.
+////////////////////////////////////////////////////////////////////
+TypedWritable *ComputeNode::
+make_from_bam(const FactoryParams &params) {
+  DatagramIterator scan;
+  BamReader *manager;
+  parse_params(params, scan, manager);
+
+  // The node starts out with an empty name; fillin() reads the node's
+  // data from the datagram.
+  ComputeNode *compute_node = new ComputeNode("");
+  compute_node->fillin(scan, manager);
+  return compute_node;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::fillin
+//       Access: Protected
+//  Description: Called by make_from_bam to read the new ComputeNode
+//               from the Bam file: first the PandaNode data, then
+//               the Dispatcher's cycle data.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  PandaNode::fillin(scan, manager);
+  manager->read_cdata(scan, _dispatcher->_cycler);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::make_copy
+//       Access: Public, Virtual
+//  Description: Returns a new CData copy-constructed from this one.
+////////////////////////////////////////////////////////////////////
+CycleData *ComputeNode::Dispatcher::CData::
+make_copy() const {
+  return new CData(*this);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::write_datagram
+//       Access: Public, Virtual
+//  Description: Writes the dispatch list to the datagram for a Bam
+//               file: a 16-bit count followed by each dispatch's
+//               group counts in order.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::CData::
+write_datagram(BamWriter *manager, Datagram &dg) const {
+  // The count is stored as a uint16.
+  dg.add_uint16(_dispatches.size());
+
+  for (size_t i = 0; i < _dispatches.size(); ++i) {
+    generic_write_datagram(dg, _dispatches[i]);
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::fillin
+//       Access: Public, Virtual
+//  Description: Reads the dispatch list back from the datagram: a
+//               16-bit count followed by that many sets of group
+//               counts, replacing the current contents of the list.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::CData::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  const int count = scan.get_uint16();
+  _dispatches.resize(count);
+
+  // Every slot of the freshly sized list is read in order.
+  Dispatches::iterator di;
+  for (di = _dispatches.begin(); di != _dispatches.end(); ++di) {
+    generic_read_datagram(*di, scan);
+  }
+}

+ 114 - 0
panda/src/pgraphnodes/computeNode.h

@@ -0,0 +1,114 @@
+// Filename: computeNode.h
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef COMPUTENODE_H
+#define COMPUTENODE_H
+
+#include "pandabase.h"
+#include "pandaNode.h"
+#include "callbackObject.h"
+#include "pointerTo.h"
+
+////////////////////////////////////////////////////////////////////
+//       Class : ComputeNode
+// Description : A special node, the sole purpose of which is to
+//               invoke a dispatch operation on the assigned
+//               compute shader.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDA_PGRAPHNODES ComputeNode : public PandaNode {
+PUBLISHED:
+  ComputeNode(const string &name);
+
+  // Appends a dispatch to this node, given either as a vector of
+  // group counts or as three separate per-axis group counts.
+  INLINE void add_dispatch(const LVecBase3i &num_groups);
+  INLINE void add_dispatch(int num_groups_x, int num_groups_y, int num_groups_z);
+
+  // Indexed read access to the dispatches stored on this node.
+  INLINE int get_num_dispatches() const;
+  INLINE const LVecBase3i &get_dispatch(int i) const;
+  MAKE_SEQ(get_dispatches, get_num_dispatches, get_dispatch);
+
+public:
+  ComputeNode(const ComputeNode &copy);
+
+  virtual PandaNode *make_copy() const;
+  virtual bool safe_to_combine() const;
+
+  virtual bool is_renderable() const;
+  virtual void add_for_draw(CullTraverser *trav, CullTraverserData &data);
+
+  virtual void output(ostream &out) const;
+
+private:
+  // Callback object that owns the pipeline-cycled list of dispatches.
+  class EXPCL_PANDA_PGRAPHNODES Dispatcher : public CallbackObject {
+    friend class ComputeNode;
+  public:
+    ALLOC_DELETED_CHAIN(Dispatcher);
+    Dispatcher();
+    Dispatcher(const Dispatcher &copy);
+
+    virtual void do_callback(CallbackData *cbdata);
+
+    typedef pvector<LVecBase3i> Dispatches;
+
+    // The cycled data: the list of dispatches, serialized to and from
+    // Bam files via write_datagram() and fillin().
+    class EXPCL_PANDA_PGRAPHNODES CData : public CycleData {
+    public:
+      INLINE CData();
+      INLINE CData(const CData &copy);
+      virtual CycleData *make_copy() const;
+      virtual void write_datagram(BamWriter *manager, Datagram &dg) const;
+      virtual void fillin(DatagramIterator &scan, BamReader *manager);
+      // This CData belongs to ComputeNode, so report ComputeNode as
+      // the parent type rather than the unrelated CallbackNode (whose
+      // header is not even included here).
+      virtual TypeHandle get_parent_type() const {
+        return ComputeNode::get_class_type();
+      }
+
+      Dispatches _dispatches;
+    };
+
+    PipelineCycler<CData> _cycler;
+    typedef CycleDataReader<CData> CDReader;
+    typedef CycleDataWriter<CData> CDWriter;
+
+  };
+
+  // One per ComputeNode.
+  PT(Dispatcher) _dispatcher;
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg);
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager);
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    PandaNode::init_type();
+    register_type(_type_handle, "ComputeNode",
+                  PandaNode::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "computeNode.I"
+
+#endif

+ 3 - 0
panda/src/pgraphnodes/config_pgraphnodes.cxx

@@ -18,6 +18,7 @@
 #include "callbackData.h"
 #include "callbackNode.h"
 #include "callbackObject.h"
+#include "computeNode.h"
 #include "directionalLight.h"
 #include "fadeLodNode.h"
 #include "fadeLodNodeData.h"
@@ -113,6 +114,7 @@ init_libpgraphnodes() {
   CallbackData::init_type();
   CallbackNode::init_type();
   CallbackObject::init_type();
+  ComputeNode::init_type();
   DirectionalLight::init_type();
   FadeLODNode::init_type();
   FadeLODNodeData::init_type();
@@ -130,6 +132,7 @@ init_libpgraphnodes() {
 
   AmbientLight::register_with_read_factory();
   CallbackNode::register_with_read_factory();
+  ComputeNode::register_with_read_factory();
   DirectionalLight::register_with_read_factory();
   FadeLODNode::register_with_read_factory();
   LightNode::register_with_read_factory();

+ 1 - 0
panda/src/pgraphnodes/p3pgraphnodes_composite1.cxx

@@ -1,5 +1,6 @@
 #include "ambientLight.cxx"
 #include "callbackNode.cxx"
+#include "computeNode.cxx"
 #include "config_pgraphnodes.cxx"
 #include "directionalLight.cxx"
 #include "fadeLodNode.cxx"