浏览代码

display: Add method to synchronously download SSBO contents

rdb 10 月之前
父节点
当前提交
3833866801

+ 71 - 2
panda/src/display/graphicsEngine.cxx

@@ -1160,7 +1160,7 @@ extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) {
     // Now that the draw thread is idle, signal it to do the extraction task.
     thread->_gsg = gsg;
     thread->_texture = tex;
-    thread->_thread_state = TS_do_extract;
+    thread->_thread_state = TS_do_extract_texture_data;
     thread->_cv_mutex.release();
     thread->_cv_start.notify();
     thread->_cv_mutex.acquire();
@@ -1177,6 +1177,70 @@ extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) {
   }
 }
 
+/**
+ * Asks the indicated GraphicsStateGuardian to retrieve the buffer memory
+ * image of the indicated ShaderBuffer and return it.
+ *
+ * This is mainly useful for debugging.  It is a very slow call because it
+ * introduces a pipeline stall both of Panda's pipeline and the graphics
+ * pipeline.
+ *
+ * If the GSG's threading model reports an empty draw thread name, the
+ * extraction is performed directly on the calling thread.  Otherwise this
+ * call blocks until the draw thread's state is TS_wait, hands it the request
+ * by setting TS_do_extract_shader_buffer_data, and then blocks again until
+ * the state has returned to TS_wait before reading back the outcome.
+ *
+ * The engine lock is held for the whole duration of the call.
+ *
+ * The return value is empty if some kind of error occurred.
+ */
+vector_uchar GraphicsEngine::
+extract_shader_buffer_data(ShaderBuffer *buffer, GraphicsStateGuardian *gsg) {
+  ReMutexHolder holder(_lock);
+
+  string draw_name = gsg->get_threading_model().get_draw_name();
+  if (draw_name.empty()) {
+    // No draw thread name: a single-threaded environment, so call the GSG directly.
+    vector_uchar data;
+    if (!gsg->extract_shader_buffer_data(buffer, data)) {
+      data.clear();  // honor the "empty on error" contract
+    }
+    return data;
+  }
+
+  // A multi-threaded environment.  We have to wait until the draw thread
+  // has finished its current task, i.e. until its state is TS_wait again.
+  WindowRenderer *wr = get_window_renderer(draw_name, 0);
+  RenderThread *thread = (RenderThread *)wr;
+  MutexHolder cv_holder(thread->_cv_mutex);
+
+  while (thread->_thread_state != TS_wait) {
+    thread->_cv_done.wait();
+  }
+
+  // Temporarily give the draw thread the calling thread's pipeline stage; the saved stage is restored below.
+  int pipeline_stage = Thread::get_current_pipeline_stage();
+  int draw_pipeline_stage = thread->get_pipeline_stage();
+  thread->set_pipeline_stage(pipeline_stage);
+
+  // Now that the draw thread is idle, signal it to do the extraction task.
+  vector_uchar data;
+  thread->_gsg = gsg;
+  thread->_buffer = buffer;
+  thread->_buffer_result = &data;  // the draw thread writes into this local
+  thread->_thread_state = TS_do_extract_shader_buffer_data;
+  thread->_cv_mutex.release();  // released around the notify, re-acquired right after
+  thread->_cv_start.notify();
+  thread->_cv_mutex.acquire();
+
+  // Wait for it to finish the extraction (state goes back to TS_wait).
+  while (thread->_thread_state != TS_wait) {
+    thread->_cv_done.wait();
+  }
+
+  thread->set_pipeline_stage(draw_pipeline_stage);
+  thread->_gsg = nullptr;
+  thread->_buffer = nullptr;
+  thread->_buffer_result = nullptr;  // must not outlive the local it points at
+  if (!thread->_result) {  // _result is assigned by the draw thread's task handler
+    data.clear();
+  }
+  return data;
+}
+
 /**
  * Asks the indicated GraphicsStateGuardian to dispatch the compute shader in
  * the given ShaderAttrib using the given work group counts.  This can act as
@@ -2815,11 +2879,16 @@ thread_main() {
       }
       break;
 
-    case TS_do_extract:
+    case TS_do_extract_texture_data:
       nassertd(_gsg != nullptr && _texture != nullptr) break;
       _result = _gsg->extract_texture_data(_texture);
       break;
 
+    case TS_do_extract_shader_buffer_data:
+      // Report failure unless the extraction below actually runs, so that a
+      // failed assertion cannot leave a stale result from an earlier task.
+      _result = false;
+      // Check the members this task really consumes.  _texture is not used
+      // here; _buffer_result is dereferenced below and must not be null.
+      nassertd(_gsg != nullptr && _buffer != nullptr && _buffer_result != nullptr) break;
+      _result = _gsg->extract_shader_buffer_data(_buffer, *_buffer_result);
+      break;
+
     case TS_do_screenshot:
       nassertd(_region != nullptr) break;
       _texture = _region->get_screenshot();

+ 5 - 1
panda/src/display/graphicsEngine.h

@@ -113,6 +113,7 @@ PUBLISHED:
   BLOCKING void flip_frame();
 
   bool extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg);
+  vector_uchar extract_shader_buffer_data(ShaderBuffer *buffer, GraphicsStateGuardian *gsg);
   void dispatch_compute(const LVecBase3i &work_groups,
                         const RenderState *state,
                         GraphicsStateGuardian *gsg);
@@ -130,7 +131,8 @@ public:
     TS_do_release,
     TS_do_windows,
     TS_do_compute,
-    TS_do_extract,
+    TS_do_extract_texture_data,
+    TS_do_extract_shader_buffer_data,
     TS_do_screenshot,
     TS_terminate,
     TS_done
@@ -313,6 +315,8 @@ private:
     // These are stored for extract_texture_data and dispatch_compute.
     GraphicsStateGuardian *_gsg;
     PT(Texture) _texture;
+    ShaderBuffer *_buffer;
+    vector_uchar *_buffer_result;
     const RenderState *_state;
     DisplayRegion *_region;
     LVecBase3i _work_groups;

+ 12 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -745,6 +745,18 @@ release_shader_buffers(const pvector<BufferContext *> &contexts) {
   }
 }
 
+/**
+ * This method should only be called by the GraphicsEngine.  Do not call it
+ * directly; call GraphicsEngine::extract_shader_buffer_data() instead.
+ *
+ * This method will be called in the draw thread to download the buffer's
+ * current contents synchronously.
+ *
+ * This base implementation ignores both arguments, leaves data untouched
+ * and always returns false; the method is declared virtual so that a
+ * derived GSG can override it with a real implementation.
+ */
+bool GraphicsStateGuardian::
+extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data) {
+  return false;
+}
+
 /**
  * Begins a new occlusion query.  After this call, you may call
  * begin_draw_primitives() and draw_triangles()/draw_whatever() repeatedly.

+ 1 - 0
panda/src/display/graphicsStateGuardian.h

@@ -316,6 +316,7 @@ public:
   virtual BufferContext *prepare_shader_buffer(ShaderBuffer *data);
   virtual void release_shader_buffer(BufferContext *ibc);
   virtual void release_shader_buffers(const pvector<BufferContext *> &contexts);
+  virtual bool extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data);
 
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();

+ 34 - 1
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -7367,7 +7367,11 @@ prepare_shader_buffer(ShaderBuffer *data) {
     // Some drivers require the buffer to be padded to 16 byte boundary.
     uint64_t num_bytes = (data->get_data_size_bytes() + 15u) & ~15u;
     if (_supports_buffer_storage) {
-      _glBufferStorage(GL_SHADER_STORAGE_BUFFER, num_bytes, data->get_initial_data(), 0);
+      GLbitfield flags = 0;
+      if (data->get_usage_hint() == GeomEnums::UH_client) {
+        flags |= GL_CLIENT_STORAGE_BIT;
+      }
+      _glBufferStorage(GL_SHADER_STORAGE_BUFFER, num_bytes, data->get_initial_data(), flags);
     } else {
       _glBufferData(GL_SHADER_STORAGE_BUFFER, num_bytes, data->get_initial_data(), get_usage(data->get_usage_hint()));
     }
@@ -7502,6 +7506,35 @@ release_shader_buffers(const pvector<BufferContext *> &contexts) {
   _glDeleteBuffers(num_indices, indices);
   report_my_gl_errors();
 }
+
+/**
+ * This method should only be called by the GraphicsEngine.  Do not call it
+ * directly; call GraphicsEngine::extract_shader_buffer_data() instead.
+ *
+ * This method will be called in the draw thread to download the buffer's
+ * current contents synchronously.
+ *
+ * The buffer is prepared on this GSG if necessary, bound to
+ * GL_SHADER_STORAGE_BUFFER, and buffer->get_data_size_bytes() bytes are read
+ * back into data, which is resized to exactly that size.  The binding is
+ * reset to 0 afterwards.
+ *
+ * Returns false, leaving data unmodified, if the buffer could not be
+ * prepared or its context is not a GL buffer context; returns true otherwise.
+ */
+bool CLP(GraphicsStateGuardian)::
+extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data) {
+  BufferContext *bc = buffer->prepare_now(get_prepared_objects(), this);
+  if (bc == nullptr || !bc->is_of_type(CLP(BufferContext)::get_class_type())) {
+    return false;
+  }
+  CLP(BufferContext) *gbc = DCAST(CLP(BufferContext), bc);
+
+  data.resize(buffer->get_data_size_bytes());
+
+  _glBindBuffer(GL_SHADER_STORAGE_BUFFER, gbc->_index);
+
+  // Use data.data() rather than &data[0]: indexing an empty vector is
+  // undefined behavior, whereas data() is always valid to call.
+  _glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data.size(), data.data());
+
+  // Leave no buffer bound and keep the cached binding state in sync.
+  _glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+  _current_sbuffer_index = 0;
+  report_my_gl_errors();
+
+  return true;
+}
 #endif
 
 #ifndef OPENGLES

+ 1 - 0
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -394,6 +394,7 @@ public:
   CLP(BufferContext) *apply_shader_buffer(GLuint base, ShaderBuffer *buffer);
   virtual void release_shader_buffer(BufferContext *bc);
   virtual void release_shader_buffers(const pvector<BufferContext *> &contexts);
+  virtual bool extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data);
 #endif
 
 #ifndef OPENGLES

+ 9 - 1
tests/display/test_glsl_shader.py

@@ -299,7 +299,7 @@ def test_glsl_uimage(gsg):
 
 
 def test_glsl_ssbo(gsg):
-    from struct import pack
+    from struct import pack, unpack
     num1 = pack('<i', 1234567)
     num2 = pack('<i', -1234567)
     buffer1 = core.ShaderBuffer("buffer1", num1, core.GeomEnums.UH_static)
@@ -316,12 +316,20 @@ def test_glsl_ssbo(gsg):
     code = """
     assert(value1 == 1234567);
     assert(value2 == -1234567);
+    value1 = 98765;
+    value2 = 5343525;
     """
     run_glsl_test(gsg, code, preamble, {'buffer1': buffer1, 'buffer2': buffer2},
                   exts={'GL_ARB_shader_storage_buffer_object',
                         'GL_ARB_uniform_buffer_object',
                         'GL_ARB_shading_language_420pack'})
 
+    data1 = gsg.get_engine().extract_shader_buffer_data(buffer1, gsg)
+    assert unpack('<i', data1[:4]) == (98765, )
+
+    data2 = gsg.get_engine().extract_shader_buffer_data(buffer2, gsg)
+    assert unpack('<i', data2[:4]) == (5343525, )
+
 
 def test_glsl_int(gsg):
     inputs = dict(