浏览代码

glgsg: Big refactoring of texture uploading, add async upload support

rdb 10 月之前
父节点
当前提交
2a47826101

+ 17 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -572,6 +572,23 @@ update_texture(TextureContext *, bool) {
   return true;
 }
 
+/**
+ * Ensures that the current Texture data is refreshed onto the GSG.  This
+ * means updating the texture properties and/or re-uploading the texture
+ * image, if necessary.  This should only be called within the draw thread.
+ *
+ * If force is true, this function will not return until the texture has been
+ * fully uploaded.  If force is false, the function may choose to upload a
+ * simple version of the texture instead, if the texture is not fully resident
+ * (and if get_incomplete_render() is true).
+ *
+ * This base implementation simply forwards to the two-argument
+ * update_texture() overload, then completes the given token with the result
+ * of that call before returning the same result.  The token is therefore
+ * always completed before this function returns.
+ */
+bool GraphicsStateGuardian::
+update_texture(TextureContext *tc, bool force, CompletionToken token) {
+  bool result = update_texture(tc, force);
+  token.complete(result);
+  return result;
+}
+
 /**
  * Frees the resources previously allocated via a call to prepare_texture(),
  * including deleting the TextureContext itself, if it is non-NULL.

+ 1 - 0
panda/src/display/graphicsStateGuardian.h

@@ -292,6 +292,7 @@ PUBLISHED:
 public:
   virtual TextureContext *prepare_texture(Texture *tex);
   virtual bool update_texture(TextureContext *tc, bool force);
+  virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token);
   virtual void release_texture(TextureContext *tc);
   virtual void release_textures(const pvector<TextureContext *> &contexts);
   virtual bool extract_texture_data(Texture *tex);

+ 4 - 0
panda/src/gles2gsg/gles2gsg.h

@@ -148,6 +148,10 @@ typedef char GLchar;
 #define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x800
 #define GL_ATOMIC_COUNTER_BARRIER_BIT 0x1000
 #define GL_SHADER_STORAGE_BARRIER_BIT 0x2000
+#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004
+#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008
+#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
+#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020
 #define GL_HALF_FLOAT 0x140B
 #define GL_COLOR 0x1800
 #define GL_DEPTH 0x1801

文件差异内容过多而无法显示
+ 977 - 493
panda/src/glstuff/glGraphicsStateGuardian_src.cxx


+ 46 - 5
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -39,6 +39,8 @@
 #include "geomVertexArrayData.h"
 #include "lightMutex.h"
 #include "pStatGPUTimer.h"
+#include "completionToken.h"
+#include "asyncTaskChain.h"
 
 class PlaneNode;
 class Light;
@@ -230,6 +232,7 @@ typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufS
 typedef void (APIENTRYP PFNGLPROGRAMBINARYPROC) (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length);
 typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
 typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags);
+typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC) (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
 typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data);
 typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data);
@@ -345,7 +348,8 @@ public:
 #endif
 
   virtual TextureContext *prepare_texture(Texture *tex);
-  virtual bool update_texture(TextureContext *tc, bool force);
+  virtual bool update_texture(TextureContext *tc, bool force,
+                              CompletionToken token = CompletionToken());
   virtual void release_texture(TextureContext *tc);
   virtual void release_textures(const pvector<TextureContext *> &contexts);
   virtual bool extract_texture_data(Texture *tex);
@@ -637,12 +641,21 @@ protected:
   bool apply_texture(CLP(TextureContext) *gtc, int view);
   bool apply_sampler(GLuint unit, const SamplerState &sampler,
                      CLP(TextureContext) *gtc, int view);
-  bool upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps);
-  bool upload_texture_image(CLP(TextureContext) *gtc, int view,
-                            bool needs_reload, int mipmap_bias, int num_levels,
+  bool upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps,
+                      CompletionToken token = CompletionToken());
+  bool upload_texture_view(CLP(TextureContext) *gtc, int view,
+                           bool needs_reload, int mipmap_bias, int num_levels,
+                           GLint internal_format, GLint external_format,
+                           GLenum component_type, bool compressed,
+                           int async_buffers, CompletionToken token);
+  bool upload_texture_level(bool full_reload, bool compressed,
+                            GLenum target, int level,
+                            int width, int height, int depth,
                             GLint internal_format, GLint external_format,
                             GLenum component_type,
-                            Texture::CompressionMode image_compression);
+                            const unsigned char *image_ptr,
+                            size_t page_size, SparseArray pages,
+                            GLenum usage_hint);
   void generate_mipmaps(CLP(TextureContext) *gtc);
   bool upload_simple_texture(CLP(TextureContext) *gtc);
 
@@ -658,6 +671,14 @@ protected:
   void do_point_size();
 #endif
 
+#ifndef OPENGLES_1
+  void *map_write_discard_buffer(GLenum target, GLuint buffer, size_t size,
+                                 bool create_storage);
+#endif
+
+  void call_later(Completable &&job);
+  void process_pending_jobs(bool wait);
+
   enum AutoAntialiasMode {
     AA_poly,
     AA_line,
@@ -904,6 +925,10 @@ public:
   PFNGLGETBUFFERSUBDATAPROC _glGetBufferSubData;
 #endif
 
+#ifndef OPENGLES_1
+  PFNGLCOPYBUFFERSUBDATAPROC _glCopyBufferSubData;
+#endif
+
 #ifdef OPENGLES
   PFNGLMAPBUFFERRANGEEXTPROC _glMapBufferRange;
   PFNGLUNMAPBUFFEROESPROC _glUnmapBuffer;
@@ -911,6 +936,10 @@ public:
   PFNGLMAPBUFFERRANGEPROC _glMapBufferRange;
 #endif
 
+#ifndef OPENGLES_1
+  bool _supports_pixel_buffers;
+#endif
+
 #ifndef OPENGLES_1
   bool _supports_uniform_buffers;
   bool _supports_shader_buffers;
@@ -978,6 +1007,7 @@ public:
   PFNGLTEXTUREPARAMETERIPROC _glTextureParameteri;
   PFNGLGENERATETEXTUREMIPMAPPROC _glGenerateTextureMipmap;
   PFNGLBINDTEXTUREUNITPROC _glBindTextureUnit;
+  PFNGLMAPNAMEDBUFFERRANGEPROC _glMapNamedBufferRange;
 #endif
 
 #ifndef OPENGLES_1
@@ -1231,6 +1261,16 @@ public:
   };
   pdeque<AsyncRamCopy> _async_ram_copies;
 
+#ifdef HAVE_THREADS
+  AsyncTaskChain *_async_chain;
+#endif
+
+  // Minimal job system, used until a real job system is available
+  typedef pvector<Completable> JobQueue;
+  Mutex _job_queue_mutex;
+  ConditionVar _job_queue_cvar;
+  JobQueue _job_queue;
+
   BufferResidencyTracker _renderbuffer_residency;
 
   PStatCollector _active_ppbuffer_memory_pcollector;
@@ -1274,6 +1314,7 @@ private:
   friend class CLP(BufferContext);
   friend class CLP(ShaderContext);
   friend class CLP(CgShaderContext);
+  friend class CLP(TextureContext);
   friend class CLP(GraphicsBuffer);
   friend class CLP(OcclusionQueryContext);
 };

+ 40 - 0
panda/src/glstuff/glTextureContext_src.I

@@ -59,3 +59,43 @@ get_view_buffer(int view) const {
     return 0;
   }
 }
+
+/**
+ * Returns true if an async upload is pending, which is the case whenever the
+ * _uploads_pending counter is greater than zero.
+ */
+INLINE bool CLP(TextureContext)::
+is_upload_pending() const {
+  // We can't simply compare _uploads_started to _uploads_finished, since
+  // cancel_pending_uploads() also makes those two counters equal.
+  return _uploads_pending > 0;
+}
+
+/**
+ * Waits for all uploads to be finished.  Returns immediately if
+ * is_upload_pending() is false; otherwise hands off to the out-of-line
+ * do_wait_pending_uploads().
+ */
+INLINE void CLP(TextureContext)::
+wait_pending_uploads() const {
+  if (is_upload_pending()) {
+    do_wait_pending_uploads();
+  }
+}
+
+/**
+ * Cancels all asynchronous uploads.  The uploads are not guaranteed to have
+ * been cancelled by the time this returns; consider following this up with a
+ * call to wait_pending_uploads().
+ *
+ * This only sets _uploads_finished equal to _uploads_started; it does not
+ * touch _uploads_pending, so is_upload_pending() may still return true
+ * afterwards.
+ */
+INLINE void CLP(TextureContext)::
+cancel_pending_uploads() {
+  _uploads_finished = _uploads_started;
+}
+
+/**
+ * Waits for an unused PBO, unless the number of PBOs is still below the given
+ * limit.  Returns immediately if the pool of unused PBOs is not empty or if
+ * _num_pbos is less than limit; otherwise hands off to the out-of-line
+ * do_wait_for_unused_pbo().
+ */
+INLINE void CLP(TextureContext)::
+wait_for_unused_pbo(int limit) const {
+  if (_unused_pbos.empty() && _num_pbos >= limit) {
+    do_wait_for_unused_pbo(limit);
+  }
+}

+ 60 - 0
panda/src/glstuff/glTextureContext_src.cxx

@@ -13,6 +13,8 @@
 
 #include "pnotify.h"
 
+static PStatCollector _wait_async_texture_uploads_pcollector("Wait:Async Texture Uploads");
+
 TypeHandle CLP(TextureContext)::_type_handle;
 
 /**
@@ -48,6 +50,8 @@ evict_lru() {
  */
 void CLP(TextureContext)::
 reset_data(GLenum target, int num_views) {
+  cancel_pending_uploads();
+
   // Free the texture resources.
   set_num_views(0);
 
@@ -238,3 +242,59 @@ mark_incoherent(bool wrote) {
 }
 
 #endif  // !OPENGLES_1
+
+/**
+ * Returns a PBO with the given size to the pool of unused PBOs.
+ *
+ * The PBO is deleted instead, and _num_pbos decremented, if this context
+ * currently counts more PBOs than the texture's configured number of async
+ * transfer buffers, or if the given size is smaller than _pbo_size.
+ * Otherwise the PBO is pushed onto the front of the unused pool.
+ */
+void CLP(TextureContext)::
+return_pbo(GLuint pbo, size_t size) {
+  // Also triggers when the number of buffers is -1 (which effectively means
+  // to always delete the buffers after use).
+  if (_num_pbos > get_texture()->get_num_async_transfer_buffers() ||
+      size < _pbo_size) {
+    // We have too many PBOs, or this PBO is smaller than the current
+    // _pbo_size, so delete it rather than returning it to the pool.
+    _num_pbos--;
+    _glgsg->_glDeleteBuffers(1, &pbo);
+  } else {
+    _unused_pbos.push_front(pbo);
+  }
+}
+
+/**
+ * Deletes all unused PBOs.  Each buffer in the unused pool is deleted
+ * through the GSG, _num_pbos is reduced by the number of buffers deleted,
+ * and the pool is emptied.  Does nothing if the pool is already empty.
+ */
+void CLP(TextureContext)::
+delete_unused_pbos() {
+  if (!_unused_pbos.empty()) {
+    for (GLuint pbo : _unused_pbos) {
+      _glgsg->_glDeleteBuffers(1, &pbo);
+    }
+    _num_pbos -= (int)_unused_pbos.size();
+    _unused_pbos.clear();
+  }
+}
+
+/**
+ * Waits for all uploads to be finished.  This is the out-of-line part of
+ * wait_pending_uploads().
+ *
+ * Calls process_pending_jobs(true) on the GSG at least once, and keeps
+ * calling it until is_upload_pending() returns false.  The time spent here is
+ * recorded under the "Wait:Async Texture Uploads" PStats collector.
+ *
+ * NOTE(review): the "true" argument is the wait flag of
+ * process_pending_jobs(); presumably it blocks until a job is available, but
+ * that function is not shown here - confirm.
+ */
+void CLP(TextureContext)::
+do_wait_pending_uploads() const {
+  PStatTimer timer(_wait_async_texture_uploads_pcollector);
+  do {
+    _glgsg->process_pending_jobs(true);
+  }
+  while (is_upload_pending());
+}
+
+/**
+ * Waits for an unused PBO to become available.  This is the out-of-line part
+ * of wait_for_unused_pbo().
+ *
+ * Calls process_pending_jobs(true) on the GSG at least once, and keeps
+ * calling it for as long as the pool of unused PBOs is empty and _num_pbos is
+ * at or above the given limit.  The time spent here is recorded under the
+ * "Wait:Async Texture Uploads" PStats collector.
+ */
+void CLP(TextureContext)::
+do_wait_for_unused_pbo(int limit) const {
+  PStatTimer timer(_wait_async_texture_uploads_pcollector);
+  do {
+    _glgsg->process_pending_jobs(true);
+  }
+  while (_unused_pbos.empty() && _num_pbos >= limit);
+}

+ 20 - 0
panda/src/glstuff/glTextureContext_src.h

@@ -47,6 +47,18 @@ public:
   void mark_incoherent(bool wrote);
 #endif
 
+  INLINE bool is_upload_pending() const;
+  INLINE void wait_pending_uploads() const;
+  INLINE void cancel_pending_uploads();
+
+  void return_pbo(GLuint pbo, size_t size);
+  void delete_unused_pbos();
+  INLINE void wait_for_unused_pbo(int limit) const;
+
+private:
+  void do_wait_pending_uploads() const;
+  void do_wait_for_unused_pbo(int limit) const;
+
 private:
   // This is the GL "name" of the texture object.
   GLuint _index;
@@ -76,6 +88,14 @@ public:
   GLenum _target;
   SamplerState _active_sampler;
 
+  // These counters are used to prevent out-of-order updates.
+  int _uploads_started = 0;
+  int _uploads_finished = 0;
+  int _uploads_pending = 0;
+  pdeque<GLuint> _unused_pbos;
+  int _num_pbos = 0;
+  size_t _pbo_size = 0;
+
   CLP(GraphicsStateGuardian) *_glgsg;
 
   // These are set to the equivalent counter in glgsg when a write is performed.

+ 5 - 0
panda/src/glstuff/glmisc_src.cxx

@@ -22,6 +22,11 @@ ConfigVariableBool gl_forward_compatible
    PRC_DESC("Setting this to true will request a forward-compatible OpenGL "
             "context, which will not support the fixed-function pipeline."));
 
+ConfigVariableBool gl_support_dsa
+  ("gl-support-dsa", true,
+   PRC_DESC("Configure this false if you suspect your GL's implementation of "
+            "Direct State Access is broken."));
+
 ConfigVariableBool gl_support_fbo
   ("gl-support-fbo", true,
    PRC_DESC("Configure this false if your GL's implementation of "

+ 1 - 0
panda/src/glstuff/glmisc_src.h

@@ -35,6 +35,7 @@
 extern EXPCL_GL ConfigVariableInt gl_version;
 extern EXPCL_GL ConfigVariableBool gl_forward_compatible;
 extern EXPCL_GL ConfigVariableBool gl_support_fbo;
+extern ConfigVariableBool gl_support_dsa;
 extern ConfigVariableBool gl_cheap_textures;
 extern ConfigVariableBool gl_ignore_clamp;
 extern ConfigVariableBool gl_support_clamp_to_border;

+ 18 - 3
panda/src/gobj/preparedGraphicsObjects.cxx

@@ -1515,9 +1515,24 @@ begin_frame(GraphicsStateGuardianBase *gsg, Thread *current_thread) {
     Texture *tex = qti->first;
     TextureContext *tc = tex->prepare_now(this, gsg);
     if (tc != nullptr) {
-      gsg->update_texture(tc, true);
-      if (qti->second != nullptr) {
-        qti->second->set_result(tc);
+      if (tex->get_num_async_transfer_buffers() == 0) {
+        gsg->update_texture(tc, true);
+        if (qti->second != nullptr) {
+          qti->second->set_result(tc);
+        }
+      } else {
+        // Async update
+        CompletionToken token;
+        if (qti->second != nullptr) {
+          token = [tc, fut = std::move(qti->second)] (bool success) {
+            if (success) {
+              fut->set_result(tc);
+            } else {
+              fut->notify_removed();
+            }
+          };
+        }
+        gsg->update_texture(tc, false, std::move(token));
       }
     }
   }

+ 8 - 0
panda/src/gobj/texture.I

@@ -2139,6 +2139,14 @@ rescale_texture() {
   return do_rescale_texture(cdata);
 }
 
+/**
+ * Returns the number previously passed to setup_async_transfer().  The value
+ * is read atomically with relaxed memory ordering.
+ */
+INLINE int Texture::
+get_num_async_transfer_buffers() const {
+  return _num_async_transfer_buffers.load(std::memory_order_relaxed);
+}
+
 /**
  * Works like adjust_size, but also considers the texture class.  Movie
  * textures, for instance, always pad outwards, regardless of textures-

+ 38 - 2
panda/src/gobj/texture.cxx

@@ -1570,6 +1570,27 @@ get_view_modified_pages(UpdateSeq since, int view, int n) const {
   return result;
 }
 
+/**
+ * Sets the number of buffers for asynchronous upload of texture data.  If this
+ * number is higher than 0, future texture uploads will occur in the
+ * background, up to the provided number at a time.  The asynchronous upload
+ * will be triggered by calls to prepare() or when the texture comes into view
+ * and allow-incomplete-render is true.
+ *
+ * Each buffer is only large enough to contain a single view, so you may wish
+ * to create twice as many buffers if you want to update twice as many views.
+ *
+ * You can also pass the special value -1, which means to create as many
+ * buffers as are necessary for all asynchronous uploads to take place; they
+ * will be deleted automatically afterwards.
+ *
+ * The new value is stored atomically and is what
+ * get_num_async_transfer_buffers() returns from then on, so this setting
+ * takes effect immediately.
+ */
+void Texture::
+setup_async_transfer(int num_buffers) {
+  _num_async_transfer_buffers.store(num_buffers);
+}
+
 /**
  * Indicates that the texture should be enqueued to be prepared in the
  * indicated prepared_objects at the beginning of the next frame.  This will
@@ -5704,7 +5725,14 @@ do_modify_ram_image(CData *cdata) {
   } else {
     do_clear_ram_mipmap_images(cdata);
   }
-  return cdata->_ram_images[0]._image;
+  PTA_uchar data = cdata->_ram_images[0]._image;
+  if (data.get_node_ref_count() > 0) {
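+    // Copy on write, if an upload thread is reading this now.
+    // NOTE(review): "data" is a local copy of the array handle, so the swap
+    // below appears to rebind only this local; confirm whether
+    // cdata->_ram_images[0]._image should also be pointed at the new copy.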
+    PTA_uchar new_data = PTA_uchar::empty_array(0);
+    new_data.v() = data.v();
+    data.swap(new_data);
+  }
+  return data;
 }
 
 /**
@@ -5779,7 +5807,15 @@ do_modify_ram_mipmap_image(CData *cdata, int n) {
       cdata->_ram_images[n]._image.empty()) {
     do_make_ram_mipmap_image(cdata, n);
   }
-  return cdata->_ram_images[n]._image;
+
+  PTA_uchar data = cdata->_ram_images[n]._image;
+  if (data.get_node_ref_count() > 0) {
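+    // Copy on write, if an upload thread is reading this now.
+    // NOTE(review): "data" is a local copy of the array handle, so the swap
+    // below appears to rebind only this local; confirm whether
+    // cdata->_ram_images[n]._image should also be pointed at the new copy.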
+    PTA_uchar new_data = PTA_uchar::empty_array(0);
+    new_data.v() = data.v();
+    data.swap(new_data);
+  }
+  return data;
 }
 
 /**

+ 6 - 0
panda/src/gobj/texture.h

@@ -47,6 +47,7 @@
 #include "pfmFile.h"
 #include "asyncTask.h"
 #include "extension.h"
+#include "patomic.h"
 
 class TextureContext;
 class FactoryParams;
@@ -536,6 +537,8 @@ PUBLISHED:
   MAKE_PROPERTY(auto_texture_scale, get_auto_texture_scale,
                                     set_auto_texture_scale);
 
+  void setup_async_transfer(int num_buffers);
+
   PT(AsyncFuture) prepare(PreparedGraphicsObjects *prepared_objects);
   bool is_prepared(PreparedGraphicsObjects *prepared_objects) const;
   bool was_image_modified(PreparedGraphicsObjects *prepared_objects) const;
@@ -628,6 +631,7 @@ PUBLISHED:
 
 public:
   void texture_uploaded();
+  INLINE int get_num_async_transfer_buffers() const;
 
   virtual bool has_cull_callback() const;
   virtual bool cull_callback(CullTraverser *trav, const CullTraverserData &data) const;
@@ -1072,6 +1076,8 @@ protected:
   typedef pmap<PreparedGraphicsObjects *, TextureContext *> Contexts;
   Contexts _contexts;
 
+  patomic_signed_lock_free _num_async_transfer_buffers { 0 };
+
   // It is common, when using normal maps, specular maps, gloss maps, and
   // such, to use a file naming convention where the filenames of the special
   // maps are derived by concatenating a suffix to the name of the diffuse

+ 2 - 0
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -22,6 +22,7 @@
 #include "lightMutex.h"
 #include "patomic.h"
 #include "small_vector.h"
+#include "completionToken.h"
 
 // A handful of forward references.
 
@@ -149,6 +150,7 @@ public:
 
   virtual TextureContext *prepare_texture(Texture *tex)=0;
   virtual bool update_texture(TextureContext *tc, bool force)=0;
+  virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token)=0;
   virtual void release_texture(TextureContext *tc)=0;
   virtual void release_textures(const pvector<TextureContext *> &contexts)=0;
   virtual bool extract_texture_data(Texture *tex)=0;

+ 14 - 0
panda/src/putil/completable.I

@@ -37,6 +37,20 @@ Completable(Completable &&from) noexcept :
   from._data = nullptr;
 }
 
+/**
+ * Move assignment.  Takes over the data held by the other Completable,
+ * leaving the other one empty (its _data is set to nullptr).
+ *
+ * If this object already held data, the function stored in that data is
+ * invoked with (data, false) after the pointers have been exchanged.
+ * NOTE(review): presumably the false argument disposes of the old callback
+ * without running it - confirm against the Data function contract.
+ *
+ * NOTE(review): there is no self-assignment check; assigning an object to
+ * itself clears _data and then passes the old data to its stored function.
+ */
+INLINE Completable &Completable::
+operator =(Completable &&from) {
+  Data *data = _data;
+  _data = from._data;
+  from._data = nullptr;
+  if (data != nullptr) {
+    data->_function.load(std::memory_order_relaxed)(data, false);
+  }
+  return *this;
+}
+
 /**
  *
  */

+ 3 - 0
panda/src/putil/completable.h

@@ -32,6 +32,9 @@ public:
   INLINE Completable(const Completable &copy) = delete;
   INLINE Completable(Completable &&from) noexcept;
 
+  INLINE Completable &operator =(const Completable &copy) = delete;
+  INLINE Completable &operator =(Completable &&from);
+
   INLINE void operator ()();
 
   INLINE ~Completable();

部分文件因为文件数量过多而无法显示