Browse Source

glgsg: More efficient handling of texture memory barriers

The new system uses counters instead of sets. Setting a per-texture counter
for each barrier to the same value as the corresponding counter on the GSG
when a barrier is pending, and incrementing the counter on the GSG whenever
that barrier is issued, is enough to track pending barriers, since barriers
are global.
rdb 10 months ago
parent
commit
5b0cf9e053

+ 2 - 2
panda/src/glstuff/glGraphicsBuffer_src.cxx

@@ -281,7 +281,7 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       CLP(GraphicsStateGuardian) *glgsg = (CLP(GraphicsStateGuardian) *)_gsg.p();
 
       for (CLP(TextureContext) *gtc : _texture_contexts) {
-        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
           glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
           // If we've done it for one, we've done it for all.
           break;
@@ -1973,7 +1973,7 @@ resolve_multisamples() {
     // Issue memory barriers as necessary to make sure that the texture memory
     // is synchronized before we blit to it.
     for (CLP(TextureContext) *gtc : _texture_contexts) {
-      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
         glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
         // If we've done it for one, we've done it for all.
         break;

+ 10 - 25
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -6252,25 +6252,24 @@ issue_memory_barrier(GLbitfield barriers) {
 
   _glMemoryBarrier(barriers);
 
-  // Indicate that barriers no longer need to be issued for the relevant lists
-  // of textures.
+  // Increment these counters to indicate that these barriers have been issued.
   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) {
-    _textures_needing_fetch_barrier.clear();
+    ++_texture_fetch_barrier_counter;
     GLCAT.spam(false) << " texture_fetch";
   }
 
   if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
-    _textures_needing_image_access_barrier.clear();
+    ++_shader_image_access_barrier_counter;
     GLCAT.spam(false) << " shader_image_access";
   }
 
   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) {
-    _textures_needing_update_barrier.clear();
+    ++_texture_update_barrier_counter;
     GLCAT.spam(false) << " texture_update";
   }
 
   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) {
-    _textures_needing_framebuffer_barrier.clear();
+    ++_framebuffer_barrier_counter;
     GLCAT.spam(false) << " framebuffer";
   }
 
@@ -6445,13 +6444,6 @@ void CLP(GraphicsStateGuardian)::
 release_texture(TextureContext *tc) {
   CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
 
-#ifndef OPENGLES_1
-  _textures_needing_fetch_barrier.erase(gtc);
-  _textures_needing_image_access_barrier.erase(gtc);
-  _textures_needing_update_barrier.erase(gtc);
-  _textures_needing_framebuffer_barrier.erase(gtc);
-#endif
-
   gtc->set_num_views(0);
   delete gtc;
 }
@@ -6473,13 +6465,6 @@ release_textures(const pvector<TextureContext *> &contexts) {
   for (TextureContext *tc : contexts) {
     CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
 
-#ifndef OPENGLES_1
-    _textures_needing_fetch_barrier.erase(gtc);
-    _textures_needing_image_access_barrier.erase(gtc);
-    _textures_needing_update_barrier.erase(gtc);
-    _textures_needing_framebuffer_barrier.erase(gtc);
-#endif
-
     num_indices += gtc->_num_views;
     if (gtc->_buffers != nullptr) {
       num_buffers += gtc->_num_views;
@@ -7887,8 +7872,8 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z,
   }
 
 #ifndef OPENGLES_1
-  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
-    // Make sure that any incoherent writes to this texture have been synced.
+  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) {
+    // Make sure that any reads and writes to this texture have been synced.
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   }
 #endif
@@ -14175,8 +14160,8 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) {
 
 #ifndef OPENGLES_1
   if (needs_reload || !image.is_null()) {
-    // Make sure that any incoherent writes to this texture have been synced.
-    if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
+    // Make sure that any reads and writes to this texture have been synced.
+    if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) {
       issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
     }
   }
@@ -15174,7 +15159,7 @@ do_extract_texture_data(CLP(TextureContext) *gtc, int view) {
 
 #ifndef OPENGLES_1
   // Make sure any incoherent writes to the texture have been synced.
-  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
+  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, false)) {
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   }
 #endif

+ 8 - 6
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -1162,12 +1162,14 @@ public:
 #endif
 
 #ifndef OPENGLES_1
-  // Stores textures for which memory bariers should be issued.
-  typedef pset<TextureContext*> TextureSet;
-  TextureSet _textures_needing_fetch_barrier;
-  TextureSet _textures_needing_image_access_barrier;
-  TextureSet _textures_needing_update_barrier;
-  TextureSet _textures_needing_framebuffer_barrier;
+  // These counters increment every time the corresponding barrier is issued.
+  // GLTextureContext et al. store copies of these counters; when an
+  // unsynchronized read or write is performed on a texture, it sets its
+  // counter to match the value on the GSG to indicate that it is out of sync
+  // and the barrier needs to be issued.
+  int _texture_fetch_barrier_counter = 0;
+  int _shader_image_access_barrier_counter = 0;
+  int _texture_update_barrier_counter = 0;
+  int _framebuffer_barrier_counter = 0;
   int _shader_storage_barrier_counter = 0;
 #endif
 

+ 11 - 7
panda/src/glstuff/glShaderContext_src.cxx

@@ -2813,12 +2813,6 @@ update_shader_texture_bindings(ShaderContext *prev) {
 
           int view = _glgsg->get_current_tex_view_offset();
           gl_tex = gtc->get_view_index(view);
-
-#ifndef OPENGLES
-          if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)) {
-            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
-          }
-#endif
         }
       }
       input._writable = false;
@@ -2879,7 +2873,17 @@ update_shader_texture_bindings(ShaderContext *prev) {
             access = GL_READ_ONLY;
             gl_tex = 0;
           }
+        } else {
+          // If no parameters were specified, we have to assume writable access.
+          input._writable = true;
         }
+
+#ifndef OPENGLES
+        if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT, input._writable)) {
+          barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+        }
+#endif
+
         _glgsg->_glBindImageTexture(i, gl_tex, bind_level, layered, bind_layer,
                                     access, gtc->_internal_format);
       }
@@ -2969,7 +2973,7 @@ update_shader_texture_bindings(ShaderContext *prev) {
 #ifndef OPENGLES
     // If it was recently written to, we will have to issue a memory barrier
     // soon.
-    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT)) {
+    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT, false)) {
       barriers |= GL_TEXTURE_FETCH_BARRIER_BIT;
     }
 #endif

+ 50 - 24
panda/src/glstuff/glTextureContext_src.cxx

@@ -63,12 +63,13 @@ reset_data(GLenum target, int num_views) {
 
 #ifndef OPENGLES_1
   // Mark the texture as coherent.
-  if (gl_enable_memory_barriers) {
-    _glgsg->_textures_needing_fetch_barrier.erase(this);
-    _glgsg->_textures_needing_image_access_barrier.erase(this);
-    _glgsg->_textures_needing_update_barrier.erase(this);
-    _glgsg->_textures_needing_framebuffer_barrier.erase(this);
-  }
+  _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter - 1;
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter - 1;
+  _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
+  _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
 #endif
 }
 
@@ -168,26 +169,50 @@ set_num_views(int num_views) {
 
 #ifndef OPENGLES_1
 /**
- *
+ * Returns true if the texture needs a barrier before a read or write of the
+ * given kind.  If writing is false, only writes are synced, otherwise both
+ * reads and writes are synced.
  */
 bool CLP(TextureContext)::
-needs_barrier(GLbitfield barrier) {
+needs_barrier(GLbitfield barrier, bool writing) {
   if (!gl_enable_memory_barriers) {
     return false;
   }
 
-  return (((barrier & GL_TEXTURE_FETCH_BARRIER_BIT) &&
-           _glgsg->_textures_needing_fetch_barrier.count(this)))
-      || (((barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) &&
-           _glgsg->_textures_needing_image_access_barrier.count(this)))
-      || (((barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) &&
-           _glgsg->_textures_needing_update_barrier.count(this)))
-      || (((barrier & GL_FRAMEBUFFER_BARRIER_BIT) &&
-           _glgsg->_textures_needing_framebuffer_barrier.count(this)));
+  if (barrier & GL_TEXTURE_FETCH_BARRIER_BIT) {
+    // This is always a read, so only sync RAW.
+    if (_glgsg->_texture_fetch_barrier_counter == _texture_fetch_barrier_counter) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
+    // Sync WAR, WAW and RAW, but not RAR.
+    if ((writing && _glgsg->_shader_image_access_barrier_counter == _shader_image_read_barrier_counter) ||
+        (_glgsg->_shader_image_access_barrier_counter == _shader_image_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) {
+    if ((writing && _glgsg->_texture_update_barrier_counter == _texture_read_barrier_counter) ||
+        (_glgsg->_texture_update_barrier_counter == _texture_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_FRAMEBUFFER_BARRIER_BIT) {
+    if ((writing && _glgsg->_framebuffer_barrier_counter == _framebuffer_read_barrier_counter) ||
+        (_glgsg->_framebuffer_barrier_counter == _framebuffer_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
 /**
- * Mark a texture as needing a memory barrier, since a non-coherent read or
+ * Mark a texture as needing a memory barrier, since an unsynchronized read or
  * write just happened to it.  If 'wrote' is true, it was written to.
  */
 void CLP(TextureContext)::
@@ -199,16 +224,17 @@ mark_incoherent(bool wrote) {
   // If we only read from it, the next read operation won't need another
   // barrier, since it'll be reading the same data.
   if (wrote) {
-    _glgsg->_textures_needing_fetch_barrier.insert(this);
+    _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter;
+    _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+    _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+    _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter;
   }
 
   // We could still write to it before we read from it, so we have to always
-  // insert these barriers.  This could be slightly optimized so that we don't
-  // issue a barrier between consecutive image reads, but that may not be
-  // worth the trouble.
-  _glgsg->_textures_needing_image_access_barrier.insert(this);
-  _glgsg->_textures_needing_update_barrier.insert(this);
-  _glgsg->_textures_needing_framebuffer_barrier.insert(this);
+  // insert these barriers.
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter;
 }
 
 #endif  // !OPENGLES_1

+ 11 - 2
panda/src/glstuff/glTextureContext_src.h

@@ -41,9 +41,9 @@ public:
   INLINE GLuint get_view_buffer(int view) const;
 
 #ifdef OPENGLES_1
-  static constexpr bool needs_barrier(GLbitfield barrier) { return false; };
+  static constexpr bool needs_barrier(GLbitfield barrier, bool writing) { return false; };
 #else
-  bool needs_barrier(GLbitfield barrier);
+  bool needs_barrier(GLbitfield barrier, bool writing);
   void mark_incoherent(bool wrote);
 #endif
 
@@ -78,6 +78,15 @@ public:
 
   CLP(GraphicsStateGuardian) *_glgsg;
 
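+  // These are set to the equivalent counter in glgsg by mark_incoherent(): the
+  // read counters on every call, the fetch and write counters only when the
+  // texture was written to.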
+  int _texture_fetch_barrier_counter = -1;
+  int _shader_image_read_barrier_counter = -1;
+  int _shader_image_write_barrier_counter = -1;
+  int _texture_read_barrier_counter = -1;
+  int _texture_write_barrier_counter = -1;
+  int _framebuffer_read_barrier_counter = -1;
+  int _framebuffer_write_barrier_counter = -1;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;