ソースを参照

glgsg: More efficient handling of texture memory barriers

The new system uses counters instead of sets. When a barrier is pending on
a texture, the texture's counter for that barrier is set to the same value
as the corresponding counter on the GSG, and the GSG counter is incremented
whenever that barrier is issued. This is enough to track pending barriers,
since barriers are global.
rdb 10 ヶ月 前
コミット
5b0cf9e053

+ 2 - 2
panda/src/glstuff/glGraphicsBuffer_src.cxx

@@ -281,7 +281,7 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       CLP(GraphicsStateGuardian) *glgsg = (CLP(GraphicsStateGuardian) *)_gsg.p();
       CLP(GraphicsStateGuardian) *glgsg = (CLP(GraphicsStateGuardian) *)_gsg.p();
 
 
       for (CLP(TextureContext) *gtc : _texture_contexts) {
       for (CLP(TextureContext) *gtc : _texture_contexts) {
-        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
           glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
           glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
           // If we've done it for one, we've done it for all.
           // If we've done it for one, we've done it for all.
           break;
           break;
@@ -1973,7 +1973,7 @@ resolve_multisamples() {
     // Issue memory barriers as necessary to make sure that the texture memory
     // Issue memory barriers as necessary to make sure that the texture memory
     // is synchronized before we blit to it.
     // is synchronized before we blit to it.
     for (CLP(TextureContext) *gtc : _texture_contexts) {
     for (CLP(TextureContext) *gtc : _texture_contexts) {
-      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
         glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
         glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
         // If we've done it for one, we've done it for all.
         // If we've done it for one, we've done it for all.
         break;
         break;

+ 10 - 25
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -6252,25 +6252,24 @@ issue_memory_barrier(GLbitfield barriers) {
 
 
   _glMemoryBarrier(barriers);
   _glMemoryBarrier(barriers);
 
 
-  // Indicate that barriers no longer need to be issued for the relevant lists
-  // of textures.
+  // Increment these counters to indicate that these barriers have been issued.
   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) {
   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) {
-    _textures_needing_fetch_barrier.clear();
+    ++_texture_fetch_barrier_counter;
     GLCAT.spam(false) << " texture_fetch";
     GLCAT.spam(false) << " texture_fetch";
   }
   }
 
 
   if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
   if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
-    _textures_needing_image_access_barrier.clear();
+    ++_shader_image_access_barrier_counter;
     GLCAT.spam(false) << " shader_image_access";
     GLCAT.spam(false) << " shader_image_access";
   }
   }
 
 
   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) {
   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) {
-    _textures_needing_update_barrier.clear();
+    ++_texture_update_barrier_counter;
     GLCAT.spam(false) << " texture_update";
     GLCAT.spam(false) << " texture_update";
   }
   }
 
 
   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) {
   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) {
-    _textures_needing_framebuffer_barrier.clear();
+    ++_framebuffer_barrier_counter;
     GLCAT.spam(false) << " framebuffer";
     GLCAT.spam(false) << " framebuffer";
   }
   }
 
 
@@ -6445,13 +6444,6 @@ void CLP(GraphicsStateGuardian)::
 release_texture(TextureContext *tc) {
 release_texture(TextureContext *tc) {
   CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
   CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
 
 
-#ifndef OPENGLES_1
-  _textures_needing_fetch_barrier.erase(gtc);
-  _textures_needing_image_access_barrier.erase(gtc);
-  _textures_needing_update_barrier.erase(gtc);
-  _textures_needing_framebuffer_barrier.erase(gtc);
-#endif
-
   gtc->set_num_views(0);
   gtc->set_num_views(0);
   delete gtc;
   delete gtc;
 }
 }
@@ -6473,13 +6465,6 @@ release_textures(const pvector<TextureContext *> &contexts) {
   for (TextureContext *tc : contexts) {
   for (TextureContext *tc : contexts) {
     CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
     CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
 
 
-#ifndef OPENGLES_1
-    _textures_needing_fetch_barrier.erase(gtc);
-    _textures_needing_image_access_barrier.erase(gtc);
-    _textures_needing_update_barrier.erase(gtc);
-    _textures_needing_framebuffer_barrier.erase(gtc);
-#endif
-
     num_indices += gtc->_num_views;
     num_indices += gtc->_num_views;
     if (gtc->_buffers != nullptr) {
     if (gtc->_buffers != nullptr) {
       num_buffers += gtc->_num_views;
       num_buffers += gtc->_num_views;
@@ -7887,8 +7872,8 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z,
   }
   }
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
-    // Make sure that any incoherent writes to this texture have been synced.
+  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) {
+    // Make sure that any reads and writes to this texture have been synced.
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   }
   }
 #endif
 #endif
@@ -14175,8 +14160,8 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) {
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
   if (needs_reload || !image.is_null()) {
   if (needs_reload || !image.is_null()) {
-    // Make sure that any incoherent writes to this texture have been synced.
-    if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
+    // Make sure that any reads and writes to this texture have been synced.
+    if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) {
       issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
       issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
     }
     }
   }
   }
@@ -15174,7 +15159,7 @@ do_extract_texture_data(CLP(TextureContext) *gtc, int view) {
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
   // Make sure any incoherent writes to the texture have been synced.
   // Make sure any incoherent writes to the texture have been synced.
-  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) {
+  if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, false)) {
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
     issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   }
   }
 #endif
 #endif

+ 8 - 6
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -1162,12 +1162,14 @@ public:
 #endif
 #endif
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-  // Stores textures for which memory bariers should be issued.
-  typedef pset<TextureContext*> TextureSet;
-  TextureSet _textures_needing_fetch_barrier;
-  TextureSet _textures_needing_image_access_barrier;
-  TextureSet _textures_needing_update_barrier;
-  TextureSet _textures_needing_framebuffer_barrier;
+  // Each counter increments every time the corresponding barrier is issued.
+  // GLTextureContext et al. store copies of these counters; when a write is
+  // performed on a texture, it sets its copy to match the value on the GSG to
+  // indicate that it is out of sync and the barrier needs to be issued.
+  int _texture_fetch_barrier_counter = 0;
+  int _shader_image_access_barrier_counter = 0;
+  int _texture_update_barrier_counter = 0;
+  int _framebuffer_barrier_counter = 0;
   int _shader_storage_barrier_counter = 0;
   int _shader_storage_barrier_counter = 0;
 #endif
 #endif
 
 

+ 11 - 7
panda/src/glstuff/glShaderContext_src.cxx

@@ -2813,12 +2813,6 @@ update_shader_texture_bindings(ShaderContext *prev) {
 
 
           int view = _glgsg->get_current_tex_view_offset();
           int view = _glgsg->get_current_tex_view_offset();
           gl_tex = gtc->get_view_index(view);
           gl_tex = gtc->get_view_index(view);
-
-#ifndef OPENGLES
-          if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)) {
-            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
-          }
-#endif
         }
         }
       }
       }
       input._writable = false;
       input._writable = false;
@@ -2879,7 +2873,17 @@ update_shader_texture_bindings(ShaderContext *prev) {
             access = GL_READ_ONLY;
             access = GL_READ_ONLY;
             gl_tex = 0;
             gl_tex = 0;
           }
           }
+        } else {
+          // If no parameters were specified, we have to assume writable access.
+          input._writable = true;
         }
         }
+
+#ifndef OPENGLES
+        if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT, input._writable)) {
+          barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+        }
+#endif
+
         _glgsg->_glBindImageTexture(i, gl_tex, bind_level, layered, bind_layer,
         _glgsg->_glBindImageTexture(i, gl_tex, bind_level, layered, bind_layer,
                                     access, gtc->_internal_format);
                                     access, gtc->_internal_format);
       }
       }
@@ -2969,7 +2973,7 @@ update_shader_texture_bindings(ShaderContext *prev) {
 #ifndef OPENGLES
 #ifndef OPENGLES
     // If it was recently written to, we will have to issue a memory barrier
     // If it was recently written to, we will have to issue a memory barrier
     // soon.
     // soon.
-    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT)) {
+    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT, false)) {
       barriers |= GL_TEXTURE_FETCH_BARRIER_BIT;
       barriers |= GL_TEXTURE_FETCH_BARRIER_BIT;
     }
     }
 #endif
 #endif

+ 50 - 24
panda/src/glstuff/glTextureContext_src.cxx

@@ -63,12 +63,13 @@ reset_data(GLenum target, int num_views) {
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
   // Mark the texture as coherent.
   // Mark the texture as coherent.
-  if (gl_enable_memory_barriers) {
-    _glgsg->_textures_needing_fetch_barrier.erase(this);
-    _glgsg->_textures_needing_image_access_barrier.erase(this);
-    _glgsg->_textures_needing_update_barrier.erase(this);
-    _glgsg->_textures_needing_framebuffer_barrier.erase(this);
-  }
+  _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter - 1;
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter - 1;
+  _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
+  _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
 #endif
 #endif
 }
 }
 
 
@@ -168,26 +169,50 @@ set_num_views(int num_views) {
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
 /**
 /**
- *
+ * Returns true if the texture needs a barrier before a read or write of the
+ * given kind.  If writing is false, only writes are synced, otherwise both
+ * reads and writes are synced.
  */
  */
 bool CLP(TextureContext)::
 bool CLP(TextureContext)::
-needs_barrier(GLbitfield barrier) {
+needs_barrier(GLbitfield barrier, bool writing) {
   if (!gl_enable_memory_barriers) {
   if (!gl_enable_memory_barriers) {
     return false;
     return false;
   }
   }
 
 
-  return (((barrier & GL_TEXTURE_FETCH_BARRIER_BIT) &&
-           _glgsg->_textures_needing_fetch_barrier.count(this)))
-      || (((barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) &&
-           _glgsg->_textures_needing_image_access_barrier.count(this)))
-      || (((barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) &&
-           _glgsg->_textures_needing_update_barrier.count(this)))
-      || (((barrier & GL_FRAMEBUFFER_BARRIER_BIT) &&
-           _glgsg->_textures_needing_framebuffer_barrier.count(this)));
+  if (barrier & GL_TEXTURE_FETCH_BARRIER_BIT) {
+    // This is always a read, so only sync RAW.
+    if (_glgsg->_texture_fetch_barrier_counter == _texture_fetch_barrier_counter) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
+    // Sync WAR, WAW and RAW, but not RAR.
+    if ((writing && _glgsg->_shader_image_access_barrier_counter == _shader_image_read_barrier_counter) ||
+        (_glgsg->_shader_image_access_barrier_counter == _shader_image_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) {
+    if ((writing && _glgsg->_texture_update_barrier_counter == _texture_read_barrier_counter) ||
+        (_glgsg->_texture_update_barrier_counter == _texture_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_FRAMEBUFFER_BARRIER_BIT) {
+    if ((writing && _glgsg->_framebuffer_barrier_counter == _framebuffer_read_barrier_counter) ||
+        (_glgsg->_framebuffer_barrier_counter == _framebuffer_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  return false;
 }
 }
 
 
 /**
 /**
- * Mark a texture as needing a memory barrier, since a non-coherent read or
+ * Mark a texture as needing a memory barrier, since an unsynchronized read or
  * write just happened to it.  If 'wrote' is true, it was written to.
  * write just happened to it.  If 'wrote' is true, it was written to.
  */
  */
 void CLP(TextureContext)::
 void CLP(TextureContext)::
@@ -199,16 +224,17 @@ mark_incoherent(bool wrote) {
   // If we only read from it, the next read operation won't need another
   // If we only read from it, the next read operation won't need another
   // barrier, since it'll be reading the same data.
   // barrier, since it'll be reading the same data.
   if (wrote) {
   if (wrote) {
-    _glgsg->_textures_needing_fetch_barrier.insert(this);
+    _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter;
+    _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+    _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+    _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter;
   }
   }
 
 
   // We could still write to it before we read from it, so we have to always
   // We could still write to it before we read from it, so we have to always
-  // insert these barriers.  This could be slightly optimized so that we don't
-  // issue a barrier between consecutive image reads, but that may not be
-  // worth the trouble.
-  _glgsg->_textures_needing_image_access_barrier.insert(this);
-  _glgsg->_textures_needing_update_barrier.insert(this);
-  _glgsg->_textures_needing_framebuffer_barrier.insert(this);
+  // insert these barriers.
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter;
 }
 }
 
 
 #endif  // !OPENGLES_1
 #endif  // !OPENGLES_1

+ 11 - 2
panda/src/glstuff/glTextureContext_src.h

@@ -41,9 +41,9 @@ public:
   INLINE GLuint get_view_buffer(int view) const;
   INLINE GLuint get_view_buffer(int view) const;
 
 
 #ifdef OPENGLES_1
 #ifdef OPENGLES_1
-  static constexpr bool needs_barrier(GLbitfield barrier) { return false; };
+  static constexpr bool needs_barrier(GLbitfield barrier, bool writing) { return false; };
 #else
 #else
-  bool needs_barrier(GLbitfield barrier);
+  bool needs_barrier(GLbitfield barrier, bool writing);
   void mark_incoherent(bool wrote);
   void mark_incoherent(bool wrote);
 #endif
 #endif
 
 
@@ -78,6 +78,15 @@ public:
 
 
   CLP(GraphicsStateGuardian) *_glgsg;
   CLP(GraphicsStateGuardian) *_glgsg;
 
 
+  // These are set to the equivalent counter in glgsg by mark_incoherent(): the
+  // fetch and write counters only on a write, the read counters on every call.
+  int _texture_fetch_barrier_counter = -1;
+  int _shader_image_read_barrier_counter = -1;
+  int _shader_image_write_barrier_counter = -1;
+  int _texture_read_barrier_counter = -1;
+  int _texture_write_barrier_counter = -1;
+  int _framebuffer_read_barrier_counter = -1;
+  int _framebuffer_write_barrier_counter = -1;
+
 public:
 public:
   static TypeHandle get_class_type() {
   static TypeHandle get_class_type() {
     return _type_handle;
     return _type_handle;