Browse Source

sokol_gfx.h gl: update memory barrier code for vertex- and index-buffer bindings

Andre Weissflog 5 months ago
parent
commit
2a32a7fa64
1 changed file with 53 additions and 18 deletions
  1. 53 18
      sokol_gfx.h

+ 53 - 18
sokol_gfx.h

@@ -5742,13 +5742,20 @@ typedef _sg_dummy_attachments_t _sg_attachments_t;
 
 #elif defined(_SOKOL_ANY_GL)
 
+typedef enum {
+    _SG_GL_GPUDIRTY_VERTEXBUFFER = (1<<0),
+    _SG_GL_GPUDIRTY_INDEXBUFFER = (1<<1),
+    _SG_GL_GPUDIRTY_STORAGEBUFFER = (1<<2),
+    _SG_GL_GPUDIRTY_BUFFER_ALL = _SG_GL_GPUDIRTY_VERTEXBUFFER | _SG_GL_GPUDIRTY_INDEXBUFFER | _SG_GL_GPUDIRTY_STORAGEBUFFER,
+} _sg_gl_gpudirty_t;
+
 typedef struct _sg_buffer_s {
     _sg_slot_t slot;
     _sg_buffer_common_t cmn;
     struct {
         GLuint buf[SG_NUM_INFLIGHT_FRAMES];
         bool injected;  // if true, external buffers were injected with sg_buffer_desc.gl_buffers
-        bool gpu_dirty; // true if modified by GPU shader but memory barrier hasn't been issued yet
+        uint8_t gpu_dirty_flags; // combination of _sg_gl_gpudirty_t flags
     } gl;
 } _sg_gl_buffer_t;
 typedef _sg_gl_buffer_t _sg_buffer_t;
@@ -9986,26 +9993,53 @@ _SOKOL_PRIVATE void _sg_gl_handle_memory_barriers(const _sg_shader_t* shd, const
     if (!_sg.features.compute) {
         return;
     }
-    // NOTE: currently only storage buffers can be GPU-written, and storage
-    // buffers cannot be bound as vertex- or index-buffers.
-    bool needs_barrier = false;
+    GLbitfield gl_barrier_bits = 0;
+
+    // if vertex-, index- or storage-buffer bindings have been written
+    // by a compute shader before, a barrier must be issued
+    for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) {
+        _sg_buffer_t* buf = bnd->vbs[i];
+        if (!buf) {
+            continue;
+        }
+        if (buf->gl.gpu_dirty_flags & _SG_GL_GPUDIRTY_VERTEXBUFFER) {
+            gl_barrier_bits |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
+            buf->gl.gpu_dirty_flags &= ~_SG_GL_GPUDIRTY_VERTEXBUFFER;
+        }
+    }
+    if (bnd->ib) {
+        _sg_buffer_t* buf = bnd->ib;
+        if (buf->gl.gpu_dirty_flags & _SG_GL_GPUDIRTY_INDEXBUFFER) {
+            gl_barrier_bits |= GL_ELEMENT_ARRAY_BARRIER_BIT;
+            buf->gl.gpu_dirty_flags &= ~_SG_GL_GPUDIRTY_INDEXBUFFER;
+        }
+    }
     for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) {
-        if (shd->cmn.storage_buffers[i].stage == SG_SHADERSTAGE_NONE) {
+        _sg_buffer_t* buf = bnd->sbufs[i];
+        if (!buf) {
             continue;
         }
+        SOKOL_ASSERT(shd->cmn.storage_buffers[i].stage != SG_SHADERSTAGE_NONE);
+        if (buf->gl.gpu_dirty_flags & _SG_GL_GPUDIRTY_STORAGEBUFFER) {
+            gl_barrier_bits |= GL_SHADER_STORAGE_BARRIER_BIT;
+            buf->gl.gpu_dirty_flags &= ~_SG_GL_GPUDIRTY_STORAGEBUFFER;
+        }
+    }
+
+    // mark storage buffers as dirty which will be written by compute shaders
+    // (don't merge this into the above loop, this would mess up the
+    // dirty flags if the same buffer is bound multiple times)
+    for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) {
         _sg_buffer_t* buf = bnd->sbufs[i];
-        // if this buffer has pending GPU changes, issue a memory barrier
-        if (buf->gl.gpu_dirty) {
-            buf->gl.gpu_dirty = false;
-            needs_barrier = true;
+        if (!buf) {
+            continue;
         }
-        // if this binding is going to be written by the GPU set the buffer to 'gpu_dirty'
         if (!shd->cmn.storage_buffers[i].readonly) {
-            buf->gl.gpu_dirty = true;
+            buf->gl.gpu_dirty_flags = _SG_GL_GPUDIRTY_BUFFER_ALL;
         }
     }
-    if (needs_barrier) {
-        glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+    if (0 != gl_barrier_bits) {
+        glMemoryBarrier(gl_barrier_bits);
         _sg_stats_add(gl.num_memory_barriers, 1);
     }
 }
@@ -10018,11 +10052,6 @@ _SOKOL_PRIVATE bool _sg_gl_apply_bindings(_sg_bindings_t* bnd) {
     _SG_GL_CHECK_ERROR();
     const _sg_shader_t* shd = bnd->pip->shader;
 
-    // take care of storage buffer memory barriers
-    #if defined(_SOKOL_GL_HAS_COMPUTE)
-    _sg_gl_handle_memory_barriers(shd, bnd);
-    #endif
-
     // bind combined image-samplers
     _SG_GL_CHECK_ERROR();
     for (size_t img_smp_index = 0; img_smp_index < SG_MAX_IMAGE_SAMPLER_PAIRS; img_smp_index++) {
@@ -10122,6 +10151,12 @@ _SOKOL_PRIVATE bool _sg_gl_apply_bindings(_sg_bindings_t* bnd) {
         }
     }
     _SG_GL_CHECK_ERROR();
+
+    // take care of storage buffer memory barriers (this needs to happen after the bindings are set)
+    #if defined(_SOKOL_GL_HAS_COMPUTE)
+    _sg_gl_handle_memory_barriers(shd, bnd);
+    #endif
+
     return true;
 }