Browse Source

vulkan: Overhaul command buffer tracking and resource tracking

Each command buffer gets a unique sequence number and resource use is now tracked per-CB.  Upon submission the barriers are issued on the previous CB (which is kept open).  Commands and command buffers may now be submitted continuously using flush() even outside begin_frame() / end_frame().

This greatly simplifies tracking and makes the system easier to understand than having VulkanFrameData manage the transfer cmd / render cmd dichotomy.  It also allows delayed submission, which allows reuse of the render CB as the transfer CB of the next bit of rendering.

VulkanFrameData is now "demoted" to only storing resources to clean up after the next fence; it could probably be renamed.

This also adds proper resource tracking and barrier use for SSBOs.
rdb 1 month ago
parent
commit
6d0d2dd612

+ 1 - 0
panda/src/vulkandisplay/p3vulkandisplay_composite1.cxx

@@ -1,5 +1,6 @@
 #include "config_vulkandisplay.cxx"
 #include "vulkanBufferContext.cxx"
+#include "vulkanCommandBuffer.cxx"
 #include "vulkanFrameData.cxx"
 #include "vulkanGraphicsBuffer.cxx"
 #include "vulkanGraphicsPipe.cxx"

+ 19 - 0
panda/src/vulkandisplay/vulkanBufferContext.h

@@ -34,6 +34,25 @@ public:
   VulkanMemoryBlock _block;
   bool _host_visible = false;
 
+  // Used for shader buffers.
+  VkAccessFlags _write_access_mask = 0;
+  VkPipelineStageFlags _write_stage_mask = 0;
+  VkPipelineStageFlags _read_stage_mask = 0;
+
+  // Sequence number of the last command buffer in which this was used.
+  uint64_t _read_seq = 0;
+  uint64_t _write_seq = 0;
+
+  // Index of the barrier into the list of barriers of the _read_seq CB.
+  size_t _image_barrier_index = 0;
+  size_t _buffer_barrier_index = 0;
+
+  // NOTE(review): VulkanFrameData::add_initial_barrier() is removed by this
+  // commit; these fields appear kept for initial-barrier tracking now done
+  // per command buffer -- confirm they are still needed and the comment fits.
+  VkAccessFlags _initial_src_access_mask = 0;
+  VkAccessFlags _initial_dst_access_mask = 0;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

+ 46 - 0
panda/src/vulkandisplay/vulkanCommandBuffer.I

@@ -0,0 +1,46 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file vulkanCommandBuffer.I
+ * @author rdb
+ * @date 2025-10-23
+ */
+
+/**
+ *
+ */
+INLINE VulkanCommandBuffer::
+VulkanCommandBuffer(VulkanCommandBuffer &&from) noexcept :
+  _cmd(from._cmd), _seq(from._seq), _wait_semaphore(from._wait_semaphore) {
+  from._cmd = VK_NULL_HANDLE;
+  from._seq = 0;
+  from._wait_semaphore = VK_NULL_HANDLE;
+}
+
+/**
+ *
+ */
+INLINE VulkanCommandBuffer::
+VulkanCommandBuffer(VkCommandBuffer cmd, uint64_t seq, VkSemaphore wait_for) :
+  _cmd(cmd), _seq(seq), _wait_semaphore(wait_for) {
+}
+
+/**
+ *
+ */
+INLINE VulkanCommandBuffer &VulkanCommandBuffer::
+operator = (VulkanCommandBuffer &&from) noexcept {
+  nassertr(_cmd == VK_NULL_HANDLE, *this);
+  _cmd = from._cmd;
+  _seq = from._seq;
+  _wait_semaphore = from._wait_semaphore;
+  from._cmd = VK_NULL_HANDLE;
+  from._seq = 0;
+  from._wait_semaphore = VK_NULL_HANDLE;
+  return *this;
+}

+ 324 - 0
panda/src/vulkandisplay/vulkanCommandBuffer.cxx

@@ -0,0 +1,324 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file vulkanCommandBuffer.cxx
+ * @author rdb
+ * @date 2025-10-23
+ */
+
+#include "vulkanCommandBuffer.h"
+#include "vulkanTextureContext.h"
+#include "shaderBuffer.h"
+
+/**
+ * Marks the given resource as being used by this command buffer, ensuring that
+ * the appropriate pipeline barrier is added to the command buffer.
+ *
+ * Note that these barriers may be done BEFORE waiting on the semaphore.
+ */
+void VulkanCommandBuffer::
+add_barrier(VulkanTextureContext *tc, VkImageLayout layout,
+            VkPipelineStageFlags dst_stage_mask,
+            VkAccessFlags dst_access_mask) {
+  nassertv(_cmd != VK_NULL_HANDLE);
+
+  // Are we writing to the texture?
+  VkAccessFlags write_mask = (dst_access_mask &
+    (VK_ACCESS_SHADER_WRITE_BIT |
+     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_TRANSFER_WRITE_BIT |
+     VK_ACCESS_HOST_WRITE_BIT |
+     VK_ACCESS_MEMORY_WRITE_BIT));
+
+  nassertv(tc->_write_seq <= _seq);
+  nassertv((write_mask == 0 || tc->_read_seq <= _seq));
+
+  VkPipelineStageFlags src_stage_mask = tc->_write_stage_mask;
+  VkAccessFlags src_access_mask = tc->_write_access_mask;
+
+  bool is_write = (tc->_layout != layout || write_mask != 0);
+  if (is_write) {
+    // Before a layout transition or a write, all stages that previously read
+    // this resource must have finished executing.
+    src_stage_mask |= tc->_read_stage_mask;
+
+    if (src_stage_mask == 0) {
+      // Can't specify a source stage mask of zero.
+      src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    }
+  }
+  else if (src_stage_mask == 0) {
+    // No write has been done, nothing to do here, except mark the read.
+    tc->_read_seq = _seq;
+    tc->_read_stage_mask |= dst_stage_mask;
+    return;
+  }
+  else {
+    // We've already synchronized these reads since the last write.
+    dst_stage_mask &= ~tc->_read_stage_mask;
+    if (dst_stage_mask == 0) {
+      // We could probably improve this by also early-outing if we've already
+      // synchronized a *preceding* stage.
+      tc->_read_seq = _seq;
+      return;
+    }
+  }
+
+  VkImageMemoryBarrier img_barrier;
+  if (tc->_image != VK_NULL_HANDLE) {
+    img_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+    img_barrier.pNext = nullptr;
+    img_barrier.srcAccessMask = src_access_mask;
+    img_barrier.dstAccessMask = dst_access_mask;
+    img_barrier.oldLayout = tc->_layout;
+    img_barrier.newLayout = layout;
+    img_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    img_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    img_barrier.image = tc->_image;
+    img_barrier.subresourceRange.aspectMask = tc->_aspect_mask;
+    img_barrier.subresourceRange.baseMipLevel = 0;
+    img_barrier.subresourceRange.levelCount = tc->_mip_levels;
+    img_barrier.subresourceRange.baseArrayLayer = 0;
+    img_barrier.subresourceRange.layerCount = tc->_array_layers;
+  }
+
+  VkBufferMemoryBarrier buf_barrier;
+  if (tc->_buffer != VK_NULL_HANDLE) {
+    buf_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    buf_barrier.pNext = nullptr;
+    buf_barrier.srcAccessMask = src_access_mask;
+    buf_barrier.dstAccessMask = dst_access_mask;
+    buf_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buf_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buf_barrier.buffer = tc->_buffer;
+    buf_barrier.offset = 0;
+    buf_barrier.size = VK_WHOLE_SIZE;
+  }
+
+  // We want to avoid adding lots of pipeline barriers to the command stream,
+  // so we instead add this to the list of barriers to be issued at the
+  // beginning of this CB, unless it has already been accessed in this CB.
+  bool pool_possible =
+    (tc->_write_seq < _seq && (tc->_read_seq < _seq || !is_write));
+
+  if (vulkandisplay_cat.is_spam()) {
+    const char src_type = is_write ? 'W' : 'R';
+    const char dst_type = (src_access_mask != 0) ? 'W' : 'R';
+    auto &out = vulkandisplay_cat.spam()
+      << (pool_possible ? "Pooling " : "Issuing ")
+      << src_type << 'A' << dst_type << " barrier for ";
+
+    Texture *tex = tc->get_texture();
+    if (tex != nullptr) {
+      out << "texture " << *tex;
+    } else if (tc->_swapchain_index >= 0) {
+      out << "swapchain image " << tc->_swapchain_index;
+    } else {
+      out << "TC " << tc;
+    }
+    out << " on CB #" << _seq << " (last "
+        << ((tc->_read_seq > tc->_write_seq) ? "read on #" : "write on #")
+        << tc->_read_seq << ")\n";
+    std::cerr << " last stage " << std::hex << tc->_write_stage_mask << " access mask " << tc->_write_access_mask << std::dec <<  "\n";
+  }
+
+  if (pool_possible) {
+    // First access in this CB, or a read in a CB without a write.
+    if (tc->_read_seq == _seq) {
+      // This barrier already exists; just modify it.
+      if (tc->_image != VK_NULL_HANDLE) {
+        VkImageMemoryBarrier &existing_barrier = _image_barriers[tc->_image_barrier_index];
+        existing_barrier.srcAccessMask |= img_barrier.srcAccessMask;
+        existing_barrier.dstAccessMask |= img_barrier.dstAccessMask;
+      }
+      if (tc->_buffer != VK_NULL_HANDLE) {
+        VkBufferMemoryBarrier &existing_barrier = _buffer_barriers[tc->_buffer_barrier_index];
+        existing_barrier.srcAccessMask |= buf_barrier.srcAccessMask;
+        existing_barrier.dstAccessMask |= buf_barrier.dstAccessMask;
+      }
+    } else {
+      if (tc->_image != VK_NULL_HANDLE) {
+        tc->_image_barrier_index = _image_barriers.size();
+        _image_barriers.push_back(std::move(img_barrier));
+      }
+      if (tc->_buffer != VK_NULL_HANDLE) {
+        tc->_buffer_barrier_index = _buffer_barriers.size();
+        _buffer_barriers.push_back(std::move(buf_barrier));
+      }
+    }
+    _barrier_src_stage_mask |= src_stage_mask;
+    _barrier_dst_stage_mask |= dst_stage_mask;
+  }
+  else {
+    // We already have an access done in this CB, issue the barrier now.
+    vkCmdPipelineBarrier(_cmd, src_stage_mask, dst_stage_mask, 0,
+                         0, nullptr,
+                         (tc->_buffer != VK_NULL_HANDLE), &buf_barrier,
+                         (tc->_image != VK_NULL_HANDLE), &img_barrier);
+  }
+
+  tc->_layout = layout;
+  tc->_read_seq = _seq;
+
+  if (write_mask != 0) {
+    // Remember which stages wrote to it and how.
+    tc->_write_stage_mask = dst_stage_mask;
+    tc->_write_access_mask = write_mask;
+    tc->_read_stage_mask = 0;
+    tc->_write_seq = _seq;
+  }
+  else {
+    // This is a read-after-write barrier.  It's possible that there will be
+    // another read later from a different (earlier) stage, which is why we
+    // don't zero out _write_stage_mask.  We can just check _read_stage_mask
+    // the next time to see what we have already synchronized with the write.
+    tc->_read_stage_mask |= dst_stage_mask & ~VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+
+    if (dst_stage_mask & (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+      // Actually, looks like we've synchronized all stages.  We still do need
+      // to keep _read_stage_mask, since a subsequent write still needs to
+      // wait for this read to complete.
+      tc->_write_stage_mask = 0;
+      tc->_write_access_mask = 0;
+    }
+  }
+}
+
+/**
+ * Same as above, but for shader buffers.
+ *
+ * Note that these barriers may be done BEFORE waiting on the semaphore.
+ */
+void VulkanCommandBuffer::
+add_barrier(VulkanBufferContext *bc, VkPipelineStageFlags dst_stage_mask,
+            VkAccessFlags dst_access_mask) {
+  nassertv(_cmd != VK_NULL_HANDLE);
+
+  // Are we writing to the buffer?
+  VkAccessFlags write_mask = (dst_access_mask &
+    (VK_ACCESS_SHADER_WRITE_BIT |
+     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_TRANSFER_WRITE_BIT |
+     VK_ACCESS_HOST_WRITE_BIT |
+     VK_ACCESS_MEMORY_WRITE_BIT));
+
+  nassertv(bc->_write_seq <= _seq);
+  nassertv((write_mask == 0 || bc->_read_seq <= _seq));
+
+  VkPipelineStageFlags src_stage_mask = bc->_write_stage_mask;
+  VkAccessFlags src_access_mask = bc->_write_access_mask;
+
+  if (write_mask != 0) {
+    // Before a layout transition or a write, all stages that previously read
+    // this resource must have finished executing.
+    src_stage_mask |= bc->_read_stage_mask;
+
+    if (src_stage_mask == 0) {
+      // Can't specify a source stage mask of zero.
+      src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    }
+  }
+  else if (src_stage_mask == 0) {
+    // No write has been done, nothing to do here, except mark the read.
+    bc->_read_seq = _seq;
+    bc->_read_stage_mask |= dst_stage_mask;
+    return;
+  }
+  else {
+    // We've already synchronized these reads since the last write.
+    dst_stage_mask &= ~bc->_read_stage_mask;
+    if (dst_stage_mask == 0) {
+      // We could probably improve this by also early-outing if we've already
+      // synchronized a *preceding* stage.
+      bc->_read_seq = _seq;
+      return;
+    }
+  }
+
+  VkBufferMemoryBarrier buf_barrier;
+  buf_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+  buf_barrier.pNext = nullptr;
+  buf_barrier.srcAccessMask = src_access_mask;
+  buf_barrier.dstAccessMask = dst_access_mask;
+  buf_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  buf_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  buf_barrier.buffer = bc->_buffer;
+  buf_barrier.offset = 0;
+  buf_barrier.size = VK_WHOLE_SIZE;
+
+  // We want to avoid adding lots of pipeline barriers to the command stream,
+  // so we instead add this to the list of barriers to be issued at the
+  // beginning of this CB, unless it has already been accessed in this CB.
+  if (bc->_write_seq < _seq && (bc->_read_seq < _seq || write_mask == 0)) {
+    // First access in this CB, or a read in a CB without a write.
+    if (bc->_read_seq == _seq) {
+      // This barrier already exists; just modify it.
+      VkBufferMemoryBarrier &existing_barrier = _buffer_barriers[bc->_buffer_barrier_index];
+      existing_barrier.srcAccessMask |= buf_barrier.srcAccessMask;
+      existing_barrier.dstAccessMask |= buf_barrier.dstAccessMask;
+    } else {
+      bc->_buffer_barrier_index = _buffer_barriers.size();
+      _buffer_barriers.push_back(std::move(buf_barrier));
+    }
+    _barrier_src_stage_mask |= src_stage_mask;
+    _barrier_dst_stage_mask |= dst_stage_mask;
+
+    if (vulkandisplay_cat.is_spam()) {
+      const char src_type = (write_mask != 0) ? 'W' : 'R';
+      const char dst_type = (src_access_mask != 0) ? 'W' : 'R';
+      vulkandisplay_cat.spam()
+        << "Pooling " << src_type << 'A' << dst_type << " barrier for SSBO "
+        << *(ShaderBuffer *)bc->get_object()
+        << " on CB #" << _seq << " (last "
+        << ((bc->_read_seq > bc->_write_seq) ? "read on #" : "write on #")
+        << bc->_read_seq << ")\n";
+    }
+  }
+  else {
+    // We already have an access done in this CB, issue the barrier now.
+    if (vulkandisplay_cat.is_spam()) {
+      const char src_type = (write_mask != 0) ? 'W' : 'R';
+      const char dst_type = (src_access_mask != 0) ? 'W' : 'R';
+      vulkandisplay_cat.spam()
+        << "Issuing " << src_type << 'A' << dst_type << " barrier for SSBO "
+        << *(ShaderBuffer *)bc->get_object()
+        << " on CB #" << _seq << " (last "
+        << ((bc->_read_seq > bc->_write_seq) ? "read on #" : "write on #")
+        << bc->_read_seq << ")\n";
+    }
+    vkCmdPipelineBarrier(_cmd, src_stage_mask, dst_stage_mask, 0,
+                         0, nullptr, 1, &buf_barrier, 0, nullptr);
+  }
+
+  bc->_read_seq = _seq;
+
+  if (write_mask != 0) {
+    // Remember which stages wrote to it and how.
+    bc->_write_stage_mask = dst_stage_mask;
+    bc->_write_access_mask = write_mask;
+    bc->_read_stage_mask = 0;
+    bc->_write_seq = _seq;
+  }
+  else {
+    // This is a read-after-write barrier.  It's possible that there will be
+    // another read later from a different (earlier) stage, which is why we
+    // don't zero out _write_stage_mask.  We can just check _read_stage_mask
+    // the next time to see what we have already synchronized with the write.
+    bc->_read_stage_mask |= dst_stage_mask & ~VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+
+    if (dst_stage_mask & (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+      // Actually, looks like we've synchronized all stages.  We still do need
+      // to keep _read_stage_mask, since a subsequent write still needs to
+      // wait for this read to complete.
+      bc->_write_stage_mask = 0;
+      bc->_write_access_mask = 0;
+    }
+  }
+}

+ 69 - 0
panda/src/vulkandisplay/vulkanCommandBuffer.h

@@ -0,0 +1,69 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file vulkanCommandBuffer.h
+ * @author rdb
+ * @date 2025-10-23
+ */
+
+#ifndef VULKANCOMMANDBUFFER_H
+#define VULKANCOMMANDBUFFER_H
+
+#include "config_vulkandisplay.h"
+#include "vulkanMemoryPage.h"
+#include "screenshotRequest.h"
+
+class VulkanTextureContext;
+class VulkanBufferContext;
+
+/**
+ * Keeps track of a single command buffer as well as the resources.
+ *
+ * Each command buffer has a unique sequence index which indicates the order in
+ * which it is intended to be submitted.
+ */
+class VulkanCommandBuffer {
+public:
+  VulkanCommandBuffer() = default;
+  INLINE VulkanCommandBuffer(VulkanCommandBuffer &&from) noexcept;
+  INLINE VulkanCommandBuffer(VkCommandBuffer cmd, uint64_t seq,
+                             VkSemaphore wait_for = VK_NULL_HANDLE);
+
+  INLINE VulkanCommandBuffer &operator = (VulkanCommandBuffer &&from) noexcept;
+
+  operator VkCommandBuffer() const {
+    return _cmd;
+  }
+
+  void add_barrier(VulkanTextureContext *tc, VkImageLayout layout,
+                   VkPipelineStageFlags stage_mask,
+                   VkAccessFlags access_mask = 0);
+  void add_barrier(VulkanBufferContext *bc,
+                   VkPipelineStageFlags stage_mask,
+                   VkAccessFlags access_mask = 0);
+
+public:
+  VkCommandBuffer _cmd = VK_NULL_HANDLE;
+  uint64_t _seq = 0;
+
+  // Semaphore to wait on before these commands may be executed.  Note that
+  // there is no _signal_done_semaphore; this is passed as an argument into
+  // GSG::end_command_buffer().
+  VkSemaphore _wait_semaphore = VK_NULL_HANDLE;
+
+  // These barriers need to be issued BEFORE the command buffer (usually the
+  // barrier is added to the previous command buffer).
+  pvector<VkImageMemoryBarrier> _image_barriers;
+  pvector<VkBufferMemoryBarrier> _buffer_barriers;
+  VkPipelineStageFlags _barrier_src_stage_mask = 0;
+  VkPipelineStageFlags _barrier_dst_stage_mask = 0;
+};
+
+#include "vulkanCommandBuffer.I"
+
+#endif

+ 0 - 178
panda/src/vulkandisplay/vulkanFrameData.cxx

@@ -14,184 +14,6 @@
 #include "vulkanFrameData.h"
 #include "vulkanTextureContext.h"
 
-/**
- * Ensures a pipeline barrier is created for an initial transition of the given
- * texture, after the transfer commands have completed, but before the frist
- * write.  These are pooled together.
- *
- * For now, may only be called for read barriers, not write, so access_mask
- * may not contain any write bits.
- *
- * The _initial_src_layout, etc. members should have already been initialized
- * in this frame.  Any transfers on the transfer queue must have already been
- * performed for this texture.
- *
- * Returns false if this is actually not possible, due to a layout mismatch,
- * in which case there can't be a pooled transition.
- */
-bool VulkanFrameData::
-add_initial_barrier(VulkanTextureContext *tc, VkImageLayout layout,
-                    VkPipelineStageFlags stage_mask,
-                    VkAccessFlags access_mask) {
-  if (layout == tc->_layout && tc->_write_stage_mask == 0) {
-    // No write to sync with.
-    return true;
-  }
-
-  // These reads have already been synced.
-  if (layout == tc->_layout) {
-    stage_mask &= ~tc->_read_stage_mask;
-    if (stage_mask == 0) {
-      return true;
-    }
-  }
-
-  if (tc->_initial_dst_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-    // These are derived from the current stage of the texture, after any
-    // transfer commands but before any writes have been done.
-    tc->_initial_src_layout = tc->_layout;
-    tc->_initial_src_access_mask |= tc->_write_access_mask;
-    _initial_barrier_src_stage_mask |= tc->_write_stage_mask;
-
-    if (layout != tc->_layout) {
-      // If we change layout, wait for all reads to complete too.
-      _initial_barrier_src_stage_mask |= tc->_read_stage_mask;
-    }
-
-    // And this is what we are transitioning to.
-    tc->_initial_dst_layout = layout;
-    tc->_layout = layout;
-
-    _initial_barrier_textures.push_back(tc);
-    _initial_barrier_image_count += (tc->_image != VK_NULL_HANDLE);
-    _initial_barrier_buffer_count += (tc->_buffer != VK_NULL_HANDLE);
-  }
-  else if (tc->_initial_dst_layout != layout) {
-    return false;
-  }
-
-  tc->mark_read(stage_mask);
-  tc->_initial_dst_access_mask |= access_mask;
-  _initial_barrier_dst_stage_mask |= stage_mask;
-  return true;
-}
-
-/**
- *
- */
-bool VulkanFrameData::
-begin_transfer_cmd() {
-  static const VkCommandBufferBeginInfo begin_info = {
-    VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-    nullptr,
-    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
-    nullptr,
-  };
-
-  VkResult err;
-  err = vkBeginCommandBuffer(_transfer_cmd, &begin_info);
-  if (err != VK_SUCCESS) {
-    vulkan_error(err, "Can't begin transfer command buffer");
-    return false;
-  }
-  return true;
-}
-
-/**
- * Issues a pipeline barrier to all the initial transitions, and closes the
- * transfer command buffer.
- */
-void VulkanFrameData::
-end_transfer_cmd() {
-  nassertv(_transfer_cmd != VK_NULL_HANDLE);
-
-  if (_initial_barrier_dst_stage_mask != 0) {
-    VkImageMemoryBarrier *image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * _initial_barrier_image_count);
-    VkBufferMemoryBarrier *buffer_barriers = (VkBufferMemoryBarrier *)alloca(sizeof(VkBufferMemoryBarrier) * _initial_barrier_buffer_count);
-
-    uint32_t ii = 0;
-    uint32_t bi = 0;
-    for (VulkanTextureContext *tc : _initial_barrier_textures) {
-      if (tc->_image != VK_NULL_HANDLE) {
-        VkImageMemoryBarrier &barrier = image_barriers[ii++];
-        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = tc->_initial_src_access_mask;
-        barrier.dstAccessMask = tc->_initial_dst_access_mask;
-        barrier.oldLayout = tc->_initial_src_layout;
-        barrier.newLayout = tc->_initial_dst_layout;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.image = tc->_image;
-        barrier.subresourceRange.aspectMask = tc->_aspect_mask;
-        barrier.subresourceRange.baseMipLevel = 0;
-        barrier.subresourceRange.levelCount = tc->_mip_levels;
-        barrier.subresourceRange.baseArrayLayer = 0;
-        barrier.subresourceRange.layerCount = tc->_array_layers;
-      }
-
-      if (tc->_buffer != VK_NULL_HANDLE) {
-        VkBufferMemoryBarrier &barrier = buffer_barriers[bi++];
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = tc->_initial_src_access_mask;
-        barrier.dstAccessMask = tc->_initial_dst_access_mask;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = tc->_buffer;
-        barrier.offset = 0;
-        barrier.size = VK_WHOLE_SIZE;
-      }
-
-      tc->_initial_src_access_mask = 0;
-      tc->_initial_dst_access_mask = 0;
-      tc->_initial_src_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-      tc->_initial_dst_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-    }
-    vkCmdPipelineBarrier(_transfer_cmd, _initial_barrier_src_stage_mask,
-                         _initial_barrier_dst_stage_mask, 0,
-                         0, nullptr, bi, buffer_barriers, ii, image_barriers);
-
-    _initial_barrier_textures.clear();
-    _initial_barrier_src_stage_mask = 0;
-    _initial_barrier_dst_stage_mask = 0;
-    _initial_barrier_image_count = 0;
-    _initial_barrier_buffer_count = 0;
-  }
-
-  vkEndCommandBuffer(_transfer_cmd);
-}
-
-/**
- *
- */
-bool VulkanFrameData::
-begin_render_cmd() {
-  static const VkCommandBufferBeginInfo begin_info = {
-    VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-    nullptr,
-    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
-    nullptr,
-  };
-
-  VkResult err;
-  err = vkBeginCommandBuffer(_cmd, &begin_info);
-  if (err != VK_SUCCESS) {
-    vulkan_error(err, "Can't begin render command buffer");
-    return false;
-  }
-  return true;
-}
-
-/**
- *
- */
-void VulkanFrameData::
-end_render_cmd() {
-  nassertv(_cmd != VK_NULL_HANDLE);
-  vkEndCommandBuffer(_cmd);
-}
-
 /**
  *
  */

+ 10 - 31
panda/src/vulkandisplay/vulkanFrameData.h

@@ -19,29 +19,20 @@
 #include "screenshotRequest.h"
 
 class VulkanTextureContext;
+class VulkanBufferContext;
 
 /**
- * Stores all the data that has been collected between a begin_frame/end_frame
- * pair, until the frame has finished rendering on the GPU.
+ * Stores all the resources that have been used during a frame until it has
+ * finished rendering on the GPU, at which point they may be released back to
+ * their respective pools (and other CPU-related tasks depending on the data
+ * may be performed).
  *
- * At the moment, the frame is divided up into two command buffers, one
- * collecting all the actions needed to prepare and upload the texture data
- * (_transfer_cmd) and one containing the actual rendering (_cmd).  At the end
- * of the transfer cmd we issue a barrier for preparing all the resources for
- * their first use.  Both command buffers are submitted in gsg->end_frame().
+ * The definition of a "frame" is a bit vague here, as the frame may be ended
+ * arbitrarily early, however it lasts at least as long as a
+ * begin_frame()/end_frame() pair and at most as long as a clock frame.
  */
 class VulkanFrameData {
 public:
-  bool add_initial_barrier(VulkanTextureContext *tc, VkImageLayout layout,
-                           VkPipelineStageFlags stage_mask,
-                           VkAccessFlags access_mask = 0);
-
-  bool begin_transfer_cmd();
-  void end_transfer_cmd();
-
-  bool begin_render_cmd();
-  void end_render_cmd();
-
   void finish_downloads(VkDevice device);
 
   void replace_timer_query_pool(VkQueryPool new_pool, size_t new_size);
@@ -50,22 +41,9 @@ public:
   uint64_t _frame_index = 0;
   int _clock_frame_number = 0;
   VkFence _fence = VK_NULL_HANDLE;
-  VkCommandBuffer _cmd = VK_NULL_HANDLE;
-  VkCommandBuffer _transfer_cmd = VK_NULL_HANDLE;
-
-  // The frame data takes ownership of this semaphore, which indicates when the
-  // frame is allowed to start rendering (the image is available).
-  VkSemaphore _wait_semaphore = VK_NULL_HANDLE;
-
-  // Barriers that are aggregated for the beginning of the frame, put at the
-  // end of the transfer command buffer.
-  pvector<VulkanTextureContext *> _initial_barrier_textures;
-  VkPipelineStageFlags _initial_barrier_src_stage_mask = 0;
-  VkPipelineStageFlags _initial_barrier_dst_stage_mask = 0;
-  size_t _initial_barrier_image_count = 0;
-  size_t _initial_barrier_buffer_count = 0;
 
   // Keep track of resources that should be deleted after this frame is done.
+  pvector<VkCommandBuffer> _pending_command_buffers;
   pvector<VulkanMemoryBlock> _pending_free;
   pvector<VkBuffer> _pending_destroy_buffers;
   pvector<VkBufferView> _pending_destroy_buffer_views;
@@ -74,6 +52,7 @@ public:
   pvector<VkImageView> _pending_destroy_image_views;
   pvector<VkRenderPass> _pending_destroy_render_passes;
   pvector<VkSampler> _pending_destroy_samplers;
+  pvector<VkSemaphore> _pending_destroy_semaphores;
   pvector<VkDescriptorSet> _pending_free_descriptor_sets;
 
   VkDeviceSize _uniform_buffer_head = 0;

+ 25 - 9
panda/src/vulkandisplay/vulkanGraphicsBuffer.cxx

@@ -141,8 +141,8 @@ begin_frame(FrameMode mode, Thread *current_thread) {
 
   // Now that we have a command buffer, start our render pass.  First
   // transition the swapchain images into the valid state for rendering into.
-  VulkanFrameData &frame_data = vkgsg->get_frame_data();
-  VkCommandBuffer cmd = frame_data._cmd;
+  VkCommandBuffer cmd = vkgsg->_render_cmd;
+  nassertr(cmd != VK_NULL_HANDLE, false);
 
   VkRenderingAttachmentInfo *color_attachments = (VkRenderingAttachmentInfo *)
     alloca(_attachments.size() * sizeof(VkRenderingAttachmentInfo));
@@ -158,8 +158,8 @@ begin_frame(FrameMode mode, Thread *current_thread) {
 
   for (size_t i = 0; i < _attachments.size(); ++i) {
     Attachment &attach = _attachments[i];
-    nassertr(!attach._tc->is_used_this_frame(frame_data), false);
-    attach._tc->mark_used_this_frame(frame_data);
+    nassertr(attach._tc->_read_seq < vkgsg->_render_cmd._seq, false);
+    attach._tc->set_active(true);
 
     VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
     VkAccessFlags write_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
@@ -209,12 +209,15 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       color_attachment.imageLayout = layout;
     }
 
-    if (attach._tc->_layout != layout ||
+    vkgsg->_render_cmd.add_barrier(attach._tc, layout, stage_mask,
+                                   read_access_mask | write_access_mask);
+
+    /*if (attach._tc->_layout != layout ||
         (attach._tc->_write_stage_mask & ~stage_mask) != 0 ||
         (attach._tc->_read_stage_mask & ~stage_mask) != 0) {
       frame_data.add_initial_barrier(attach._tc,
         layout, stage_mask, read_access_mask | write_access_mask);
-    }
+    }*/
   }
 
   vkgsg->_vkCmdBeginRendering(cmd, &render_info);
@@ -299,7 +302,7 @@ end_frame(FrameMode mode, Thread *current_thread) {
   if (mode == FM_render) {
     VulkanGraphicsStateGuardian *vkgsg;
     DCAST_INTO_V(vkgsg, _gsg);
-    VkCommandBuffer cmd = vkgsg->_frame_data->_cmd;
+    VkCommandBuffer cmd = vkgsg->_render_cmd;
     nassertv(cmd != VK_NULL_HANDLE);
 
     vkgsg->_vkCmdEndRendering(cmd);
@@ -314,6 +317,20 @@ end_frame(FrameMode mode, Thread *current_thread) {
       attach._tc->_write_stage_mask |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
     }*/
 
+    for (Attachment &attach : _attachments) {
+      if (attach._plane == RTP_stencil || attach._plane == RTP_depth ||
+          attach._plane == RTP_depth_stencil) {
+        attach._tc->_write_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
+                                      | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        attach._tc->_write_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+      } else {
+        attach._tc->_write_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        attach._tc->_write_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+      }
+      attach._tc->_read_stage_mask = 0;
+      attach._tc->_write_seq = vkgsg->_render_cmd._seq;
+    }
+
     // Now we can do copy-to-texture, now that the render pass has ended.
     copy_to_textures();
 
@@ -585,8 +602,7 @@ destroy_framebuffer() {
   DCAST_INTO_V(vkgsg, _gsg);
   VkDevice device = vkgsg->_device;
 
-  // This shouldn't happen within a begin_frame/end_frame pair.
-  nassertv(vkgsg->_frame_data == nullptr);
+  vkgsg->flush();
 
   // Make sure that the GSG's command buffer releases its resources.
   //if (vkgsg->_cmd != VK_NULL_HANDLE) {

+ 3 - 1
panda/src/vulkandisplay/vulkanGraphicsStateGuardian.I

@@ -17,6 +17,8 @@
  */
 INLINE VulkanFrameData &VulkanGraphicsStateGuardian::
 get_frame_data() {
-  nassertr(_frame_data != nullptr, *_last_frame_data);
+  if (_frame_data == nullptr) {
+    _frame_data = &get_next_frame_data();
+  }
   return *_frame_data;
 }

File diff suppressed because it is too large
+ 437 - 263
panda/src/vulkandisplay/vulkanGraphicsStateGuardian.cxx


+ 36 - 4
panda/src/vulkandisplay/vulkanGraphicsStateGuardian.h

@@ -15,10 +15,12 @@
 #define VULKANGRAPHICSSTATEGUARDIAN_H
 
 #include "config_vulkandisplay.h"
+#include "vulkanCommandBuffer.h"
 #include "vulkanFrameData.h"
 #include "vulkanMemoryPage.h"
 #include "vulkanShaderContext.h"
 #include "circularAllocator.h"
+#include "completionToken.h"
 
 class VulkanBufferContext;
 class VulkanGraphicsPipe;
@@ -60,8 +62,10 @@ public:
                                     bool discard=false);
   virtual TextureContext *prepare_texture(Texture *tex);
   bool create_texture(VulkanTextureContext *vtc);
-  bool upload_texture(VulkanTextureContext *vtc);
-  virtual bool update_texture(TextureContext *tc, bool force);
+  bool upload_texture(VulkanTextureContext *vtc,
+                      CompletionToken token = CompletionToken());
+  virtual bool update_texture(TextureContext *tc, bool force,
+                              CompletionToken token = CompletionToken());
   virtual void release_texture(TextureContext *tc);
   virtual bool extract_texture_data(Texture *tex);
 
@@ -86,6 +90,9 @@ public:
                            bool force);
   virtual void release_index_buffer(IndexBufferContext *ibc);
 
+  VulkanBufferContext *use_shader_buffer(ShaderBuffer *buffer,
+                                         VkPipelineStageFlags stage_mask,
+                                         VkAccessFlags access_mask);
   virtual BufferContext *prepare_shader_buffer(ShaderBuffer *data);
   virtual void release_shader_buffer(BufferContext *bc);
   virtual bool extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data);
@@ -117,6 +124,11 @@ public:
   FrameData &get_next_frame_data(bool finish_frames = false);
   INLINE FrameData &get_frame_data();
 
+  VulkanCommandBuffer begin_command_buffer(VkSemaphore wait_for = VK_NULL_HANDLE);
+  void end_command_buffer(VulkanCommandBuffer &&cmd,
+                          VkSemaphore signal_done = VK_NULL_HANDLE);
+  bool flush(VkFence fence = VK_NULL_HANDLE);
+
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      size_t num_instances, bool force);
@@ -158,8 +170,7 @@ public:
 private:
   bool do_extract_image(VulkanTextureContext *tc, Texture *tex, int view, int z=-1,
                         ScreenshotRequest *request = nullptr);
-  bool do_extract_buffer(VulkanFrameData &frame_data,
-                         VulkanBufferContext *tc, vector_uchar &data);
+  bool do_extract_buffer(VulkanBufferContext *tc, vector_uchar &data);
 
   bool do_draw_primitive_with_topology(const GeomPrimitivePipelineReader *reader,
                                       bool force, VkPrimitiveTopology topology,
@@ -175,6 +186,7 @@ public:
                     VkImageCreateFlags flags = 0);
 
   VkSemaphore create_semaphore();
+  VkFence create_fence();
 
   struct FbConfig;
   uint32_t choose_fb_config(FbConfig &out, FrameBufferProperties &props,
@@ -297,6 +309,26 @@ private:
   pdeque<VulkanMemoryPage> _memory_pages;
   VkDeviceSize _total_allocated = 0u;
 
+  // We store references to two command buffers.  The transfer cmd is used for
+  // anything that needs to happen outside a render pass (including transfers),
+  // the render cmd is used for anything inside.  The render cmd is only
+  // present between begin_frame() and end_frame() and MUST have a higher seq.
+  VulkanCommandBuffer _transfer_cmd;
+  VulkanCommandBuffer _render_cmd;
+
+  uint64_t _next_begin_command_buffer_seq = 0;
+  uint64_t _next_end_command_buffer_seq = 0;
+  pvector<VkCommandBuffer> _free_command_buffers; // new and unused
+  pvector<VkCommandBuffer> _pending_command_buffers; // ready to submit
+  uint32_t _first_pending_command_buffer_seq = 0;
+  struct PendingSubmission {
+    VkSemaphore _wait_semaphore;
+    VkSemaphore _signal_semaphore;
+    uint32_t _first_command_buffer; // Indexes into _pending_command_buffers
+    uint32_t _num_command_buffers;
+  };
+  pvector<PendingSubmission> _pending_submissions;
+
   static const size_t _frame_data_capacity = 5;
   FrameData _frame_data_pool[_frame_data_capacity];
   size_t _frame_data_head = _frame_data_capacity;

+ 48 - 26
panda/src/vulkandisplay/vulkanGraphicsWindow.cxx

@@ -153,17 +153,16 @@ begin_frame(FrameMode mode, Thread *current_thread) {
   nassertr(_image_index < _swap_buffers.size(), false);
   SwapBuffer &buffer = _swap_buffers[_image_index];
 
-  VulkanFrameData &frame_data = vkgsg->get_frame_data();
 
   VulkanTextureContext *color_tc;
-  nassertr(!buffer._tc->is_used_this_frame(frame_data), false);
-  buffer._tc->mark_used_this_frame(frame_data);
+  nassertr(buffer._tc->_read_seq < vkgsg->_render_cmd._seq, false);
+  buffer._tc->set_active(true);
 
   // If we have multisamples, we render to a different image, which we then
   // resolve into the swap chain image.
   if (_ms_color_tc != nullptr) {
-    nassertr(!_ms_color_tc->is_used_this_frame(frame_data), false);
-    _ms_color_tc->mark_used_this_frame(frame_data);
+    nassertr(_ms_color_tc->_read_seq < vkgsg->_render_cmd._seq, false);
+    _ms_color_tc->set_active(true);
     color_tc = _ms_color_tc;
   } else {
     color_tc = buffer._tc;
@@ -175,7 +174,8 @@ begin_frame(FrameMode mode, Thread *current_thread) {
 
   // Now that we have a command buffer, start our render pass.  First
   // transition the swapchain images into the valid state for rendering into.
-  VkCommandBuffer cmd = frame_data._cmd;
+  VkCommandBuffer cmd = vkgsg->_render_cmd;
+  nassertr(cmd != VK_NULL_HANDLE, false);
 
   VkRenderingInfo render_info = {VK_STRUCTURE_TYPE_RENDERING_INFO};
   render_info.layerCount = 1;
@@ -200,20 +200,36 @@ begin_frame(FrameMode mode, Thread *current_thread) {
   }
   color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 
-  if (color_tc->_layout != color_attachment.imageLayout ||
+  // Reset this to reflect getting this texture fresh from the present engine.
+  color_tc->_write_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+  color_tc->_write_access_mask = 0;
+  color_tc->_read_stage_mask = 0;
+
+  //NB. We can't let add_barrier pool these barriers because they would be
+  // issued before the wait on the semaphore is complete.  Therefore we
+  // twiddle the _read_seq and _write_seq as well to force the barrier to be
+  // issued in the middle of the command stream.
+  if (vkgsg->_render_cmd._wait_semaphore) {
+    color_tc->_read_seq = vkgsg->_render_cmd._seq;
+    color_tc->_write_seq = vkgsg->_render_cmd._seq;
+  }
+  vkgsg->_render_cmd.add_barrier(color_tc, color_attachment.imageLayout,
+                                 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
+  /*if (color_tc->_layout != color_attachment.imageLayout ||
       (color_tc->_write_stage_mask & ~VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) != 0 ||
       (color_tc->_read_stage_mask & ~VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) != 0) {
     frame_data.add_initial_barrier(color_tc,
       color_attachment.imageLayout,
       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
       VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
-  }
+  }*/
 
   VkRenderingAttachmentInfo depth_attachment = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO};
   VkRenderingAttachmentInfo stencil_attachment = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO};
   if (_depth_stencil_tc != nullptr) {
-    nassertr(!_depth_stencil_tc->is_used_this_frame(frame_data), false);
-    _depth_stencil_tc->mark_used_this_frame(frame_data);
+    nassertr(_depth_stencil_tc->_read_seq < vkgsg->_render_cmd._seq, false);
+    _depth_stencil_tc->set_active(true);
 
     if (_depth_stencil_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
       render_info.pDepthAttachment = &depth_attachment;
@@ -243,14 +259,19 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       stencil_attachment.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
     }
 
-    if (_depth_stencil_tc->_layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
+    /*if (_depth_stencil_tc->_layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
         (_depth_stencil_tc->_write_stage_mask & ~VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT) != 0 ||
         (_depth_stencil_tc->_read_stage_mask & ~VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) != 0) {
       frame_data.add_initial_barrier(_depth_stencil_tc,
         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
         VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
-    }
+    }*/
+
+    vkgsg->_render_cmd.add_barrier(_depth_stencil_tc,
+      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
   }
 
   vkgsg->_vkCmdBeginRendering(cmd, &render_info);
@@ -361,7 +382,7 @@ end_frame(FrameMode mode, Thread *current_thread) {
   VulkanGraphicsStateGuardian *vkgsg;
   DCAST_INTO_V(vkgsg, _gsg);
 
-  VkCommandBuffer cmd = vkgsg->_frame_data->_cmd;
+  VkCommandBuffer cmd = vkgsg->_render_cmd;
   nassertv(cmd != VK_NULL_HANDLE);
   SwapBuffer &buffer = _swap_buffers[_image_index];
 
@@ -373,14 +394,14 @@ end_frame(FrameMode mode, Thread *current_thread) {
 
     // The driver implicitly transitioned this to the final layout.
     buffer._tc->_layout = _final_layout;*/
-    buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
+    //buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+    //                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
 
     if (_depth_stencil_tc != nullptr) {
       //_depth_stencil_tc->_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-      _depth_stencil_tc->mark_written(
-        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
-        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
+      //_depth_stencil_tc->mark_written(
+      //  VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+      //  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
     }
 
     // Now we can do copy-to-texture, now that the render pass has ended.
@@ -389,12 +410,12 @@ end_frame(FrameMode mode, Thread *current_thread) {
     signal_done = buffer._render_complete;
 
     // If we copied the textures, transition it back to the present state.
-    if (buffer._tc->_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
-      buffer._tc->transition(cmd, vkgsg->_graphics_queue_family_index,
-                             VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
-                             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
-    }
+    //if (buffer._tc->_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
+    vkgsg->_render_cmd.add_barrier(buffer._tc,
+                                   VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+                                   VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                   0);
+    //}
   }
 
   // Note: this will close the command buffer, and unsignal the previous
@@ -744,7 +765,7 @@ setup_render_pass() {
       << "Creating render pass for VulkanGraphicsWindow " << this << "\n";
   }
 
-  nassertr(vkgsg->_frame_data == nullptr, false);
+  vkgsg->flush();
 
   {
     // Do we intend to copy the framebuffer to a texture?
@@ -1065,6 +1086,7 @@ create_swapchain() {
     buffer._tc->_extent = extent;
     buffer._tc->_mip_levels = 1;
     buffer._tc->_array_layers = 1;
+    buffer._tc->_swapchain_index = (int)i;
 
     VkImageViewCreateInfo view_info;
     view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
@@ -1221,7 +1243,7 @@ create_swapchain() {
     }*/
 
     // Don't start rendering until the image has been acquired.
-    buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0);
+    //buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0);
   }
 
   // Create a semaphore for signalling the availability of an image.

+ 9 - 2
panda/src/vulkandisplay/vulkanShaderContext.cxx

@@ -775,9 +775,16 @@ fetch_descriptor(VulkanGraphicsStateGuardian *gsg, const Descriptor &desc,
     for (ResourceId id : desc._resource_ids) {
       PT(ShaderBuffer) buffer = desc._binding->fetch_shader_buffer(state, id);
 
+      VkAccessFlags access_mask = 0;
+      if ((desc._access & ShaderType::Access::READ_ONLY) != ShaderType::Access::NONE) {
+        access_mask |= VK_ACCESS_SHADER_READ_BIT;
+      }
+      if ((desc._access & ShaderType::Access::WRITE_ONLY) != ShaderType::Access::NONE) {
+        access_mask |= VK_ACCESS_SHADER_WRITE_BIT;
+      }
+
       VulkanBufferContext *bc;
-      DCAST_INTO_R(bc, buffer->prepare_now(pgo, gsg), false);
-      bc->set_active(true);
+      bc = gsg->use_shader_buffer(buffer, desc._pipeline_stage_mask, access_mask);
 
       VkDescriptorBufferInfo &buffer_info = *buffer_infos++;
       buffer_info.buffer = bc->_buffer;

+ 1 - 56
panda/src/vulkandisplay/vulkanTextureContext.I

@@ -44,61 +44,6 @@ get_buffer_view(int view) const {
   }
 }
 
-/**
- * Returns true if the texture has been used this frame.  By "used" we mean
- * that it has been accessed by a shader, not updated from the CPU.
- */
-INLINE bool VulkanTextureContext::
-is_used_this_frame(VulkanFrameData &frame_data) const {
-  return frame_data._frame_index == _last_use_frame;
-}
-
-/**
- * Marks the texture as having been used this frame.  By "used" we mean
- * that it has been accessed by a shader, not updated from the CPU.
- */
-INLINE void VulkanTextureContext::
-mark_used_this_frame(VulkanFrameData &frame_data) {
-  if (frame_data._frame_index != _last_use_frame) {
-    _last_use_frame = frame_data._frame_index;
-
-    // Good opportunity to initialize these fields.
-    _initial_src_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-    _initial_dst_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-    _initial_src_access_mask = 0;
-    _initial_dst_access_mask = 0;
-
-    // And to call this.
-    set_active(true);
-  }
-}
-
-/**
- * Indicates that the texture is being read from by the given stage.  Any
- * subsequent writes must wait until the given stage has passed the pipeline.
- *
- * Generally you should rely on gsg->use_texture() or tc->transition() to take
- * care of this.
- */
-INLINE void VulkanTextureContext::
-mark_read(VkPipelineStageFlags stage) {
-  _read_stage_mask |= stage & ~VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-}
-
-/**
- * Indicates that the texture is being written by the given stage.  Any
- * subsequent writes must wait until the given stage has passed the pipeline.
- *
- * Generally you should rely on gsg->use_texture() or tc->transition() to take
- * care of this.
- */
-INLINE void VulkanTextureContext::
-mark_written(VkPipelineStageFlags stage, VkAccessFlags access_mask) {
-  _write_stage_mask |= stage;
-  _write_access_mask |= access_mask;
-  _read_stage_mask = 0;
-}
-
 /**
  * Indicates that future commands aren't really interested in seeing the
  * results of writes made before it to this image.
@@ -109,5 +54,5 @@ discard() {
   // want the execution dependency; we don't want the next write to stomp over
   // the texture data while it's still being read.
   _layout = VK_IMAGE_LAYOUT_UNDEFINED;
-  _write_access_mask = 0;
+  //_write_access_mask = 0;
 }

+ 10 - 132
panda/src/vulkandisplay/vulkanTextureContext.cxx

@@ -12,6 +12,7 @@
  */
 
 #include "vulkanTextureContext.h"
+#include "vulkanCommandBuffer.h"
 #include "vulkanFrameData.h"
 
 TypeHandle VulkanTextureContext::_type_handle;
@@ -164,16 +165,15 @@ destroy_now(VkDevice device) {
  * Inserts commands to clear the image.
  */
 void VulkanTextureContext::
-clear_color_image(VkCommandBuffer cmd, const VkClearColorValue &value) {
+clear_color_image(VulkanCommandBuffer &cmd, const VkClearColorValue &value) {
   nassertv(_aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT);
   nassertv(_image != VK_NULL_HANDLE);
 
   // We're not interested in whatever was in here before.
   discard();
 
-  transition(cmd, 0,//vkgsg->_graphics_queue_family_index,
-    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+  cmd.add_barrier(this, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                  VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
 
   VkImageSubresourceRange range;
   range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
@@ -188,16 +188,15 @@ clear_color_image(VkCommandBuffer cmd, const VkClearColorValue &value) {
  * Inserts commands to clear the image.
  */
 void VulkanTextureContext::
-clear_depth_stencil_image(VkCommandBuffer cmd, const VkClearDepthStencilValue &value) {
+clear_depth_stencil_image(VulkanCommandBuffer &cmd, const VkClearDepthStencilValue &value) {
   nassertv(_aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT);
   nassertv(_image != VK_NULL_HANDLE);
 
   // We're not interested in whatever was in here before.
   discard();
 
-  transition(cmd, 0,//vkgsg->_graphics_queue_family_index,
-    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+  cmd.add_barrier(this, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                  VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
 
   VkImageSubresourceRange range;
   range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
@@ -212,133 +211,12 @@ clear_depth_stencil_image(VkCommandBuffer cmd, const VkClearDepthStencilValue &v
  * Inserts commands to clear the buffer.
  */
 void VulkanTextureContext::
-clear_buffer(VkCommandBuffer cmd, uint32_t fill) {
+clear_buffer(VulkanCommandBuffer &cmd, uint32_t fill) {
   nassertv(_buffer != VK_NULL_HANDLE);
 
   discard();
-  transition(cmd, 0,//vkgsg->_graphics_queue_family_index,
-    _layout, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+  cmd.add_barrier(this, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                  VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
 
   vkCmdFillBuffer(cmd, _buffer, 0, VK_WHOLE_SIZE, fill);
-  mark_written(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
-}
-
-/**
- * Issues a command to transition the image to a new layout or queue family.
- * Also issues the appropriate memory barrier to prevent read-after-write and
- * write-after-write hazards.
- *
- * For a buffer texture, layout is ignored.
- *
- * Please be very aware on which command buffer the transition is happening:
- * it is assumed that the command written by this call is also submitted in
- * the same order as the transition() calls are made, so you may not call
- * transition() on the transfer command buffer after having called it on the
- * render command buffer!
- *
- * Implicitly calls mark_read() or mark_written() depending on the access mask.
- * Does not (yet) do inter-queue synchronization.
- */
-void VulkanTextureContext::
-transition(VkCommandBuffer cmd, uint32_t queue_family, VkImageLayout layout,
-           VkPipelineStageFlags dst_stage_mask, VkAccessFlags dst_access_mask) {
-
-  if (_image == VK_NULL_HANDLE) {
-    layout = _layout;
-  }
-
-  // Are we writing to the texture?
-  VkAccessFlags write_mask = (dst_access_mask &
-    (VK_ACCESS_SHADER_WRITE_BIT |
-     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-     VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
-     VK_ACCESS_TRANSFER_WRITE_BIT |
-     VK_ACCESS_HOST_WRITE_BIT |
-     VK_ACCESS_MEMORY_WRITE_BIT));
-
-  // If we wrote to this recently (or performed a layout transition), we must
-  // wait for that to be finished.
-  VkPipelineStageFlags src_stage_mask = _write_stage_mask;
-
-  if (_layout != layout || write_mask != 0) {
-    // Before a layout transition or a write, all previous reads must have
-    // finished.
-    src_stage_mask |= _read_stage_mask;
-
-    if (src_stage_mask == 0) {
-      // Can't specify a source stage mask of zero.
-      src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-    }
-  }
-  else if (src_stage_mask == 0) {
-    // No write has been done, nothing to do here.
-    return;
-  }
-  else {
-    // We've already synchronized these reads since the last write.
-    dst_stage_mask &= ~_read_stage_mask;
-    if (dst_stage_mask == 0) {
-      // We could probably improve this by also early-outing if we've already
-      // synchronized a *preceding* stage.
-      return;
-    }
-  }
-
-  VkImageMemoryBarrier img_barrier;
-  if (_image != VK_NULL_HANDLE) {
-    img_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-    img_barrier.pNext = nullptr;
-    img_barrier.srcAccessMask = _write_access_mask;
-    img_barrier.dstAccessMask = dst_access_mask;
-    img_barrier.oldLayout = _layout;
-    img_barrier.newLayout = layout;
-    img_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    img_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    img_barrier.image = _image;
-    img_barrier.subresourceRange.aspectMask = _aspect_mask;
-    img_barrier.subresourceRange.baseMipLevel = 0;
-    img_barrier.subresourceRange.levelCount = _mip_levels;
-    img_barrier.subresourceRange.baseArrayLayer = 0;
-    img_barrier.subresourceRange.layerCount = _array_layers;
-  }
-  VkBufferMemoryBarrier buf_barrier;
-  if (_buffer != VK_NULL_HANDLE) {
-    buf_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-    buf_barrier.pNext = nullptr;
-    buf_barrier.srcAccessMask = _write_access_mask;
-    buf_barrier.dstAccessMask = dst_access_mask;
-    buf_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    buf_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    buf_barrier.buffer = _buffer;
-    buf_barrier.offset = 0;
-    buf_barrier.size = VK_WHOLE_SIZE;
-  }
-  vkCmdPipelineBarrier(cmd, src_stage_mask, dst_stage_mask, 0,
-                       0, nullptr,
-                       (_buffer != VK_NULL_HANDLE), &buf_barrier,
-                       (_image != VK_NULL_HANDLE), &img_barrier);
-
-  _layout = layout;
-
-  if (write_mask != 0) {
-    // Remember which stages wrote to it and how.
-    _write_stage_mask = dst_stage_mask;
-    _write_access_mask = write_mask;
-    _read_stage_mask = 0;
-  }
-  else {
-    // This is a read-after-write barrier.  It's possible that there will be
-    // another read later from a different (earlier) stage, which is why we
-    // don't zero out _write_stage_mask.  We can just check _read_stage_mask
-    // the next time to see what we have already synchronized with the write.
-    mark_read(dst_stage_mask);
-
-    if (dst_stage_mask & (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
-      // Actually, looks like we've synchronized all stages.  We still do need
-      // to keep _read_access_mask, since a subsequent write still needs to
-      // wait for this read to complete.
-      _write_stage_mask = 0;
-      _write_access_mask = 0;
-    }
-  }
 }

+ 24 - 25
panda/src/vulkandisplay/vulkanTextureContext.h

@@ -18,6 +18,8 @@
 #include "textureContext.h"
 #include "small_vector.h"
 
+class VulkanCommandBuffer;
+
 /**
  * Manages a Vulkan image and device memory.
  */
@@ -36,20 +38,11 @@ public:
   INLINE VkImageView get_image_view(int view) const;
   INLINE VkBufferView get_buffer_view(int view) const;
 
-  INLINE bool is_used_this_frame(VulkanFrameData &frame_data) const;
-  INLINE void mark_used_this_frame(VulkanFrameData &frame_data);
-
-  INLINE void mark_read(VkPipelineStageFlags stage_mask);
-  INLINE void mark_written(VkPipelineStageFlags stage_mask,
-                           VkAccessFlags access_mask);
   INLINE void discard();
 
-  void clear_color_image(VkCommandBuffer cmd, const VkClearColorValue &value);
-  void clear_depth_stencil_image(VkCommandBuffer cmd, const VkClearDepthStencilValue &value);
-  void clear_buffer(VkCommandBuffer cmd, uint32_t fill);
-
-  void transition(VkCommandBuffer cmd, uint32_t queue_family, VkImageLayout layout,
-                  VkPipelineStageFlags dst_stage_mask, VkAccessFlags dst_access_mask);
+  void clear_color_image(VulkanCommandBuffer &cmd, const VkClearColorValue &value);
+  void clear_depth_stencil_image(VulkanCommandBuffer &cmd, const VkClearDepthStencilValue &value);
+  void clear_buffer(VulkanCommandBuffer &cmd, uint32_t fill);
 
 public:
   VkFormat _format = VK_FORMAT_UNDEFINED;
@@ -63,6 +56,9 @@ public:
   bool _swap_bgra8 = false;
   bool _supports_render_to_texture = false;
 
+  // Just for debugging.  It's -1 if it's not a swapchain image.
+  int _swapchain_index = -1;
+
   // Depending on whether it's a buffer texture or image texture, either the
   // image and image view or buffer and buffer view will be set.
   VkImage _image = VK_NULL_HANDLE;
@@ -71,26 +67,29 @@ public:
   small_vector<VkBufferView> _buffer_views;
   VulkanMemoryBlock _block;
 
-  // Frame number of the last time gsg->use_texture() was called.
-  uint64_t _last_use_frame = 0;
+  // Sequence number of the last command buffer in which this was used.
+  uint64_t _read_seq = 0;
+  uint64_t _write_seq = 0;
 
-  // These fields are managed by VulkanFrameData::add_initial_transition(),
-  // and are used to keep track of the transition we do at the beginning of a
-  // frame.
-  VkImageLayout _initial_src_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-  VkImageLayout _initial_dst_layout = VK_IMAGE_LAYOUT_UNDEFINED;
-  VkAccessFlags _initial_src_access_mask = 0;
-  VkAccessFlags _initial_dst_access_mask = 0;
+  // Index of the barrier into the list of barriers of the _read_seq CB.
+  size_t _image_barrier_index = 0;
+  size_t _buffer_barrier_index = 0;
 
-  // Frame number of the last GPU write to this texture.
-  uint64_t _last_write_frame = 0;
-
-  // The "current" layout and access mask (as of the last command submitted)
+  // The "current" layout and details of the last write.
   VkImageLayout _layout = VK_IMAGE_LAYOUT_UNDEFINED;
   VkAccessFlags _write_access_mask = 0;
   VkPipelineStageFlags _write_stage_mask = 0;
+
+  // Which stages we've already synchronized with the last write.
   VkPipelineStageFlags _read_stage_mask = 0;
 
+  // If you're wondering why there is no _read_access_mask, read this:
+  // https://github.com/KhronosGroup/Vulkan-Docs/issues/131
+
+  VkBuffer _async_staging_buffer = VK_NULL_HANDLE;
+  void *_async_staging_ptr = nullptr;
+  size_t _async_buffer_size = 0;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

Some files were not shown because too many files changed in this diff