Browse Source

vulkan: Assorted validation, hazard, UB fixes, best practices

rdb 1 year ago
parent
commit
d02bc60312

+ 8 - 0
panda/src/vulkandisplay/config_vulkandisplay.cxx

@@ -61,6 +61,14 @@ ConfigVariableInt64 vulkan_staging_buffer_size
           "buffer instead.  Set this to 0 to create separate staging buffers "
           "for all uploads."));
 
+ConfigVariableBool vulkan_support_custom_border_color
+("vulkan-support-custom-border-color", true,
+ PRC_DESC("If this is set to true, and this is supported by the video card, "
+          "any value for the texture border color is supported.  If false, "
+          "only opaque white, transparent white, opaque black and transparent "
+          "black are supported.  If you do not need to use a different border "
+          "color than these, it is strongly recommended to set this to false."));
+
 #define VK_ERROR_INVALID_SHADER_NV -1000012000
 
 /**

+ 3 - 1
panda/src/vulkandisplay/config_vulkandisplay.h

@@ -16,13 +16,15 @@
 
 #include "pandabase.h"
 #include "notifyCategoryProxy.h"
-#include "configVariableInt.h"
+#include "configVariableBool.h"
 #include "configVariableInt64.h"
 
 NotifyCategoryDecl(vulkandisplay, EXPCL_VULKANDISPLAY, EXPTP_VULKANDISPLAY);
 
 extern ConfigVariableInt64 vulkan_memory_page_size;
 extern ConfigVariableInt64 vulkan_global_uniform_buffer_size;
+extern ConfigVariableInt64 vulkan_staging_buffer_size;
+extern ConfigVariableBool vulkan_support_custom_border_color;
 
 extern EXPCL_VULKANDISPLAY void init_libvulkandisplay();
 extern "C" EXPCL_VULKANDISPLAY int get_pipe_type_p3vulkandisplay();

+ 36 - 9
panda/src/vulkandisplay/vulkanGraphicsBuffer.cxx

@@ -83,7 +83,21 @@ begin_frame(FrameMode mode, Thread *current_thread) {
 
   VulkanGraphicsStateGuardian *vkgsg;
   DCAST_INTO_R(vkgsg, _gsg, false);
-  //vkgsg->reset_if_new();
+
+  if (vkgsg->needs_reset()) {
+    vkQueueWaitIdle(vkgsg->_queue);
+    destroy_framebuffer();
+    if (_render_pass != VK_NULL_HANDLE) {
+      vkDestroyRenderPass(vkgsg->_device, _render_pass, nullptr);
+      _render_pass = VK_NULL_HANDLE;
+    }
+    vkgsg->reset_if_new();
+  }
+
+
+  if (!vkgsg->is_valid()) {
+    return false;
+  }
 
   vkgsg->_fb_color_tc = nullptr;
   vkgsg->_fb_depth_tc = nullptr;
@@ -148,7 +162,8 @@ begin_frame(FrameMode mode, Thread *current_thread) {
     attach._tc->set_active(true);
 
     VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-    VkAccessFlags access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    VkAccessFlags write_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    VkAccessFlags read_access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
     VkPipelineStageFlags stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
 
     if (attach._plane == RTP_stencil || attach._plane == RTP_depth ||
@@ -157,9 +172,10 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
       stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
                  | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
-      access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-
-    } else if (attach._plane == RTP_color) {
+      write_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+      read_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+    }
+    else if (attach._plane == RTP_color) {
       vkgsg->_fb_color_tc = attach._tc;
     }
 
@@ -172,11 +188,12 @@ begin_frame(FrameMode mode, Thread *current_thread) {
 
       // This transition will be made when the first subpass is started.
       attach._tc->_layout = layout;
-      attach._tc->_access_mask = access_mask;
-      attach._tc->_stage_mask = stage_mask;
+      attach._tc->_read_stage_mask = stage_mask;
+      attach._tc->_write_stage_mask = stage_mask;
+      attach._tc->_write_access_mask = write_access_mask;
     } else {
       attach._tc->transition(cmd, vkgsg->_graphics_queue_family_index,
-                             layout, stage_mask, access_mask);
+                             layout, stage_mask, read_access_mask | write_access_mask);
     }
   }
 
@@ -208,7 +225,9 @@ end_frame(FrameMode mode, Thread *current_thread) {
     // The driver implicitly transitioned this to the final layout.
     for (Attachment &attach : _attachments) {
       attach._tc->_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
-      attach._tc->_access_mask = VK_ACCESS_MEMORY_READ_BIT;
+
+      // This seems to squelch a validation warning, not sure about this yet
+      attach._tc->_write_stage_mask |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
     }
 
     // Now we can do copy-to-texture, now that the render pass has ended.
@@ -273,6 +292,14 @@ open_buffer() {
     DCAST_INTO_R(vkgsg, _gsg.p(), false);
   }
 
+  vkgsg->reset_if_new();
+  if (!vkgsg->is_valid()) {
+    _gsg.clear();
+    vulkandisplay_cat.error()
+      << "VulkanGraphicsStateGuardian is not valid.\n";
+    return false;
+  }
+
   // Choose a suitable color format.  Sorted in order of preferability,
   // preferring lower bpps over higher bpps, and preferring formats that pack
   // bits in fewer channels (because if the user only requests red bits, they

+ 4 - 3
panda/src/vulkandisplay/vulkanGraphicsPipe.cxx

@@ -326,11 +326,12 @@ VulkanGraphicsPipe() : _max_allocation_size(0) {
       features2.pNext = &ro2_features;
     }
 
-    VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT div_features = {
-      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT,
+    VkPhysicalDeviceVertexAttributeDivisorFeaturesKHR div_features = {
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_KHR,
       features2.pNext,
     };
-    if (has_device_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME)) {
+    if (has_device_extension(VK_KHR_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) ||
+        has_device_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME)) {
       features2.pNext = &div_features;
     }
 

+ 72 - 16
panda/src/vulkandisplay/vulkanGraphicsStateGuardian.cxx

@@ -136,7 +136,8 @@ reset() {
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
     enabled_features.pNext,
   };
-  if (pipe->_gpu_supports_custom_border_colors) {
+  if (pipe->_gpu_supports_custom_border_colors &&
+      vulkan_support_custom_border_color) {
     cbc_features.customBorderColors = VK_TRUE;
     cbc_features.customBorderColorWithoutFormat = VK_TRUE;
     enabled_features.pNext = &cbc_features;
@@ -160,9 +161,9 @@ reset() {
     supports_null_descriptor = true;
   }
 
-  // VK_EXT_vertex_attribute_divisor
-  VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT div_features = {
-    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT,
+  // VK_KHR_vertex_attribute_divisor / VK_EXT_vertex_attribute_divisor
+  VkPhysicalDeviceVertexAttributeDivisorFeaturesKHR div_features = {
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_KHR,
     enabled_features.pNext,
   };
   if (pipe->_gpu_supports_vertex_attrib_divisor) {
@@ -170,7 +171,11 @@ reset() {
     div_features.vertexAttributeInstanceRateZeroDivisor = pipe->_gpu_supports_vertex_attrib_zero_divisor;
     enabled_features.pNext = &div_features;
 
-    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
+    if (pipe->has_device_extension(VK_KHR_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME)) {
+      extensions.push_back(VK_KHR_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
+    } else {
+      extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
+    }
     _supports_vertex_attrib_divisor = true;
     _supports_vertex_attrib_zero_divisor = pipe->_gpu_supports_vertex_attrib_zero_divisor;
   } else {
@@ -387,6 +392,9 @@ reset() {
       return;
     }
     _needs_write_null_vertex_data = true;
+  } else {
+    _null_vertex_buffer = VK_NULL_HANDLE;
+    _needs_write_null_vertex_data = false;
   }
 
   // Create a push constant layout based on the available space.
@@ -1465,7 +1473,7 @@ upload_texture(VulkanTextureContext *tc) {
 
   VkBuffer buffer;
   uint32_t buffer_offset;
-  void *data = alloc_staging_buffer(buffer_size, buffer, buffer_offset);
+  void *data = alloc_staging_buffer(buffer_size + optimal_align - 1, buffer, buffer_offset);
   if (!data) {
     vulkandisplay_cat.error()
       << "Failed to allocate staging buffer for texture "
@@ -2098,6 +2106,21 @@ update_vertex_buffer(VulkanVertexBufferContext *vbc,
       region.dstOffset = 0;
       region.size = num_bytes;
       vkCmdCopyBuffer(_frame_data->_transfer_cmd, buffer, vbc->_buffer, 1, &region);
+
+      VkBufferMemoryBarrier barrier;
+      barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+      barrier.pNext = nullptr;
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+      barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      barrier.buffer = vbc->_buffer;
+      barrier.offset = 0;
+      barrier.size = VK_WHOLE_SIZE;
+      vkCmdPipelineBarrier(_frame_data->_transfer_cmd,
+                           VK_PIPELINE_STAGE_TRANSFER_BIT,
+                           VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                           0, 0, nullptr, 1, &barrier, 0, nullptr);
     }
 
     vbc->mark_loaded(reader);
@@ -2230,6 +2253,21 @@ update_index_buffer(VulkanIndexBufferContext *ibc,
       region.dstOffset = 0;
       region.size = num_bytes;
       vkCmdCopyBuffer(_frame_data->_transfer_cmd, buffer, ibc->_buffer, 1, &region);
+
+      VkBufferMemoryBarrier barrier;
+      barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+      barrier.pNext = nullptr;
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      barrier.dstAccessMask = VK_ACCESS_INDEX_READ_BIT;
+      barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      barrier.buffer = ibc->_buffer;
+      barrier.offset = 0;
+      barrier.size = VK_WHOLE_SIZE;
+      vkCmdPipelineBarrier(_frame_data->_transfer_cmd,
+                           VK_PIPELINE_STAGE_TRANSFER_BIT,
+                           VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                           0, 0, nullptr, 1, &barrier, 0, nullptr);
     }
 
     ibc->mark_loaded(reader);
@@ -2303,6 +2341,26 @@ prepare_shader_buffer(ShaderBuffer *data) {
     region.dstOffset = 0;
     region.size = data_size;
     vkCmdCopyBuffer(_frame_data->_transfer_cmd, buffer, bc->_buffer, 1, &region);
+
+    VkBufferMemoryBarrier barrier;
+    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    barrier.pNext = nullptr;
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.buffer = bc->_buffer;
+    barrier.offset = 0;
+    barrier.size = VK_WHOLE_SIZE;
+    vkCmdPipelineBarrier(_frame_data->_transfer_cmd,
+                         VK_PIPELINE_STAGE_TRANSFER_BIT,
+                         VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                         VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+                         VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+                         VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+                         VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                         0, 0, nullptr, 1, &barrier, 0, nullptr);
   }
 
   //bc->enqueue_lru(&_prepared_objects->_graphics_memory_lru);
@@ -3420,6 +3478,12 @@ do_extract_image(VulkanTextureContext *tc, Texture *tex, int view, int z, Screen
   // We tack this onto the existing command buffer, for now.
   VkCommandBuffer cmd = _frame_data->_cmd;
 
+  // Issue a command to transition the image into a layout optimal for
+  // transferring from.
+  tc->transition(cmd, _graphics_queue_family_index,
+    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+
   if (tc->_image != VK_NULL_HANDLE) {
     VkBufferImageCopy region;
     region.bufferOffset = 0;
@@ -3439,22 +3503,14 @@ do_extract_image(VulkanTextureContext *tc, Texture *tex, int view, int z, Screen
     region.imageOffset.y = 0;
     region.imageOffset.z = 0;
     region.imageExtent = tc->_extent;
-
-    // Issue a command to transition the image into a layout optimal for
-    // transferring from.
-    tc->transition(cmd, _graphics_queue_family_index,
-      VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT);
-
-    vkCmdCopyImageToBuffer(cmd, tc->_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                           down._buffer, 1, &region);
+    vkCmdCopyImageToBuffer(cmd, tc->_image, tc->_layout, down._buffer, 1, &region);
   }
   else {
     VkBufferCopy region;
     region.srcOffset = 0;
     region.dstOffset = 0;
     region.size = buffer_size;
-    vkCmdCopyBuffer(_frame_data->_transfer_cmd, tc->_buffer, down._buffer, 1, &region);
+    vkCmdCopyBuffer(cmd, tc->_buffer, down._buffer, 1, &region);
   }
 
   down._texture = tex;

+ 19 - 11
panda/src/vulkandisplay/vulkanGraphicsWindow.cxx

@@ -199,14 +199,16 @@ begin_frame(FrameMode mode, Thread *current_thread) {
     }
 
     color_tc->transition(cmd, vkgsg->_graphics_queue_family_index,
-                           VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
-                           VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                           VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
+                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
   } else {
     // This transition will be made when the first subpass is started.
     color_tc->_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-    color_tc->_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-    color_tc->_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    color_tc->_read_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    color_tc->_write_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    color_tc->_write_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
 
     LColor clear_color = get_clear_color();
     clears[0].color.float32[0] = clear_color[0];
@@ -224,13 +226,15 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       _depth_stencil_tc->transition(cmd, vkgsg->_graphics_queue_family_index,
         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
         VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
-        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
+        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
     } else {
       // This transition will be made when the first subpass is started.
       _depth_stencil_tc->_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-      _depth_stencil_tc->_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-      _depth_stencil_tc->_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
-                                       VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+      _depth_stencil_tc->_write_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+      _depth_stencil_tc->_write_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                                             VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+      _depth_stencil_tc->_read_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                                            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
     }
 
     if (get_clear_depth_active() || get_clear_stencil_active()) {
@@ -271,7 +275,8 @@ end_frame(FrameMode mode, Thread *current_thread) {
 
     // The driver implicitly transitioned this to the final layout.
     buffer._tc->_layout = _final_layout;
-    buffer._tc->_access_mask = VK_ACCESS_MEMORY_READ_BIT;
+    buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
 
     // Now we can do copy-to-texture, now that the render pass has ended.
     copy_to_textures();
@@ -283,7 +288,7 @@ end_frame(FrameMode mode, Thread *current_thread) {
   buffer._tc->transition(cmd, vkgsg->_graphics_queue_family_index,
                          VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
                          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                         VK_ACCESS_MEMORY_READ_BIT);
+                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
 
   // Note: this will close the command buffer, and unsignal the previous
   // frame's semaphore.
@@ -1154,6 +1159,9 @@ create_swapchain() {
       vulkan_error(err, "Failed to create framebuffer");
       return false;
     }
+
+    // Don't start rendering until the image has been acquired.
+    buffer._tc->mark_written(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0);
   }
 
   // Create a semaphore for signalling the availability of an image.

+ 23 - 3
panda/src/vulkandisplay/vulkanShaderContext.cxx

@@ -367,6 +367,26 @@ r_extract_resources(const Shader::Parameter &param, const AccessChain &chain,
   if (desc._binding == nullptr) {
     desc._binding = param._binding;
     desc._stage_mask = param._stage_mask;
+    desc._pipeline_stage_mask = 0;
+
+    if (desc._stage_mask & VK_SHADER_STAGE_VERTEX_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
+    }
+    if (desc._stage_mask & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT;
+    }
+    if (desc._stage_mask & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+    }
+    if (desc._stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
+    }
+    if (desc._stage_mask & VK_SHADER_STAGE_FRAGMENT_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+    }
+    if (desc._stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
+      desc._pipeline_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    }
 
     if (const ShaderType::SampledImage *sampler = type->as_sampled_image()) {
       desc._type =
@@ -565,7 +585,7 @@ fetch_descriptor(VulkanGraphicsStateGuardian *gsg, const Descriptor &desc,
       tc->transition(gsg->_frame_data->_transfer_cmd,
                      gsg->_graphics_queue_family_index,
                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
-                     desc._stage_mask, VK_ACCESS_SHADER_READ_BIT);
+                     desc._pipeline_stage_mask, VK_ACCESS_SHADER_READ_BIT);
 
       VkDescriptorImageInfo &image_info = *image_infos++;
       image_info.sampler = sc->_sampler;
@@ -591,7 +611,7 @@ fetch_descriptor(VulkanGraphicsStateGuardian *gsg, const Descriptor &desc,
       tc->transition(gsg->_frame_data->_transfer_cmd,
                      gsg->_graphics_queue_family_index,
                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
-                     desc._stage_mask, VK_ACCESS_SHADER_READ_BIT);
+                     desc._pipeline_stage_mask, VK_ACCESS_SHADER_READ_BIT);
 
       VkBufferView &texel_buffer_view = *texel_buffer_views++;
       texel_buffer_view = tc->get_buffer_view(view);
@@ -630,7 +650,7 @@ fetch_descriptor(VulkanGraphicsStateGuardian *gsg, const Descriptor &desc,
       tc->transition(gsg->_frame_data->_transfer_cmd,
                      gsg->_graphics_queue_family_index,
                      VK_IMAGE_LAYOUT_GENERAL,
-                     desc._stage_mask, access_mask);
+                     desc._pipeline_stage_mask, access_mask);
 
       int view = gsg->get_current_tex_view_offset();
       if (desc._type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {

+ 6 - 5
panda/src/vulkandisplay/vulkanShaderContext.h

@@ -122,7 +122,7 @@ private:
     };
     pvector<Binding> _bindings;
     VkDeviceSize _size = 0;
-    int _stage_mask = 0;
+    VkShaderStageFlags _stage_mask = 0;
     int _deps = 0;
   };
   Block _shader_input_block;
@@ -135,7 +135,8 @@ private:
     VkDescriptorType _type;
     PT(ShaderInputBinding) _binding;
     small_vector<ResourceId, 1> _resource_ids;
-    int _stage_mask = 0;
+    VkShaderStageFlags _stage_mask = 0;
+    VkPipelineStageFlags _pipeline_stage_mask = 0;
     ShaderType::Access _access = ShaderType::Access::READ_WRITE;
   };
   pvector<Descriptor> _tattr_descriptors;
@@ -152,9 +153,9 @@ private:
 
   // These are for the push constants; maybe in the future we'll replace this
   // with a more generic and flexible system.
-  int _push_constant_stage_mask = 0;
-  int _projection_mat_stage_mask = 0;
-  int _color_scale_stage_mask = 0;
+  VkShaderStageFlags _push_constant_stage_mask = 0;
+  VkShaderStageFlags _projection_mat_stage_mask = 0;
+  VkShaderStageFlags _color_scale_stage_mask = 0;
 
   // A map of all pipelines that use this shader.  This is in ShaderContext
   // because when a shader is released we have no more use of the pipelines

+ 20 - 10
panda/src/vulkandisplay/vulkanTextureContext.I

@@ -17,15 +17,13 @@
  */
 INLINE VulkanTextureContext::
 VulkanTextureContext(PreparedGraphicsObjects *pgo, Texture *texture) :
-  TextureContext(pgo, texture),
-  //TODO: it is not clear to me what we should set srcStageMask to here.
-  _stage_mask(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT) {
+  TextureContext(pgo, texture) {
 }
 
 /**
  * Returns the VkImageView handle for the given view of the texture.
  */
-INLINE const VkImageView &VulkanTextureContext::
+INLINE VkImageView VulkanTextureContext::
 get_image_view(int view) const {
   if (!_image_views.empty()) {
     return _image_views[std::min(std::max(view, 0), (int)_image_views.size() - 1)];
@@ -37,7 +35,7 @@ get_image_view(int view) const {
 /**
  * Returns the VkBufferView handle for the given view of the texture.
  */
-INLINE const VkBufferView &VulkanTextureContext::
+INLINE VkBufferView VulkanTextureContext::
 get_buffer_view(int view) const {
   if (!_buffer_views.empty()) {
     return _buffer_views[std::min(std::max(view, 0), (int)_buffer_views.size() - 1)];
@@ -47,12 +45,22 @@ get_buffer_view(int view) const {
 }
 
 /**
- * Records a way that the image has been accessed.
+ * Indicates that the texture is being read from by the given stage.  Any
+ * subsequent writes must wait until the given stage has passed the pipeline.
  */
 INLINE void VulkanTextureContext::
-access(VkPipelineStageFlags stage, VkAccessFlags access_mask) {
-  _stage_mask |= stage;
-  _access_mask |= access_mask;
+mark_read(VkPipelineStageFlags stage) {
+  _read_stage_mask |= stage;
+}
+
+/**
+ * Indicates that the texture is being written by the given stage.  Any
+ * subsequent reads or writes must wait until the given stage has passed the pipeline.
+ */
+INLINE void VulkanTextureContext::
+mark_written(VkPipelineStageFlags stage, VkAccessFlags access_mask) {
+  _write_stage_mask |= stage;
+  _write_access_mask |= access_mask;
 }
 
 /**
@@ -61,6 +69,8 @@ access(VkPipelineStageFlags stage, VkAccessFlags access_mask) {
  */
 INLINE void VulkanTextureContext::
 discard() {
-  _access_mask = 0;
+  // We don't clear _write_stage_mask, because we still want the execution
+  // dependency.
   _layout = VK_IMAGE_LAYOUT_UNDEFINED;
+  _write_access_mask = 0;
 }

+ 78 - 22
panda/src/vulkandisplay/vulkanTextureContext.cxx

@@ -175,10 +175,17 @@ clear_buffer(VkCommandBuffer cmd, uint32_t fill) {
   nassertv(_buffer != VK_NULL_HANDLE);
 
   vkCmdFillBuffer(cmd, _buffer, 0, VK_WHOLE_SIZE, fill);
+  mark_written(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
 }
 
 /**
  * Issues a command to transition the image to a new layout or queue family.
+ * Also issues the appropriate memory barrier to prevent read-after-write and
+ * write-after-write hazards.
+ *
+ * For a buffer texture, layout is ignored.
+ *
+ * Implicitly calls mark_read() or mark_written() depending on the access mask.
  * Does not (yet) do inter-queue synchronization.
  */
 void VulkanTextureContext::
@@ -186,33 +193,82 @@ transition(VkCommandBuffer cmd, uint32_t queue_family, VkImageLayout layout,
            VkPipelineStageFlags dst_stage_mask, VkAccessFlags dst_access_mask) {
 
   if (_image == VK_NULL_HANDLE) {
-    return;
+    layout = _layout;
   }
 
-  if (_layout == layout) {
+  // Are we writing to the texture?
+  VkAccessFlags write_mask = (dst_access_mask &
+    (VK_ACCESS_SHADER_WRITE_BIT |
+     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+     VK_ACCESS_TRANSFER_WRITE_BIT |
+     VK_ACCESS_HOST_WRITE_BIT |
+     VK_ACCESS_MEMORY_WRITE_BIT));
+
+  // If we wrote to this recently (or performed a layout transition), we must
+  // wait for that to be finished.
+  VkPipelineStageFlags src_stage_mask = _write_stage_mask;
+
+  if (_layout != layout || write_mask != 0) {
+    // Before a layout transition or a write, all previous reads must have
+    // finished.
+    src_stage_mask |= _read_stage_mask;
+
+    if (src_stage_mask == 0) {
+      // Can't specify a source stage mask of zero.
+      src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    }
+  }
+  else if (src_stage_mask == 0) {
+    // This is a read-after-read, nothing to do here.
     return;
   }
 
-  VkImageMemoryBarrier barrier;
-  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-  barrier.pNext = nullptr;
-  barrier.srcAccessMask = _access_mask;
-  barrier.dstAccessMask = dst_access_mask;
-  barrier.oldLayout = _layout;
-  barrier.newLayout = layout;
-  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;//_graphics_queue_family_index;
-  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;//_graphics_queue_family_index;
-  barrier.image = _image;
-  barrier.subresourceRange.aspectMask = _aspect_mask;
-  barrier.subresourceRange.baseMipLevel = 0;
-  barrier.subresourceRange.levelCount = _mip_levels;
-  barrier.subresourceRange.baseArrayLayer = 0;
-  barrier.subresourceRange.layerCount = _array_layers;
-
-  vkCmdPipelineBarrier(cmd, _stage_mask, dst_stage_mask, 0,
-                       0, nullptr, 0, nullptr, 1, &barrier);
+  VkImageMemoryBarrier img_barrier;
+  if (_image != VK_NULL_HANDLE) {
+    img_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+    img_barrier.pNext = nullptr;
+    img_barrier.srcAccessMask = _write_access_mask;
+    img_barrier.dstAccessMask = dst_access_mask;
+    img_barrier.oldLayout = _layout;
+    img_barrier.newLayout = layout;
+    img_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    img_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    img_barrier.image = _image;
+    img_barrier.subresourceRange.aspectMask = _aspect_mask;
+    img_barrier.subresourceRange.baseMipLevel = 0;
+    img_barrier.subresourceRange.levelCount = _mip_levels;
+    img_barrier.subresourceRange.baseArrayLayer = 0;
+    img_barrier.subresourceRange.layerCount = _array_layers;
+  }
+  VkBufferMemoryBarrier buf_barrier;
+  if (_buffer != VK_NULL_HANDLE) {
+    buf_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    buf_barrier.pNext = nullptr;
+    buf_barrier.srcAccessMask = _write_access_mask;
+    buf_barrier.dstAccessMask = dst_access_mask;
+    buf_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buf_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buf_barrier.buffer = _buffer;
+    buf_barrier.offset = 0;
+    buf_barrier.size = VK_WHOLE_SIZE;
+  }
+  vkCmdPipelineBarrier(cmd, src_stage_mask, dst_stage_mask, 0,
+                       0, nullptr,
+                       (_buffer != VK_NULL_HANDLE), &buf_barrier,
+                       (_image != VK_NULL_HANDLE), &img_barrier);
 
   _layout = layout;
-  _access_mask = dst_access_mask;
-  _stage_mask = dst_stage_mask;
+
+  if (write_mask != 0) {
+    // We are writing to it, remember that for next time.
+    _write_stage_mask = dst_stage_mask;
+    _write_access_mask = write_mask;
+  } else {
+    // Note that layout transitions create an implicit execution dependency,
+    // so if we're not writing, we don't need to set _write_stage_mask here.
+    _write_stage_mask = 0;
+    _write_access_mask = 0;
+  }
+  _read_stage_mask = dst_stage_mask & ~VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 }

+ 8 - 5
panda/src/vulkandisplay/vulkanTextureContext.h

@@ -31,10 +31,12 @@ public:
   void release(VulkanFrameData &frame_data);
   void destroy_now(VkDevice device);
 
-  INLINE const VkImageView &get_image_view(int view) const;
-  INLINE const VkBufferView &get_buffer_view(int view) const;
+  INLINE VkImageView get_image_view(int view) const;
+  INLINE VkBufferView get_buffer_view(int view) const;
 
-  INLINE void access(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask);
+  INLINE void mark_read(VkPipelineStageFlags stage_mask);
+  INLINE void mark_written(VkPipelineStageFlags stage_mask,
+                           VkAccessFlags access_mask);
   INLINE void discard();
 
   void clear_color_image(VkCommandBuffer cmd, const VkClearColorValue &value);
@@ -64,8 +66,9 @@ public:
   VulkanMemoryBlock _block;
 
   VkImageLayout _layout = VK_IMAGE_LAYOUT_UNDEFINED;
-  VkAccessFlags _access_mask = 0;
-  VkPipelineStageFlags _stage_mask;
+  VkAccessFlags _write_access_mask = 0;
+  VkPipelineStageFlags _write_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+  VkPipelineStageFlags _read_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 
 public:
   static TypeHandle get_class_type() {