소스 검색

Merge pull request #87512 from DarioSamo/rd_graph_fixes

Fix validation errors by improving stage and slice tracking behavior of RenderingDeviceGraph.
Rémi Verschelde 1 년 전
부모
커밋
8202a73c73
3개의 변경된 파일, 197개의 추가 및 64개의 삭제
  1. 47 0
      drivers/vulkan/rendering_device_driver_vulkan.cpp
  2. 138 57
      servers/rendering/rendering_device_graph.cpp
  3. 12 7
      servers/rendering/rendering_device_graph.h

+ 47 - 0
drivers/vulkan/rendering_device_driver_vulkan.cpp

@@ -35,6 +35,8 @@
 #include "thirdparty/misc/smolv.h"
 #include "vulkan_context.h"
 
+#define PRINT_NATIVE_COMMANDS 0
+
 /*****************/
 /**** GENERIC ****/
 /*****************/
@@ -622,6 +624,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
 	tex_info->allocation.handle = allocation;
 	vmaGetAllocationInfo(allocator, tex_info->allocation.handle, &tex_info->allocation.info);
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(vk_image_view), uint64_t(vk_image)));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -710,6 +716,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or
 	tex_info->vk_view_create_info = image_view_create_info;
 	tex_info->allocation = {};
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image)));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -759,6 +769,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex
 	tex_info->vk_view_create_info = image_view_create_info;
 	tex_info->allocation = {};
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX (%d %d %d %d)", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image), p_mipmap, p_mipmaps, p_layer, p_layers));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -1071,6 +1085,23 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
 		vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count;
 	}
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size()));
+	for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
+		print_line(vformat("  VkMemoryBarrier #%d src 0x%uX dst 0x%uX", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask));
+	}
+
+	for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
+		print_line(vformat("  VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX buffer 0x%ux", i, vk_buffer_barriers[i].srcAccessMask, vk_buffer_barriers[i].dstAccessMask, uint64_t(vk_buffer_barriers[i].buffer)));
+	}
+
+	for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
+		print_line(vformat("  VkImageMemoryBarrier #%d src 0x%uX dst 0x%uX image 0x%ux old %d new %d (%d %d %d %d)", i, vk_image_barriers[i].srcAccessMask, vk_image_barriers[i].dstAccessMask,
+				uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount,
+				vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount));
+	}
+#endif
+
 	vkCmdPipelineBarrier(
 			(VkCommandBuffer)p_cmd_buffer.id,
 			(VkPipelineStageFlags)p_src_stages,
@@ -1225,6 +1256,14 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID
 	VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer);
 	ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + ".");
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateFramebuffer 0x%uX with %d attachments", uint64_t(vk_framebuffer), p_attachments.size()));
+	for (uint32_t i = 0; i < p_attachments.size(); i++) {
+		const TextureInfo *attachment_info = (const TextureInfo *)p_attachments[i].id;
+		print_line(vformat("  Attachment #%d: IMAGE 0x%uX VIEW 0x%uX", i, uint64_t(attachment_info->vk_view_create_info.image), uint64_t(attachment_info->vk_view)));
+	}
+#endif
+
 	return FramebufferID(vk_framebuffer);
 }
 
@@ -2467,10 +2506,18 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm
 
 	VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
 	vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents);
+
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id));
+#endif
 }
 
 void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) {
 	vkCmdEndRenderPass((VkCommandBuffer)p_cmd_buffer.id);
+
+#if PRINT_NATIVE_COMMANDS
+	print_line("vkCmdEndRenderPass");
+#endif
 }
 
 void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {

+ 138 - 57
servers/rendering/rendering_device_graph.cpp

@@ -33,6 +33,7 @@
 #define PRINT_RENDER_GRAPH 0
 #define FORCE_FULL_ACCESS_BITS 0
 #define PRINT_RESOURCE_TRACKER_TOTAL 0
+#define PRINT_COMMAND_RECORDING 0
 
 RenderingDeviceGraph::RenderingDeviceGraph() {
 	// Default initialization.
@@ -163,20 +164,35 @@ void RenderingDeviceGraph::_add_adjacent_command(int32_t p_previous_command_inde
 	const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index];
 	RecordedCommand &previous_command = *reinterpret_cast<RecordedCommand *>(&command_data[previous_command_data_offset]);
 	previous_command.adjacent_command_list_index = _add_to_command_list(p_command_index, previous_command.adjacent_command_list_index);
-	r_command->src_stages = r_command->src_stages | previous_command.dst_stages;
+	previous_command.next_stages = previous_command.next_stages | r_command->self_stages;
+	r_command->previous_stages = r_command->previous_stages | previous_command.self_stages;
 }
 
-int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index) {
+int32_t RenderingDeviceGraph::_add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) {
 	DEV_ASSERT(p_command_index < int32_t(command_count));
-	DEV_ASSERT(p_list_index < int32_t(write_list_nodes.size()));
+	DEV_ASSERT(p_list_index < int32_t(read_slice_list_nodes.size()));
 
-	int32_t next_index = int32_t(write_list_nodes.size());
-	write_list_nodes.resize(next_index + 1);
+	int32_t next_index = int32_t(read_slice_list_nodes.size());
+	read_slice_list_nodes.resize(next_index + 1);
 
-	RecordedWriteListNode &new_node = write_list_nodes[next_index];
+	RecordedSliceListNode &new_node = read_slice_list_nodes[next_index];
 	new_node.command_index = p_command_index;
 	new_node.next_list_index = p_list_index;
-	new_node.subresources = suberesources;
+	new_node.subresources = p_subresources;
+	return next_index;
+}
+
+int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) {
+	DEV_ASSERT(p_command_index < int32_t(command_count));
+	DEV_ASSERT(p_list_index < int32_t(write_slice_list_nodes.size()));
+
+	int32_t next_index = int32_t(write_slice_list_nodes.size());
+	write_slice_list_nodes.resize(next_index + 1);
+
+	RecordedSliceListNode &new_node = write_slice_list_nodes[next_index];
+	new_node.command_index = p_command_index;
+	new_node.next_list_index = p_list_index;
+	new_node.subresources = p_subresources;
 	return next_index;
 }
 
@@ -203,6 +219,9 @@ RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_allocate_co
 }
 
 void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) {
+	// Assign the next stages derived from the stages the command requires first.
+	r_command->next_stages = r_command->self_stages;
+
 	if (command_label_index >= 0) {
 		// If a label is active, tag the command with the label.
 		r_command->label_index = command_label_index;
@@ -242,6 +261,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 
 		resource_tracker->reset_if_outdated(tracking_frame);
 
+		const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources;
+		const Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count);
+		Rect2i search_tracker_rect = resource_tracker_rect;
+
 		ResourceUsage new_resource_usage = p_resource_usages[i];
 		bool write_usage = _is_write_usage(new_resource_usage);
 		BitField<RDD::BarrierAccessBits> new_usage_access = _usage_to_access_bits(new_resource_usage);
@@ -264,9 +287,14 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 				// If the parent hasn't been used yet, we assign the usage of the slice to the entire resource.
 				resource_tracker->parent->usage = new_resource_usage;
 
-				// Also assign the usage to the slice and consider it a write operation.
+				// Also assign the usage to the slice and consider it a write operation. Consider the parent's current usage access as its own.
 				resource_tracker->usage = new_resource_usage;
+				resource_tracker->usage_access = resource_tracker->parent->usage_access;
 				write_usage = true;
+
+				// Indicate the area that should be tracked is the entire resource.
+				const RDD::TextureSubresourceRange &parent_subresources = resource_tracker->parent->texture_subresources;
+				search_tracker_rect = Rect2i(parent_subresources.base_mipmap, parent_subresources.base_layer, parent_subresources.mipmap_count, parent_subresources.layer_count);
 			} else if (resource_tracker->in_parent_dirty_list) {
 				if (resource_tracker->parent->usage == new_resource_usage) {
 					// The slice will be transitioned to the resource of the parent and can be deleted from the dirty list.
@@ -274,6 +302,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 					ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list;
 					bool initialized_dirty_rect = false;
 					while (current_tracker != nullptr) {
+						current_tracker->reset_if_outdated(tracking_frame);
+
 						if (current_tracker == resource_tracker) {
 							current_tracker->in_parent_dirty_list = false;
 
@@ -305,6 +335,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 					ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list;
 					bool initialized_dirty_rect = false;
 					while (current_tracker != nullptr) {
+						current_tracker->reset_if_outdated(tracking_frame);
+
 						if (current_tracker->texture_slice_or_dirty_rect.intersects(resource_tracker->texture_slice_or_dirty_rect)) {
 							if (current_tracker->command_frame == tracking_frame && current_tracker->texture_slice_command_index == p_command_index) {
 								ERR_FAIL_MSG("Texture slices that overlap can't be used in the same command.");
@@ -312,6 +344,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 								// Delete the slice from the dirty list and revert it to the usage of the parent.
 								if (current_tracker->texture_driver_id != 0) {
 									_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->parent->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
+
+									// Merge the area of the slice with the current tracking area of the command and indicate it's a write usage as well.
+									search_tracker_rect = search_tracker_rect.merge(current_tracker->texture_slice_or_dirty_rect);
+									write_usage = true;
 								}
 
 								current_tracker->in_parent_dirty_list = false;
@@ -339,8 +375,9 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 					}
 				}
 
-				// If it wasn't in the list, assume the usage is the same as the parent.
+				// If it wasn't in the list, assume the usage is the same as the parent. Consider the parent's current usage access as its own.
 				resource_tracker->usage = resource_tracker->parent->usage;
+				resource_tracker->usage_access = resource_tracker->parent->usage_access;
 
 				if (resource_tracker->usage != new_resource_usage) {
 					// Insert to the dirty list if the requested usage is different.
@@ -355,27 +392,30 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 				}
 			}
 		} else {
-			if (resource_tracker->dirty_shared_list != nullptr) {
+			ResourceTracker *current_tracker = resource_tracker->dirty_shared_list;
+			if (current_tracker != nullptr) {
 				// Consider the usage as write if we must transition any of the slices.
 				write_usage = true;
 			}
 
-			while (resource_tracker->dirty_shared_list != nullptr) {
-				if (resource_tracker->dirty_shared_list->texture_driver_id != 0) {
+			while (current_tracker != nullptr) {
+				current_tracker->reset_if_outdated(tracking_frame);
+
+				if (current_tracker->texture_driver_id != 0) {
 					// Transition all slices to the layout of the parent resource.
-					_add_texture_barrier_to_command(resource_tracker->dirty_shared_list->texture_driver_id, resource_tracker->dirty_shared_list->usage_access, new_usage_access, resource_tracker->dirty_shared_list->usage, resource_tracker->usage, resource_tracker->dirty_shared_list->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
+					_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
 				}
 
-				resource_tracker->dirty_shared_list->in_parent_dirty_list = false;
-				resource_tracker->dirty_shared_list = resource_tracker->dirty_shared_list->next_shared;
+				current_tracker->in_parent_dirty_list = false;
+				current_tracker = current_tracker->next_shared;
 			}
+
+			resource_tracker->dirty_shared_list = nullptr;
 		}
 
 		// Use the resource's parent tracker directly for all search operations.
 		bool resource_has_parent = resource_tracker->parent != nullptr;
 		ResourceTracker *search_tracker = resource_has_parent ? resource_tracker->parent : resource_tracker;
-		const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources;
-		Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count);
 		bool different_usage = resource_tracker->usage != new_resource_usage;
 		bool write_usage_after_write = (write_usage && search_tracker->write_command_or_list_index >= 0);
 		if (different_usage || write_usage_after_write) {
@@ -418,18 +458,18 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 				int32_t previous_write_list_index = -1;
 				int32_t write_list_index = search_tracker->write_command_or_list_index;
 				while (write_list_index >= 0) {
-					const RecordedWriteListNode &write_list_node = write_list_nodes[write_list_index];
-					if (!resource_has_parent || resource_tracker_rect.intersects(write_list_node.subresources)) {
+					const RecordedSliceListNode &write_list_node = write_slice_list_nodes[write_list_index];
+					if (!resource_has_parent || search_tracker_rect.intersects(write_list_node.subresources)) {
 						if (write_list_node.command_index == p_command_index) {
 							ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency.");
 						} else {
 							// Command is dependent on this command. Add this command to the adjacency list of the write command.
 							_add_adjacent_command(write_list_node.command_index, p_command_index, r_command);
 
-							if (resource_has_parent && write_usage && resource_tracker_rect.encloses(write_list_node.subresources)) {
+							if (resource_has_parent && write_usage && search_tracker_rect.encloses(write_list_node.subresources)) {
 								// Eliminate redundant writes from the list.
 								if (previous_write_list_index >= 0) {
-									RecordedWriteListNode &previous_list_node = write_list_nodes[previous_write_list_index];
+									RecordedSliceListNode &previous_list_node = write_slice_list_nodes[previous_write_list_index];
 									previous_list_node.next_list_index = write_list_node.next_list_index;
 								} else {
 									search_tracker->write_command_or_list_index = write_list_node.next_list_index;
@@ -463,47 +503,69 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 					search_tracker->write_command_or_list_index = _add_to_write_list(search_tracker->write_command_or_list_index, tracker_rect, -1);
 				}
 
-				search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, resource_tracker_rect, search_tracker->write_command_or_list_index);
+				search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, search_tracker_rect, search_tracker->write_command_or_list_index);
 				search_tracker->write_command_list_enabled = true;
 			} else {
 				search_tracker->write_command_or_list_index = p_command_index;
 				search_tracker->write_command_list_enabled = false;
 			}
 
-			// We add this command to the adjacency list of all commands that were reading from this resource. We clear the list in the process.
-			int32_t previous_command_list_index = -1;
-			int32_t read_command_list_index = search_tracker->read_command_list_index;
-			while (read_command_list_index >= 0) {
-				const RecordedCommandListNode &command_list_node = command_list_nodes[read_command_list_index];
+			// We add this command to the adjacency list of all commands that were reading from the entire resource.
+			int32_t read_full_command_list_index = search_tracker->read_full_command_list_index;
+			while (read_full_command_list_index >= 0) {
+				const RecordedCommandListNode &command_list_node = command_list_nodes[read_full_command_list_index];
 				if (command_list_node.command_index == p_command_index) {
 					if (!resource_has_parent) {
-						// Slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
+						// Only slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
 						ERR_FAIL_MSG("Command can't have itself as a dependency.");
-					} else {
-						// Advance to the next element.
-						read_command_list_index = command_list_node.next_list_index;
-						previous_command_list_index = read_command_list_index;
 					}
 				} else {
-					if (previous_command_list_index >= 0) {
+					// Add this command to the adjacency list of each command that was reading this resource.
+					_add_adjacent_command(command_list_node.command_index, p_command_index, r_command);
+				}
+
+				read_full_command_list_index = command_list_node.next_list_index;
+			}
+
+			if (!resource_has_parent) {
+				// Clear the full list if this resource is not a slice.
+				search_tracker->read_full_command_list_index = -1;
+			}
+
+			// We add this command to the adjacency list of all commands that were reading from resource slices.
+			int32_t previous_slice_command_list_index = -1;
+			int32_t read_slice_command_list_index = search_tracker->read_slice_command_list_index;
+			while (read_slice_command_list_index >= 0) {
+				const RecordedSliceListNode &read_list_node = read_slice_list_nodes[read_slice_command_list_index];
+				if (!resource_has_parent || search_tracker_rect.encloses(read_list_node.subresources)) {
+					if (previous_slice_command_list_index >= 0) {
 						// Erase this element and connect the previous one to the next element.
-						command_list_nodes[previous_command_list_index].next_list_index = command_list_node.next_list_index;
-						read_command_list_index = command_list_node.next_list_index;
-						previous_command_list_index = read_command_list_index;
+						read_slice_list_nodes[previous_slice_command_list_index].next_list_index = read_list_node.next_list_index;
 					} else {
 						// Erase this element from the head of the list.
-						DEV_ASSERT(search_tracker->read_command_list_index == read_command_list_index);
-						read_command_list_index = command_list_node.next_list_index;
-						search_tracker->read_command_list_index = read_command_list_index;
+						DEV_ASSERT(search_tracker->read_slice_command_list_index == read_slice_command_list_index);
+						search_tracker->read_slice_command_list_index = read_list_node.next_list_index;
 					}
 
+					// Advance to the next element.
+					read_slice_command_list_index = read_list_node.next_list_index;
+				} else {
+					previous_slice_command_list_index = read_slice_command_list_index;
+					read_slice_command_list_index = read_list_node.next_list_index;
+				}
+
+				if (!resource_has_parent || search_tracker_rect.intersects(read_list_node.subresources)) {
 					// Add this command to the adjacency list of each command that was reading this resource.
-					_add_adjacent_command(command_list_node.command_index, p_command_index, r_command);
+					// We only add the dependency if there's an intersection between slices or this resource isn't a slice.
+					_add_adjacent_command(read_list_node.command_index, p_command_index, r_command);
 				}
 			}
+		} else if (resource_has_parent) {
+			// We add a read dependency to the tracker to indicate this command reads from the resource slice.
+			search_tracker->read_slice_command_list_index = _add_to_slice_read_list(p_command_index, resource_tracker_rect, search_tracker->read_slice_command_list_index);
 		} else {
-			// We add a read dependency to the tracker to indicate this command reads from the resource.
-			search_tracker->read_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_command_list_index);
+			// We add a read dependency to the tracker to indicate this command reads from the entire resource.
+			search_tracker->read_full_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_full_command_list_index);
 		}
 	}
 }
@@ -913,9 +975,13 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe
 		const uint32_t command_data_offset = command_data_offsets[command_index];
 		const RecordedCommand *command = reinterpret_cast<RecordedCommand *>(&command_data[command_data_offset]);
 
+#if PRINT_COMMAND_RECORDING
+		print_line(vformat("Grouping barriers for #%d", command_index));
+#endif
+
 		// Merge command's stage bits with the barrier group.
-		barrier_group.src_stages = barrier_group.src_stages | command->src_stages;
-		barrier_group.dst_stages = barrier_group.dst_stages | command->dst_stages;
+		barrier_group.src_stages = barrier_group.src_stages | command->previous_stages;
+		barrier_group.dst_stages = barrier_group.dst_stages | command->next_stages;
 
 		// Merge command's memory barrier bits with the barrier group.
 		barrier_group.memory_barrier.src_access = barrier_group.memory_barrier.src_access | command->memory_barrier.src_access;
@@ -925,11 +991,17 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe
 		for (int32_t j = 0; j < command->normalization_barrier_count; j++) {
 			const RDD::TextureBarrier &recorded_barrier = command_normalization_barriers[command->normalization_barrier_index + j];
 			barrier_group.normalization_barriers.push_back(recorded_barrier);
+#if PRINT_COMMAND_RECORDING
+			print_line(vformat("Normalization Barrier #%d", barrier_group.normalization_barriers.size() - 1));
+#endif
 		}
 
 		for (int32_t j = 0; j < command->transition_barrier_count; j++) {
 			const RDD::TextureBarrier &recorded_barrier = command_transition_barriers[command->transition_barrier_index + j];
 			barrier_group.transition_barriers.push_back(recorded_barrier);
+#if PRINT_COMMAND_RECORDING
+			print_line(vformat("Transition Barrier #%d", barrier_group.transition_barriers.size() - 1));
+#endif
 		}
 
 #if USE_BUFFER_BARRIERS
@@ -1202,7 +1274,8 @@ void RenderingDeviceGraph::begin() {
 	command_label_colors.clear();
 	command_label_offsets.clear();
 	command_list_nodes.clear();
-	write_list_nodes.clear();
+	read_slice_list_nodes.clear();
+	write_slice_list_nodes.clear();
 	command_count = 0;
 	command_label_count = 0;
 	command_timestamp_index = -1;
@@ -1225,7 +1298,7 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker
 	int32_t command_index;
 	RecordedBufferClearCommand *command = static_cast<RecordedBufferClearCommand *>(_allocate_command(sizeof(RecordedBufferClearCommand), command_index));
 	command->type = RecordedCommand::TYPE_BUFFER_CLEAR;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->buffer = p_dst;
 	command->offset = p_offset;
 	command->size = p_size;
@@ -1241,7 +1314,7 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker
 	int32_t command_index;
 	RecordedBufferCopyCommand *command = static_cast<RecordedBufferCopyCommand *>(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index));
 	command->type = RecordedCommand::TYPE_BUFFER_COPY;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->source = p_src;
 	command->destination = p_dst;
 	command->region = p_region;
@@ -1256,7 +1329,7 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac
 	int32_t command_index;
 	RecordedBufferGetDataCommand *command = static_cast<RecordedBufferGetDataCommand *>(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index));
 	command->type = RecordedCommand::TYPE_BUFFER_GET_DATA;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->source = p_src;
 	command->destination = p_dst;
 	command->region = p_region;
@@ -1277,7 +1350,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke
 	int32_t command_index;
 	RecordedBufferUpdateCommand *command = static_cast<RecordedBufferUpdateCommand *>(_allocate_command(command_size, command_index));
 	command->type = RecordedCommand::TYPE_BUFFER_UPDATE;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->destination = p_dst;
 	command->buffer_copies_count = p_buffer_copies.size();
 
@@ -1369,7 +1442,7 @@ void RenderingDeviceGraph::add_compute_list_end() {
 	uint32_t command_size = sizeof(RecordedComputeListCommand) + instruction_data_size;
 	RecordedComputeListCommand *command = static_cast<RecordedComputeListCommand *>(_allocate_command(command_size, command_index));
 	command->type = RecordedCommand::TYPE_COMPUTE_LIST;
-	command->dst_stages = compute_instruction_list.stages;
+	command->self_stages = compute_instruction_list.stages;
 	command->instruction_data_size = instruction_data_size;
 	memcpy(command->instruction_data(), compute_instruction_list.data.ptr(), instruction_data_size);
 	_add_command_to_graph(compute_instruction_list.command_trackers.ptr(), compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command);
@@ -1579,7 +1652,7 @@ void RenderingDeviceGraph::add_draw_list_end() {
 	uint32_t command_size = sizeof(RecordedDrawListCommand) + clear_values_size + instruction_data_size;
 	RecordedDrawListCommand *command = static_cast<RecordedDrawListCommand *>(_allocate_command(command_size, command_index));
 	command->type = RecordedCommand::TYPE_DRAW_LIST;
-	command->dst_stages = draw_instruction_list.stages;
+	command->self_stages = draw_instruction_list.stages;
 	command->instruction_data_size = instruction_data_size;
 	command->render_pass = draw_instruction_list.render_pass;
 	command->framebuffer = draw_instruction_list.framebuffer;
@@ -1602,7 +1675,7 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack
 	int32_t command_index;
 	RecordedTextureClearCommand *command = static_cast<RecordedTextureClearCommand *>(_allocate_command(sizeof(RecordedTextureClearCommand), command_index));
 	command->type = RecordedCommand::TYPE_TEXTURE_CLEAR;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->texture = p_dst;
 	command->color = p_color;
 	command->range = p_range;
@@ -1618,7 +1691,7 @@ void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracke
 	int32_t command_index;
 	RecordedTextureCopyCommand *command = static_cast<RecordedTextureCopyCommand *>(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index));
 	command->type = RecordedCommand::TYPE_TEXTURE_COPY;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->from_texture = p_src;
 	command->to_texture = p_dst;
 	command->region = p_region;
@@ -1635,7 +1708,7 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr
 	uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion);
 	RecordedTextureGetDataCommand *command = static_cast<RecordedTextureGetDataCommand *>(_allocate_command(command_size, command_index));
 	command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->from_texture = p_src;
 	command->to_buffer = p_dst;
 	command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size();
@@ -1656,7 +1729,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra
 	int32_t command_index;
 	RecordedTextureResolveCommand *command = static_cast<RecordedTextureResolveCommand *>(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index));
 	command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->from_texture = p_src;
 	command->to_texture = p_dst;
 	command->src_layer = p_src_layer;
@@ -1676,7 +1749,7 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac
 	uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy);
 	RecordedTextureUpdateCommand *command = static_cast<RecordedTextureUpdateCommand *>(_allocate_command(command_size, command_index));
 	command->type = RecordedCommand::TYPE_TEXTURE_UPDATE;
-	command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+	command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
 	command->to_texture = p_dst;
 	command->buffer_to_texture_copies_count = p_buffer_copies.size();
 
@@ -1693,7 +1766,7 @@ void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool,
 	int32_t command_index;
 	RecordedCaptureTimestampCommand *command = static_cast<RecordedCaptureTimestampCommand *>(_allocate_command(sizeof(RecordedCaptureTimestampCommand), command_index));
 	command->type = RecordedCommand::TYPE_CAPTURE_TIMESTAMP;
-	command->dst_stages = 0;
+	command->self_stages = 0;
 	command->pool = p_query_pool;
 	command->index = p_index;
 	_add_command_to_graph(nullptr, nullptr, 0, command_index, command);
@@ -1852,6 +1925,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo
 			_print_render_commands(commands_sorted.ptr(), command_count);
 #endif
 
+#if PRINT_COMMAND_RECORDING
+			print_line(vformat("Recording %d commands", command_count));
+#endif
+
 			uint32_t boosted_priority = 0;
 			uint32_t current_level = commands_sorted[0].level;
 			uint32_t current_level_start = 0;
@@ -1884,6 +1961,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo
 		}
 
 		_run_label_command_change(p_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level);
+
+#if PRINT_COMMAND_RECORDING
+		print_line(vformat("Recorded %d commands", command_count));
+#endif
 	}
 
 	// Advance the frame counter. It's not necessary to do this if no commands are recorded because that means no secondary command buffers were used.

+ 12 - 7
servers/rendering/rendering_device_graph.h

@@ -112,8 +112,9 @@ public:
 		int32_t buffer_barrier_count = 0;
 #endif
 		int32_t label_index = -1;
-		BitField<RDD::PipelineStageBits> src_stages;
-		BitField<RDD::PipelineStageBits> dst_stages;
+		BitField<RDD::PipelineStageBits> previous_stages;
+		BitField<RDD::PipelineStageBits> next_stages;
+		BitField<RDD::PipelineStageBits> self_stages;
 	};
 
 	struct RecordedBufferCopy {
@@ -150,7 +151,8 @@ public:
 	struct ResourceTracker {
 		uint32_t reference_count = 0;
 		int64_t command_frame = -1;
-		int32_t read_command_list_index = -1;
+		int32_t read_full_command_list_index = -1;
+		int32_t read_slice_command_list_index = -1;
 		int32_t write_command_or_list_index = -1;
 		int32_t draw_list_index = -1;
 		int32_t compute_list_index = -1;
@@ -171,7 +173,8 @@ public:
 			if (new_command_frame != command_frame) {
 				usage_access.clear();
 				command_frame = new_command_frame;
-				read_command_list_index = -1;
+				read_full_command_list_index = -1;
+				read_slice_command_list_index = -1;
 				write_command_or_list_index = -1;
 				draw_list_index = -1;
 				compute_list_index = -1;
@@ -237,7 +240,7 @@ private:
 		int32_t next_list_index = -1;
 	};
 
-	struct RecordedWriteListNode {
+	struct RecordedSliceListNode {
 		int32_t command_index = -1;
 		int32_t next_list_index = -1;
 		Rect2i subresources;
@@ -572,7 +575,8 @@ private:
 	uint32_t command_count = 0;
 	uint32_t command_label_count = 0;
 	LocalVector<RecordedCommandListNode> command_list_nodes;
-	LocalVector<RecordedWriteListNode> write_list_nodes;
+	LocalVector<RecordedSliceListNode> read_slice_list_nodes;
+	LocalVector<RecordedSliceListNode> write_slice_list_nodes;
 	int32_t command_timestamp_index = -1;
 	int32_t command_synchronization_index = -1;
 	bool command_synchronization_pending = false;
@@ -590,7 +594,8 @@ private:
 	static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage);
 	int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index);
 	void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command);
-	int32_t _add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index);
+	int32_t _add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);
+	int32_t _add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);
 	RecordedCommand *_allocate_command(uint32_t p_command_size, int32_t &r_command_index);
 	DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size);
 	ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size);