瀏覽代碼

Untested support for compute shaders

Juan Linietsky 6 年之前
父節點
當前提交
263bebe023

+ 491 - 62
drivers/vulkan/rendering_device_vulkan.cpp

@@ -1604,6 +1604,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
 		image_create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
 	}
 
+	if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
+		image_create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+	}
+
 	if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 		image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 	}
@@ -1723,39 +1727,41 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
 	texture.samples = p_format.samples;
 	texture.allowed_shared_formats = p_format.shareable_formats;
 
-	//set bound and unbound layouts
-	if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+	//set base layout based on usage priority
 
-		texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
-		texture.barrier_aspect_mask = texture.read_aspect_mask;
-		if (format_has_stencil(p_format.format)) {
-			texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
-		}
+	if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
+		//first priority, readable
+		texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 
-		if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
-			texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		} else {
-			texture.unbound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-		}
-		texture.bound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+	} else if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
+		//second priority, storage
+
+		texture.layout = VK_IMAGE_LAYOUT_GENERAL;
 
 	} else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
+		//third priority, color attachment (depth/stencil attachment is handled by the next branch)
 
-		texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
-		texture.barrier_aspect_mask = texture.read_aspect_mask;
+		texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 
-		if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
-			texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		} else {
-			texture.unbound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	} else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+
+		texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+
+	} else {
+		texture.layout = VK_IMAGE_LAYOUT_GENERAL;
+	}
+
+	if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+
+		texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
+		texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
+
+		if (format_has_stencil(p_format.format)) {
+			texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
 		}
-		texture.bound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 	} else {
 		texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
-		texture.barrier_aspect_mask = texture.read_aspect_mask;
-
-		texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		texture.bound_layout = VK_IMAGE_LAYOUT_UNDEFINED; //will never be bound
+		texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
 	}
 
 	texture.bound = false;
@@ -1825,7 +1831,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
 		image_memory_barrier.srcAccessMask = 0;
 		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 		image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-		image_memory_barrier.newLayout = texture.unbound_layout;
+		image_memory_barrier.newLayout = texture.layout;
 		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 		image_memory_barrier.image = texture.image;
@@ -1835,7 +1841,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
 		image_memory_barrier.subresourceRange.baseArrayLayer = 0;
 		image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers;
 
-		vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+		vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
 	}
 
 	RID id = texture_owner.make_rid(texture);
@@ -2081,7 +2087,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 		image_memory_barrier.pNext = NULL;
 		image_memory_barrier.srcAccessMask = 0;
 		image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-		image_memory_barrier.oldLayout = texture->unbound_layout;
+		image_memory_barrier.oldLayout = texture->layout;
 		image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 
 		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2221,7 +2227,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 		image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
 		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 		image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
-		image_memory_barrier.newLayout = texture->unbound_layout;
+		image_memory_barrier.newLayout = texture->layout;
 		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 		image_memory_barrier.image = texture->image;
@@ -2231,7 +2237,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 		image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
 		image_memory_barrier.subresourceRange.layerCount = 1;
 
-		vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+		vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
 	}
 
 	return OK;
@@ -2368,7 +2374,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
 			image_memory_barrier.pNext = NULL;
 			image_memory_barrier.srcAccessMask = 0;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-			image_memory_barrier.oldLayout = tex->unbound_layout;
+			image_memory_barrier.oldLayout = tex->layout;
 			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
 
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2446,7 +2452,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
-			image_memory_barrier.newLayout = tex->unbound_layout;
+			image_memory_barrier.newLayout = tex->layout;
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			image_memory_barrier.image = tex->image;
@@ -2456,7 +2462,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
 			image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
 		}
 
 		{ //make dst readable
@@ -2559,7 +2565,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.pNext = NULL;
 			image_memory_barrier.srcAccessMask = 0;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-			image_memory_barrier.oldLayout = src_tex->unbound_layout;
+			image_memory_barrier.oldLayout = src_tex->layout;
 			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
 
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2579,7 +2585,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.pNext = NULL;
 			image_memory_barrier.srcAccessMask = 0;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-			image_memory_barrier.oldLayout = dst_tex->unbound_layout;
+			image_memory_barrier.oldLayout = dst_tex->layout;
 			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2631,7 +2637,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
-			image_memory_barrier.newLayout = src_tex->unbound_layout;
+			image_memory_barrier.newLayout = src_tex->layout;
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			image_memory_barrier.image = src_tex->image;
@@ -2641,7 +2647,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
 		}
 
 		{ //make dst readable
@@ -2652,7 +2658,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
-			image_memory_barrier.newLayout = dst_tex->unbound_layout;
+			image_memory_barrier.newLayout = dst_tex->layout;
 
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2663,7 +2669,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
 		}
 	}
 
@@ -2733,7 +2739,8 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 		ERR_FAIL_COND_V_MSG(!(p_format[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), VK_NULL_HANDLE,
 				"Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set.");
 
-		bool can_be_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
+		bool is_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
+		bool is_storage = p_format[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT;
 
 		switch (p_initial_action) {
 
@@ -2745,7 +2752,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 			case INITIAL_ACTION_KEEP_COLOR: {
 				if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-					description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 				} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
@@ -2761,10 +2768,10 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 
 				if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-					description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 				} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-					description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
 				} else {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -2799,12 +2806,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 				if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 					description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 					description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-					description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+					description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 				} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 
 					description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 					description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
-					description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+					description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
 				} else {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -2815,12 +2822,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 					if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 						description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-						description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 					} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 
 						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
 						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-						description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
 					} else {
 						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -2831,12 +2838,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 					if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
 						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-						description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 					} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 
 						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
 						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-						description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
 					} else {
 						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -3531,8 +3538,15 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 
 	uint32_t stages_processed = 0;
 
+	bool is_compute = false;
+
 	for (int i = 0; i < p_stages.size(); i++) {
 
+		if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) {
+			is_compute = true;
+			ERR_FAIL_COND_V_MSG(p_stages.size() != 1, RID(),
+					"Compute shaders can only receive one stage, dedicated to compute.");
+		}
 		ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_stages[i].shader_stage), RID(),
 				"Stage " + String(shader_stage_names[p_stages[i].shader_stage]) + " submitted more than once.");
 
@@ -3793,6 +3807,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 	shader.vertex_input_mask = vertex_input_mask;
 	shader.fragment_outputs = fragment_outputs;
 	shader.push_constant = push_constant;
+	shader.is_compute = is_compute;
 
 	String error_text;
 
@@ -4166,6 +4181,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 	List<Vector<VkDescriptorImageInfo> > image_infos;
 	//used for verification to make sure a uniform set does not use a framebuffer bound texture
 	Vector<RID> attachable_textures;
+	Vector<Texture *> mutable_sampled_textures;
+	Vector<Texture *> mutable_storage_textures;
 
 	for (uint32_t i = 0; i < set_uniform_count; i++) {
 		const UniformInfo &set_uniform = set_uniforms[i];
@@ -4259,9 +4276,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 						ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
 					}
 
-					img_info.imageLayout = texture->unbound_layout;
+					img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 
 					image_info.push_back(img_info);
+
+					if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
+						//can also be used as storage, add to mutable sampled
+						mutable_sampled_textures.push_back(texture);
+					}
 				}
 
 				write.dstArrayElement = 0;
@@ -4306,9 +4328,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 						ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
 					}
 
-					img_info.imageLayout = texture->unbound_layout;
+					img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 
 					image_info.push_back(img_info);
+
+					if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
+						//can also be used as storage, add to mutable sampled
+						mutable_sampled_textures.push_back(texture);
+					}
 				}
 
 				write.dstArrayElement = 0;
@@ -4321,7 +4348,54 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 				type_size = uniform.ids.size();
 			} break;
 			case UNIFORM_TYPE_IMAGE: {
-				//todo
+
+				if (uniform.ids.size() != set_uniform.length) {
+					if (set_uniform.length > 1) {
+						ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
+					} else {
+						ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
+					}
+				}
+
+				Vector<VkDescriptorImageInfo> image_info;
+
+				for (int j = 0; j < uniform.ids.size(); j++) {
+					Texture *texture = texture_owner.getornull(uniform.ids[j]);
+
+					ERR_FAIL_COND_V_MSG(!texture, RID(),
+							"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
+
+					ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(),
+							"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform.");
+
+					VkDescriptorImageInfo img_info;
+					img_info.sampler = NULL;
+					img_info.imageView = texture->view;
+
+					if (texture->owner.is_valid()) {
+						texture = texture_owner.getornull(texture->owner);
+						ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
+					}
+
+					img_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+					image_info.push_back(img_info);
+
+					if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
+						//can also be used as sampled, add to mutable storage
+						mutable_storage_textures.push_back(texture);
+					}
+				}
+
+				write.dstArrayElement = 0;
+				write.descriptorCount = uniform.ids.size();
+				write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+				write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
+				write.pBufferInfo = NULL;
+				write.pTexelBufferView = NULL;
+
+				type_size = uniform.ids.size();
+
 			} break;
 			case UNIFORM_TYPE_TEXTURE_BUFFER: {
 				if (uniform.ids.size() != set_uniform.length) {
@@ -4476,6 +4550,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 	uniform_set.descriptor_set = descriptor_set;
 	uniform_set.format = shader->set_formats[p_shader_set];
 	uniform_set.attachable_textures = attachable_textures;
+	uniform_set.mutable_sampled_textures = mutable_sampled_textures;
+	uniform_set.mutable_storage_textures = mutable_storage_textures;
 	uniform_set.shader_set = p_shader_set;
 	uniform_set.shader_id = p_shader;
 
@@ -4651,6 +4727,9 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
 	Shader *shader = shader_owner.getornull(p_shader);
 	ERR_FAIL_COND_V(!shader, RID());
 
+	ERR_FAIL_COND_V_MSG(shader->is_compute, RID(),
+			"Compute shaders can't be used in render pipelines");
+
 	if (p_framebuffer_format == INVALID_ID) {
 		//if nothing provided, use an empty one (no attachments)
 		p_framebuffer_format = framebuffer_format_create(Vector<AttachmentFormat>());
@@ -4996,7 +5075,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
 	pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive];
 #endif
 	//create ID to associate with this pipeline
-	RID id = pipeline_owner.make_rid(pipeline);
+	RID id = render_pipeline_owner.make_rid(pipeline);
 	//now add aall the dependencies
 	_add_dependency(id, p_shader);
 	return id;
@@ -5004,7 +5083,55 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
 
 bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) {
 	_THREAD_SAFE_METHOD_
-	return pipeline_owner.owns(p_pipeline);
+	return render_pipeline_owner.owns(p_pipeline); //render pipelines are tracked by their own owner, separate from compute_pipeline_owner
+}
+
+/**************************/
+/**** COMPUTE PIPELINE ****/
+/**************************/
+
+RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) {
+	_THREAD_SAFE_METHOD_
+	//creates a compute pipeline from p_shader; every failure path below passes RID() as the error return value
+	//p_shader must resolve to a shader registered in shader_owner
+	Shader *shader = shader_owner.getornull(p_shader);
+	ERR_FAIL_COND_V(!shader, RID());
+
+	ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(),
+			"Non-compute shaders can't be used in compute pipelines");
+
+	//fill in the Vulkan create info for the compute pipeline
+	VkComputePipelineCreateInfo compute_pipeline_create_info;
+
+	compute_pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+	compute_pipeline_create_info.pNext = NULL;
+	compute_pipeline_create_info.flags = 0;
+
+	compute_pipeline_create_info.stage = shader->pipeline_stages[0]; //shader_create rejects compute shaders with more than one stage, so index 0 is the compute stage
+	compute_pipeline_create_info.layout = shader->pipeline_layout;
+	compute_pipeline_create_info.basePipelineHandle = NULL; //no base pipeline is supplied (flags is 0 above)
+	compute_pipeline_create_info.basePipelineIndex = 0;
+
+	ComputePipeline pipeline;
+	VkResult err = vkCreateComputePipelines(device, NULL, 1, &compute_pipeline_create_info, NULL, &pipeline.pipeline); //NULL pipeline cache, one create info, NULL allocator
+	ERR_FAIL_COND_V(err, RID());
+
+	pipeline.set_formats = shader->set_formats; //copy the shader data that the pipeline record keeps alongside the Vulkan handle
+	pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage;
+	pipeline.pipeline_layout = shader->pipeline_layout;
+	pipeline.shader = p_shader;
+	pipeline.push_constant_size = shader->push_constant.push_constant_size;
+
+	//create ID to associate with this pipeline
+	RID id = compute_pipeline_owner.make_rid(pipeline);
+	//now add all the dependencies (the pipeline is registered as depending on its shader)
+	_add_dependency(id, p_shader);
+	return id;
+}
+
+bool RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) {
+	//returns whether compute_pipeline_owner owns p_pipeline; NOTE(review): render_pipeline_is_valid starts with _THREAD_SAFE_METHOD_ and this does not - confirm that is intended
+	return compute_pipeline_owner.owns(p_pipeline);
 }
 
 /****************/
@@ -5055,6 +5182,8 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(in
 	_THREAD_SAFE_METHOD_
 
 	ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
+	ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");
+
 	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
 	draw_list = memnew(DrawList);
 	draw_list->command_buffer = command_buffer;
@@ -5256,6 +5385,9 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
 
 	_THREAD_SAFE_METHOD_
 
+	ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
+	ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");
+
 	Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer);
 	ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
 
@@ -5530,7 +5662,7 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI
 	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
 #endif
 
-	const RenderPipeline *pipeline = pipeline_owner.getornull(p_render_pipeline);
+	const RenderPipeline *pipeline = render_pipeline_owner.getornull(p_render_pipeline);
 	ERR_FAIL_COND(!pipeline);
 #ifdef DEBUG_ENABLED
 	ERR_FAIL_COND(pipeline->validation.framebuffer_format != dl->validation.framebuffer_format);
@@ -5887,6 +6019,284 @@ void RenderingDeviceVulkan::draw_list_end() {
 	//  * Another render pass happens (since we may be done
 	_memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT, true);
 }
+
+/***********************/
+/**** COMPUTE LISTS ****/
+/***********************/
+
+RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() {
+
+	//refuse to open a compute list while a draw list or another compute list is still active
+	ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
+	ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");
+
+	//the new list records into the current frame's draw command buffer
+	ComputeList *new_list = memnew(ComputeList);
+	new_list->command_buffer = frames[frame].draw_command_buffer;
+	compute_list = new_list;
+
+	return ID_TYPE_COMPUTE_LIST;
+}
+
+void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) {
+	//binds p_compute_pipeline on the active compute list; when the pipeline's shader differs from the
+	//previously bound one, descriptor sets whose format no longer matches are flagged for rebinding.
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+	const ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_compute_pipeline);
+	ERR_FAIL_COND(!pipeline);
+
+	if (p_compute_pipeline == cl->state.pipeline) {
+		return; //redundant state, return.
+	}
+
+	cl->state.pipeline = p_compute_pipeline;
+	cl->state.pipeline_layout = pipeline->pipeline_layout;
+
+	vkCmdBindPipeline(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline);
+
+	if (cl->state.pipeline_shader != pipeline->shader) {
+		// shader changed, so descriptor sets may become incompatible.
+
+		//go through ALL sets, and unbind them (and all those above) if the format is different
+
+		uint32_t pcount = pipeline->set_formats.size(); //formats count in this pipeline
+		cl->state.set_count = MAX(cl->state.set_count, pcount);
+		const uint32_t *pformats = pipeline->set_formats.ptr(); //pipeline set formats
+
+		bool sets_valid = true; //once invalid, all above become invalid
+		for (uint32_t i = 0; i < pcount; i++) {
+			//if a part of the format is different, invalidate it (and the rest)
+			if (!sets_valid || cl->state.sets[i].pipeline_expected_format != pformats[i]) {
+				cl->state.sets[i].bound = false;
+				cl->state.sets[i].pipeline_expected_format = pformats[i];
+				sets_valid = false;
+			}
+		}
+
+		for (uint32_t i = pcount; i < cl->state.set_count; i++) {
+			//unbind the ones above (not used) if exist
+			cl->state.sets[i].bound = false;
+		}
+
+		cl->state.set_count = pcount; //update set count
+
+		if (pipeline->push_constant_size) {
+			cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages;
+#ifdef DEBUG_ENABLED
+			cl->validation.pipeline_push_constant_suppplied = false;
+#endif
+		}
+
+		//remember which shader is bound; without this the comparison above never matches and the
+		//dispatch-time debug messages would describe an unset shader
+		cl->state.pipeline_shader = pipeline->shader;
+	}
+
+#ifdef DEBUG_ENABLED
+	//update compute pass pipeline info
+	cl->validation.pipeline_active = true;
+	cl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
+#endif
+}
+void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) {
+	//records p_uniform_set as the descriptor set for slot p_index of the active compute list (the actual
+	//vkCmdBindDescriptorSets happens at dispatch time) and transitions the set's mutable textures to the
+	//layout they are going to be accessed with.
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+#ifdef DEBUG_ENABLED
+	//sets[] holds MAX_UNIFORM_SETS entries, so the last valid index is MAX_UNIFORM_SETS - 1
+	ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
+			"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
+#endif
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
+#endif
+
+	UniformSet *uniform_set = uniform_set_owner.getornull(p_uniform_set);
+	ERR_FAIL_COND(!uniform_set);
+
+	if (p_index > cl->state.set_count) {
+		cl->state.set_count = p_index;
+	}
+
+	cl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; //update set pointer
+	cl->state.sets[p_index].bound = false; //needs rebind
+	cl->state.sets[p_index].uniform_set_format = uniform_set->format;
+	cl->state.sets[p_index].uniform_set = p_uniform_set;
+
+	//textures that will be sampled must be in SHADER_READ_ONLY_OPTIMAL layout
+	uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
+	Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
+
+	for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
+		if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+
+			VkImageMemoryBarrier image_memory_barrier;
+			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			image_memory_barrier.pNext = NULL;
+			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.oldLayout = textures_to_sampled[i]->layout;
+			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.image = textures_to_sampled[i]->image;
+			image_memory_barrier.subresourceRange.aspectMask = textures_to_sampled[i]->read_aspect_mask;
+			image_memory_barrier.subresourceRange.baseMipLevel = 0;
+			image_memory_barrier.subresourceRange.levelCount = textures_to_sampled[i]->mipmaps;
+			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+			image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
+
+			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+			textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+			//already in sampled layout, so compute_list_end() has nothing to restore for it
+			cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
+		}
+	}
+
+	//textures that will be used as storage images must be in GENERAL layout
+	uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
+	Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
+
+	for (uint32_t i = 0; i < textures_to_storage_count; i++) {
+		if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
+
+			VkImageMemoryBarrier image_memory_barrier;
+			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+			image_memory_barrier.pNext = NULL;
+			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
+			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			image_memory_barrier.image = textures_to_storage[i]->image;
+			//the aspect must come from the storage texture being transitioned (not from the sampled list,
+			//which may be shorter than this one)
+			image_memory_barrier.subresourceRange.aspectMask = textures_to_storage[i]->read_aspect_mask;
+			image_memory_barrier.subresourceRange.baseMipLevel = 0;
+			image_memory_barrier.subresourceRange.levelCount = textures_to_storage[i]->mipmaps;
+			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+			image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
+
+			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+			textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
+
+			cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards
+		}
+	}
+
+#if 0
+	{ //validate that textures bound are not attached as framebuffer bindings
+		uint32_t attachable_count = uniform_set->attachable_textures.size();
+		const RID *attachable_ptr = uniform_set->attachable_textures.ptr();
+		uint32_t bound_count = draw_list_bound_textures.size();
+		const RID *bound_ptr = draw_list_bound_textures.ptr();
+		for (uint32_t i = 0; i < attachable_count; i++) {
+			for (uint32_t j = 0; j < bound_count; j++) {
+				ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j],
+						"Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed.");
+			}
+		}
+	}
+#endif
+}
+void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) {
+	//uploads p_data_size bytes of push constant data using the layout and stages of the bound compute pipeline
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *list = compute_list;
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!list->validation.active, "Submitted Compute Lists can no longer be modified.");
+
+	//the supplied size must match exactly what the bound pipeline declared
+	ERR_FAIL_COND_MSG(p_data_size != list->validation.pipeline_push_constant_size,
+			"This compute pipeline requires (" + itos(list->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
+#endif
+
+	const VkPipelineLayout layout = list->state.pipeline_layout;
+	const uint32_t stages = list->state.pipeline_push_constant_stages;
+	vkCmdPushConstants(list->command_buffer, layout, stages, 0, p_data_size, p_data);
+
+#ifdef DEBUG_ENABLED
+	//dispatch checks this flag when the pipeline requires push constants
+	list->validation.pipeline_push_constant_suppplied = true;
+#endif
+}
+void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	//binds every descriptor set that still needs binding, then records the dispatch with the given group counts
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *list = compute_list;
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!list->validation.active, "Submitted Compute Lists can no longer be modified.");
+
+	ERR_FAIL_COND_MSG(!list->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
+
+	//a pipeline that declares push constants requires them to have been supplied
+	if (list->validation.pipeline_push_constant_size > 0) {
+		ERR_FAIL_COND_MSG(!list->validation.pipeline_push_constant_suppplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+#endif
+
+	const uint32_t set_total = list->state.set_count;
+	for (uint32_t set_index = 0; set_index < set_total; set_index++) {
+
+		ComputeList::SetState &slot = list->state.sets[set_index];
+
+		if (slot.pipeline_expected_format == 0) {
+			continue; //the pipeline expects nothing in this slot
+		}
+#ifdef DEBUG_ENABLED
+		if (slot.pipeline_expected_format != slot.uniform_set_format) {
+
+			//format mismatch, report the most specific error available
+			if (slot.uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(set_index) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(slot.uniform_set)) {
+				UniformSet *us = uniform_set_owner.getornull(slot.uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(set_index) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(list->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(set_index) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(list->state.pipeline_shader));
+			}
+		}
+#endif
+		if (slot.bound) {
+			continue; //already bound, nothing to do
+		}
+
+		vkCmdBindDescriptorSets(list->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, list->state.pipeline_layout, set_index, 1, &slot.descriptor_set, 0, NULL);
+		slot.bound = true;
+	}
+
+	vkCmdDispatch(list->command_buffer, p_x_groups, p_y_groups, p_z_groups);
+}
+void RenderingDeviceVulkan::compute_list_end() {
+	//closes the active compute list: every texture that was moved to storage layout while binding uniform
+	//sets is transitioned back to SHADER_READ_ONLY_OPTIMAL, then the list is destroyed.
+	ERR_FAIL_COND(!compute_list);
+
+	for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
+
+		Texture *texture = E->get();
+
+		VkImageMemoryBarrier image_memory_barrier;
+		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+		image_memory_barrier.pNext = NULL;
+		image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+		image_memory_barrier.oldLayout = texture->layout;
+		image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		image_memory_barrier.image = texture->image;
+		image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask;
+		image_memory_barrier.subresourceRange.baseMipLevel = 0;
+		image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
+		image_memory_barrier.subresourceRange.baseArrayLayer = 0;
+		image_memory_barrier.subresourceRange.layerCount = texture->layers;
+
+		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
+
+		texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+	}
+
+	memdelete(compute_list);
+	//reset the pointer, otherwise the "list already active" checks keep failing on a dangling pointer
+	compute_list = NULL;
+}
+
 #if 0
 void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) {
 
@@ -5998,10 +6408,14 @@ void RenderingDeviceVulkan::_free_internal(RID p_id) {
 		UniformSet *uniform_set = uniform_set_owner.getornull(p_id);
 		frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set);
 		uniform_set_owner.free(p_id);
-	} else if (pipeline_owner.owns(p_id)) {
-		RenderPipeline *pipeline = pipeline_owner.getornull(p_id);
-		frames[frame].pipelines_to_dispose_of.push_back(*pipeline);
-		pipeline_owner.free(p_id);
+	} else if (render_pipeline_owner.owns(p_id)) {
+		RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id);
+		frames[frame].render_pipelines_to_dispose_of.push_back(*pipeline);
+		render_pipeline_owner.free(p_id);
+	} else if (compute_pipeline_owner.owns(p_id)) {
+		ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_id);
+		frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline);
+		compute_pipeline_owner.free(p_id);
 	} else {
 		ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()));
 	}
@@ -6022,6 +6436,10 @@ void RenderingDeviceVulkan::finalize_frame() {
 		ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
 	}
 
+	if (compute_list) {
+		ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
+	}
+
 	{ //complete the setup buffer (that needs to be processed before anything else)
 		vkEndCommandBuffer(frames[frame].setup_command_buffer);
 		vkEndCommandBuffer(frames[frame].draw_command_buffer);
@@ -6032,12 +6450,20 @@ void RenderingDeviceVulkan::finalize_frame() {
 void RenderingDeviceVulkan::_free_pending_resources(int p_frame) {
 	//free in dependency usage order, so nothing weird happens
 	//pipelines
-	while (frames[p_frame].pipelines_to_dispose_of.front()) {
-		RenderPipeline *pipeline = &frames[p_frame].pipelines_to_dispose_of.front()->get();
+	while (frames[p_frame].render_pipelines_to_dispose_of.front()) {
+		RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get();
+
+		vkDestroyPipeline(device, pipeline->pipeline, NULL);
+
+		frames[p_frame].render_pipelines_to_dispose_of.pop_front();
+	}
+
+	while (frames[p_frame].compute_pipelines_to_dispose_of.front()) {
+		ComputePipeline *pipeline = &frames[p_frame].compute_pipelines_to_dispose_of.front()->get();
 
 		vkDestroyPipeline(device, pipeline->pipeline, NULL);
 
-		frames[p_frame].pipelines_to_dispose_of.pop_front();
+		frames[p_frame].compute_pipelines_to_dispose_of.pop_front();
 	}
 
 	//uniform sets
@@ -6344,6 +6770,8 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context) {
 	draw_list = NULL;
 	draw_list_count = 0;
 	draw_list_split = false;
+
+	compute_list = NULL;
 }
 
 template <class T>
@@ -6430,7 +6858,8 @@ void RenderingDeviceVulkan::finalize() {
 
 	_flush(false);
 
-	_free_rids(pipeline_owner, "Pipeline");
+	_free_rids(render_pipeline_owner, "Pipeline");
+	_free_rids(compute_pipeline_owner, "Compute");
 	_free_rids(uniform_set_owner, "UniformSet");
 	_free_rids(texture_buffer_owner, "TextureBuffer");
 	_free_rids(storage_buffer_owner, "StorageBuffer");

+ 108 - 4
drivers/vulkan/rendering_device_vulkan.h

@@ -91,6 +91,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		ID_TYPE_VERTEX_FORMAT,
 		ID_TYPE_DRAW_LIST,
 		ID_TYPE_SPLIT_DRAW_LIST,
+		ID_TYPE_COMPUTE_LIST,
 		ID_TYPE_MAX,
 		ID_BASE_SHIFT = 58 //5 bits for ID types
 	};
@@ -138,8 +139,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
 
 		Vector<DataFormat> allowed_shared_formats;
 
-		VkImageLayout bound_layout; //layout used when bound to framebuffer being drawn
-		VkImageLayout unbound_layout; //layout used otherwise
+		VkImageLayout layout;
+
 		uint32_t read_aspect_mask;
 		uint32_t barrier_aspect_mask;
 		bool bound; //bound to framebffer
@@ -286,6 +287,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 			}
 		};
 
+		uint32_t storage_mask;
 		Vector<RID> texture_ids;
 
 		struct Version {
@@ -519,6 +521,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 
 		PushConstant push_constant;
 
+		bool is_compute = false;
 		int max_output;
 		Vector<Set> sets;
 		Vector<uint32_t> set_formats;
@@ -620,6 +623,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		VkDescriptorSet descriptor_set;
 		//VkPipelineLayout pipeline_layout; //not owned, inherited from shader
 		Vector<RID> attachable_textures; //used for validation
+		Vector<Texture *> mutable_sampled_textures; //used for layout change
+		Vector<Texture *> mutable_storage_textures; //used for layout change
 	};
 
 	RID_Owner<UniformSet, true> uniform_set_owner;
@@ -660,7 +665,19 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		uint32_t push_constant_stages;
 	};
 
-	RID_Owner<RenderPipeline, true> pipeline_owner;
+	RID_Owner<RenderPipeline, true> render_pipeline_owner;
+
+	struct ComputePipeline { //state kept for every compute pipeline RID
+
+		RID shader; //shader the pipeline was created from
+		Vector<uint32_t> set_formats; //per-set formats, compared against bound uniform sets when the pipeline is bound
+		VkPipelineLayout pipeline_layout; // not owned, needed for push constants
+		VkPipeline pipeline; //destroyed with vkDestroyPipeline once the pipeline is disposed of
+		uint32_t push_constant_size; //size in bytes the pipeline expects; 0 means no push constant
+		uint32_t push_constant_stages; //stage flags passed to vkCmdPushConstants
+	};
+
+	RID_Owner<ComputePipeline, true> compute_pipeline_owner;
 
 	/*******************/
 	/**** DRAW LIST ****/
@@ -796,6 +813,74 @@ class RenderingDeviceVulkan : public RenderingDevice {
 	Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Color> &p_clear_colors, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents);
 	_FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id);
 
+	/**********************/
+	/**** COMPUTE LIST ****/
+	/**********************/
+
+	struct ComputeList { //recording state of the currently open compute list
+
+		VkCommandBuffer command_buffer; //if persistent, this is owned, otherwise it's shared with the ringbuffer
+
+		struct SetState { //tracking data for one descriptor set slot
+			uint32_t pipeline_expected_format; //format the bound pipeline expects here, 0 if none
+			uint32_t uniform_set_format; //format of the uniform set supplied for this slot, 0 if none
+			VkDescriptorSet descriptor_set;
+			RID uniform_set; //kept for the debug error messages at dispatch time
+			bool bound; //false means the set must be (re)bound before the next dispatch
+			SetState() {
+				bound = false;
+				pipeline_expected_format = 0;
+				uniform_set_format = 0;
+				descriptor_set = VK_NULL_HANDLE;
+			}
+		};
+
+		struct State {
+			Set<Texture *> textures_to_sampled_layout; //textures to transition back to sampled layout when the list ends
+
+			SetState sets[MAX_UNIFORM_SETS];
+			uint32_t set_count;
+			RID pipeline; //last bound pipeline, used to skip redundant binds
+			RID pipeline_shader;
+			VkPipelineLayout pipeline_layout;
+			uint32_t pipeline_push_constant_stages;
+
+			State() {
+				set_count = 0;
+				pipeline_layout = VK_NULL_HANDLE;
+				pipeline_push_constant_stages = 0;
+			}
+		} state;
+#ifdef DEBUG_ENABLED
+
+		struct Validation {
+			bool active; //means command buffer was not closed, so you can keep adding things
+			Vector<uint32_t> set_formats;
+			Vector<bool> set_bound;
+			Vector<RID> set_rids;
+			//last pipeline set values
+			bool pipeline_active;
+			RID pipeline_shader;
+			uint32_t invalid_set_from;
+			Vector<uint32_t> pipeline_set_formats;
+			uint32_t pipeline_push_constant_size;
+			bool pipeline_push_constant_suppplied; //set once push constant data was uploaded for the bound pipeline
+
+			Validation() {
+				active = true;
+				invalid_set_from = 0;
+
+				//pipeline state initialize
+				pipeline_active = false;
+				pipeline_push_constant_size = 0;
+				pipeline_push_constant_suppplied = false;
+			}
+		} validation;
+#endif
+	};
+
+	ComputeList *compute_list;
+
 	/**************************/
 	/**** FRAME MANAGEMENT ****/
 	/**************************/
@@ -823,7 +908,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		List<Shader> shaders_to_dispose_of;
 		List<VkBufferView> buffer_views_to_dispose_of;
 		List<UniformSet> uniform_sets_to_dispose_of;
-		List<RenderPipeline> pipelines_to_dispose_of;
+		List<RenderPipeline> render_pipelines_to_dispose_of;
+		List<ComputePipeline> compute_pipelines_to_dispose_of;
 
 		VkCommandPool command_pool;
 		VkCommandBuffer setup_command_buffer; //used at the begining of every frame for set-up
@@ -940,6 +1026,13 @@ public:
 	virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0);
 	virtual bool render_pipeline_is_valid(RID p_pipeline);
 
+	/**************************/
+	/**** COMPUTE PIPELINE ****/
+	/**************************/
+
+	virtual RID compute_pipeline_create(RID p_shader);
+	virtual bool compute_pipeline_is_valid(RID p_pipeline);
+
 	/****************/
 	/**** SCREEN ****/
 	/****************/
@@ -970,6 +1063,17 @@ public:
 
 	virtual void draw_list_end();
 
+	/***********************/
+	/**** COMPUTE LISTS ****/
+	/***********************/
+
+	virtual ComputeListID compute_list_begin();
+	virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline);
+	virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index);
+	virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size);
+	virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
+	virtual void compute_list_end();
+
 	/**************/
 	/**** FREE ****/
 	/**************/

+ 2 - 1
editor/spatial_editor_gizmos.cpp

@@ -170,8 +170,9 @@ void EditorSpatialGizmo::Instance::create_instance(Spatial *p_base, bool p_hidde
 
 	instance = VS::get_singleton()->instance_create2(mesh->get_rid(), p_base->get_world()->get_scenario());
 	VS::get_singleton()->instance_attach_object_instance_id(instance, p_base->get_instance_id());
-	if (skin_reference.is_valid())
+	if (skin_reference.is_valid()) {
 		VS::get_singleton()->instance_attach_skeleton(instance, skin_reference->get_skeleton());
+	}
 	if (extra_margin)
 		VS::get_singleton()->instance_set_extra_visibility_margin(instance, 1);
 	VS::get_singleton()->instance_geometry_set_cast_shadows_setting(instance, VS::SHADOW_CASTING_SETTING_OFF);

+ 45 - 15
gles_builders.py

@@ -509,6 +509,7 @@ class RDHeaderStruct:
     def __init__(self):
         self.vertex_lines = []
         self.fragment_lines = []
+        self.compute_lines = []
 
         self.vertex_included_files = []
         self.fragment_included_files = []
@@ -517,6 +518,7 @@ class RDHeaderStruct:
         self.line_offset = 0
         self.vertex_offset = 0
         self.fragment_offset = 0
+        self.compute_offset = 0
 
 
 def include_file_in_rd_header(filename, header_data, depth):
@@ -539,6 +541,13 @@ def include_file_in_rd_header(filename, header_data, depth):
             header_data.fragment_offset = header_data.line_offset
             continue
 
+        if line.find("[compute]") != -1:
+            header_data.reading = "compute"
+            line = fs.readline()
+            header_data.line_offset += 1
+            header_data.compute_offset = header_data.line_offset
+            continue
+
         while line.find("#include ") != -1:
             includeline = line.replace("#include ", "").strip()[1:-1]
 
@@ -553,6 +562,10 @@ def include_file_in_rd_header(filename, header_data, depth):
                 header_data.fragment_included_files += [included_file]
                 if include_file_in_rd_header(included_file, header_data, depth + 1) is None:
                     print("Error in file '" + filename + "': #include " + includeline + "could not be found!")
+            elif not included_file in header_data.compute_included_files and header_data.reading == "compute":
+                header_data.compute_included_files += [included_file]
+                if include_file_in_rd_header(included_file, header_data, depth + 1) is None:
+                    print("Error in file '" + filename + "': #include " + includeline + "could not be found!")
 
             line = fs.readline()
 
@@ -563,6 +576,8 @@ def include_file_in_rd_header(filename, header_data, depth):
             header_data.vertex_lines += [line]
         if header_data.reading == "fragment":
             header_data.fragment_lines += [line]
+        if header_data.reading == "compute":
+            header_data.compute_lines += [line]
 
         line = fs.readline()
         header_data.line_offset += 1
@@ -572,7 +587,7 @@ def include_file_in_rd_header(filename, header_data, depth):
     return header_data
 
 def build_rd_header(filename):
-    header_data = LegacyGLHeaderStruct()
+    header_data = RDHeaderStruct()
     include_file_in_rd_header(filename, header_data, 0)
 
     out_file = filename + ".gen.h"
@@ -598,24 +613,39 @@ def build_rd_header(filename):
 
     fd.write("\t"+out_file_class+"() {\n\n")
 
+    if (len(header_data.compute_lines)):
 
-    fd.write("\t\tstatic const char _vertex_code[]={\n")
-    for x in header_data.vertex_lines:
-        for c in x:
-            fd.write(str(ord(c)) + ",")
+        fd.write("\t\tstatic const char _compute_code[]={\n")
+        for x in header_data.compute_lines:
+            for c in x:
+                fd.write(str(ord(c)) + ",")
 
-        fd.write(str(ord('\n')) + ",")
-    fd.write("\t\t0};\n\n")
+            fd.write(str(ord('\n')) + ",")
 
-    fd.write("\t\tstatic const char _fragment_code[]={\n")
-    for x in header_data.fragment_lines:
-        for c in x:
-            fd.write(str(ord(c)) + ",")
+        fd.write("\t\t0};\n\n")
+        fd.write("\t\tsetup(nullptr,nullptr,_compute_code,\""+out_file_class+"\");\n")
+        fd.write("\t}\n")
 
-        fd.write(str(ord('\n')) + ",")
-    fd.write("\t\t0};\n\n")
-    fd.write("\t\tsetup(_vertex_code,_fragment_code,\""+out_file_class+"\");\n")
-    fd.write("\t}\n")
+    else:
+
+        fd.write("\t\tstatic const char _vertex_code[]={\n")
+        for x in header_data.vertex_lines:
+            for c in x:
+                fd.write(str(ord(c)) + ",")
+    
+            fd.write(str(ord('\n')) + ",")
+        fd.write("\t\t0};\n\n")
+    
+        fd.write("\t\tstatic const char _fragment_code[]={\n")
+        for x in header_data.fragment_lines:
+            for c in x:
+               fd.write(str(ord(c)) + ",")
+    
+            fd.write(str(ord('\n')) + ",")
+    
+        fd.write("\t\t0};\n\n")
+        fd.write("\t\tsetup(_vertex_code,_fragment_code,nullptr,\""+out_file_class+"\");\n")
+        fd.write("\t}\n")
 
 
     fd.write("};\n\n")

+ 3 - 1
scene/3d/skeleton.cpp

@@ -830,7 +830,9 @@ Ref<SkinReference> Skeleton::register_skin(const Ref<Skin> &p_skin) {
 	skin_bindings.insert(skin_ref.operator->());
 
 	skin->connect("changed", skin_ref.operator->(), "_skin_changed");
-	_make_dirty();
+
+	_make_dirty(); //skin needs to be updated, so update skeleton
+
 	return skin_ref;
 }
 

+ 9 - 0
servers/visual/rasterizer_rd/rasterizer_scene_rd.cpp

@@ -1469,6 +1469,15 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {
 	sky_ggx_samples_realtime = GLOBAL_GET("rendering/quality/reflections/ggx_samples_realtime");
 	sky_use_cubemap_array = GLOBAL_GET("rendering/quality/reflections/texture_array_reflections");
 	//	sky_use_cubemap_array = false;
+
+	{
+		String defines = "";
+		Vector<String> versions;
+		versions.push_back("");
+		giprobe_lighting_shader.initialize(versions, defines);
+		giprobe_lighting_shader_version = giprobe_lighting_shader.version_create();
+		giprobe_lighting_shader_version_shader = giprobe_lighting_shader.version_get_shader(giprobe_lighting_shader_version, 0);
+	}
 }
 
 RasterizerSceneRD::~RasterizerSceneRD() {

+ 7 - 0
servers/visual/rasterizer_rd/rasterizer_scene_rd.h

@@ -4,6 +4,7 @@
 #include "core/rid_owner.h"
 #include "servers/visual/rasterizer.h"
 #include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h"
+#include "servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl.gen.h"
 #include "servers/visual/rendering_device.h"
 
 class RasterizerSceneRD : public RasterizerScene {
@@ -108,6 +109,12 @@ private:
 
 	mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner;
 
+	/* GIPROBE INSTANCE */
+
+	GiprobeLightingShaderRD giprobe_lighting_shader;
+	RID giprobe_lighting_shader_version;
+	RID giprobe_lighting_shader_version_shader;
+
 	/* SHADOW ATLAS */
 
 	struct ShadowAtlas {

+ 2 - 0
servers/visual/rasterizer_rd/rasterizer_storage_rd.h

@@ -778,6 +778,8 @@ public:
 
 	_FORCE_INLINE_ RID skeleton_get_3d_uniform_set(RID p_skeleton, RID p_shader, uint32_t p_set) const {
 		Skeleton *skeleton = skeleton_owner.getornull(p_skeleton);
+		ERR_FAIL_COND_V(!skeleton, RID());
+		ERR_FAIL_COND_V(skeleton->size == 0, RID());
 		if (skeleton->use_2d) {
 			return RID();
 		}

+ 115 - 6
servers/visual/rasterizer_rd/shader_rd.cpp

@@ -33,11 +33,11 @@
 #include "rasterizer_rd.h"
 #include "servers/visual/rendering_device.h"
 
-void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) {
+void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) {
 
 	name = p_name;
 	//split vertex and shader code (thank you, shader compiler programmers from you know what company).
-	{
+	if (p_vertex_code) {
 		String defines_tag = "\nVERSION_DEFINES";
 		String globals_tag = "\nVERTEX_SHADER_GLOBALS";
 		String material_tag = "\nMATERIAL_UNIFORMS";
@@ -79,7 +79,7 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
 		}
 	}
 
-	{
+	if (p_fragment_code) {
 		String defines_tag = "\nVERSION_DEFINES";
 		String globals_tag = "\nFRAGMENT_SHADER_GLOBALS";
 		String material_tag = "\nMATERIAL_UNIFORMS";
@@ -135,6 +135,50 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
 			}
 		}
 	}
+
+	if (p_compute_code) {
+		is_compute = true;
+
+		String defines_tag = "\nVERSION_DEFINES";
+		String globals_tag = "\nCOMPUTE_SHADER_GLOBALS";
+		String material_tag = "\nMATERIAL_UNIFORMS";
+		String code_tag = "\nCOMPUTE_SHADER_CODE";
+		String code = p_compute_code;
+
+		int cpos = code.find(defines_tag);
+		if (cpos != -1) {
+			compute_codev = code.substr(0, cpos).ascii();
+			code = code.substr(cpos + defines_tag.length(), code.length());
+		}
+
+		cpos = code.find(material_tag);
+
+		if (cpos == -1) {
+			compute_code0 = code.ascii();
+		} else {
+			compute_code0 = code.substr(0, cpos).ascii();
+			code = code.substr(cpos + material_tag.length(), code.length());
+
+			cpos = code.find(globals_tag);
+
+			if (cpos == -1) {
+				compute_code1 = code.ascii();
+			} else {
+
+				compute_code1 = code.substr(0, cpos).ascii();
+				String code2 = code.substr(cpos + globals_tag.length(), code.length());
+
+				cpos = code2.find(code_tag);
+				if (cpos == -1) {
+					compute_code2 = code2.ascii();
+				} else {
+
+					compute_code2 = code2.substr(0, cpos).ascii();
+					compute_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii();
+				}
+			}
+		}
+	}
 }
 
 RID ShaderRD::version_create() {
@@ -171,7 +215,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 	RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
 	bool build_ok = true;
 
-	{
+	if (!is_compute) {
 		//vertex stage
 
 		StringBuilder builder;
@@ -211,7 +255,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 		}
 	}
 
-	if (build_ok) {
+	if (!is_compute && build_ok) {
 		//fragment stage
 		current_stage = RD::SHADER_STAGE_FRAGMENT;
 
@@ -256,9 +300,50 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 		}
 	}
 
+	// Compute shaders consist of this single stage; the vertex and fragment
+	// sections are skipped when is_compute is set.
+	if (is_compute) {
+		//compute stage
+		current_stage = RD::SHADER_STAGE_COMPUTE;
+
+		// Assemble the full source: template pieces interleaved with the defines
+		// and the per-version uniforms, globals and code.
+		StringBuilder builder;
+
+		builder.append(compute_codev.get_data()); // version info (if exists)
+		builder.append("\n"); //make sure defines begin at newline
+		builder.append(general_defines.get_data());
+		builder.append(variant_defines[p_variant].get_data());
+
+		for (int j = 0; j < p_version->custom_defines.size(); j++) {
+			builder.append(p_version->custom_defines[j].get_data());
+		}
+
+		builder.append(compute_code0.get_data()); //first part of compute
+
+		builder.append(p_version->uniforms.get_data()); //uniforms (same for compute and fragment)
+
+		builder.append(compute_code1.get_data()); //second part of compute
+
+		builder.append(p_version->compute_globals.get_data()); // compute globals
+
+		builder.append(compute_code2.get_data()); //third part of compute
+
+		builder.append(p_version->compute_code.get_data()); // code
+
+		builder.append(compute_code3.get_data()); //fourth of compute
+
+		// current_source is kept so the failing source is available to the error path below.
+		current_source = builder.as_string();
+		RD::ShaderStageData stage;
+		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
+		// An empty SPIR-V result is treated as a compile failure.
+		if (stage.spir_v.size() == 0) {
+			build_ok = false;
+		} else {
+
+			stage.shader_stage = RD::SHADER_STAGE_COMPUTE;
+			stages.push_back(stage);
+		}
+	}
+
 	if (!build_ok) {
 		variant_set_mutex.lock(); //properly print the errors
-		ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
+		// Stage names carry no trailing space; the " shader" literal that follows supplies the separator.
+		ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute" : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
 		ERR_PRINT(error);
 
 #ifdef DEBUG_ENABLED
@@ -319,6 +404,8 @@ void ShaderRD::_compile_version(Version *p_version) {
 
 void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines) {
 
+	ERR_FAIL_COND(is_compute);
+
 	Version *version = version_owner.getornull(p_version);
 	ERR_FAIL_COND(!version);
 	version->vertex_globals = p_vertex_globals.utf8();
@@ -340,6 +427,28 @@ void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const S
 	}
 }
 
+// Stores the per-version compute shader text (uniforms, globals, code and custom
+// defines) and marks the version dirty. A version that still needs its initial
+// setup is compiled right away.
+void ShaderRD::version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines) {
+
+	// Only a ShaderRD that was set up with compute source accepts compute code.
+	ERR_FAIL_COND(!is_compute);
+
+	Version *version = version_owner.getornull(p_version);
+	ERR_FAIL_COND(!version);
+
+	version->uniforms = p_uniforms.utf8();
+	version->compute_globals = p_compute_globals.utf8();
+	version->compute_code = p_compute_code.utf8();
+
+	// Replace (not extend) any defines left over from a previous call.
+	version->custom_defines.clear();
+	const int define_count = p_custom_defines.size();
+	for (int define_idx = 0; define_idx < define_count; define_idx++) {
+		version->custom_defines.push_back(p_custom_defines[define_idx].utf8());
+	}
+
+	version->dirty = true;
+	if (!version->initialize_needed) {
+		return;
+	}
+
+	_compile_version(version);
+	version->initialize_needed = false;
+}
+
 bool ShaderRD::version_is_valid(RID p_version) {
 	Version *version = version_owner.getornull(p_version);
 	ERR_FAIL_COND_V(!version, false);

+ 12 - 1
servers/visual/rasterizer_rd/shader_rd.h

@@ -55,6 +55,8 @@ class ShaderRD {
 		CharString uniforms;
 		CharString vertex_globals;
 		CharString vertex_code;
+		CharString compute_globals;
+		CharString compute_code;
 		CharString fragment_light;
 		CharString fragment_globals;
 		CharString fragment_code;
@@ -89,16 +91,25 @@ class ShaderRD {
 	CharString vertex_code2;
 	CharString vertex_code3;
 
+	bool is_compute = false;
+
+	CharString compute_codev; //for version and extensions
+	CharString compute_code0;
+	CharString compute_code1;
+	CharString compute_code2;
+	CharString compute_code3;
+
 	const char *name;
 
 protected:
 	ShaderRD() {}
-	void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name);
+	void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name);
 
 public:
 	RID version_create();
 
 	void version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines);
+	void version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines);
 
 	_FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) {
 		ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID());

+ 1 - 0
servers/visual/rasterizer_rd/shaders/SCsub

@@ -11,4 +11,5 @@ if 'RD_GLSL' in env['BUILDERS']:
     env.RD_GLSL('sky.glsl');
     env.RD_GLSL('tonemap.glsl');
     env.RD_GLSL('copy.glsl');
+    env.RD_GLSL('giprobe_lighting.glsl');
 

+ 241 - 0
servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl

@@ -0,0 +1,241 @@
+[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+#define NO_CHILDREN 0xFFFFFFFF
+#define GREY_VEC vec3(0.33333,0.33333,0.33333)
+
+// Octree node: indices of its eight children inside cell_positions.data.
+// raymarch() selects a child with bit 0 = upper half in x, bit 1 = y, bit 2 = z,
+// and treats NO_CHILDREN as "no child here".
+struct CellPosition {
+	uint children[8];
+};
+
+
+// Node array; raymarch() starts every descent at element 0.
+// NOTE(review): under std140 every element of a uint array is padded to 16 bytes,
+// so one CellPosition occupies 128 bytes here -- confirm the CPU-side buffer is
+// laid out the same way, or use std430 if the data is tightly packed.
+layout(set=0,binding=1,std140) buffer CellPositions {
+    CellPosition data[];
+} cell_positions;
+
+// Packed per-cell data; main() unpacks one entry per invocation.
+struct CellMaterial {
+	uint position; // xyz, 10 bits each: x in bits 0-9, y in bits 10-19, z from bit 20 up
+	uint albedo; // 4x8-bit unorm (unpackUnorm4x8); alpha is passed through to color_tex
+	uint emission; // fields of 10, 11 and 11 bits, scaled by params.emission_scale in main()
+	uint normal; // 4x8-bit snorm (unpackSnorm4x8); w >= 0.5 means lit from all directions
+};
+
+// Indexed with gl_GlobalInvocationID.x in main().
+layout(set=0,binding=2,std140) buffer CellMaterials {
+    CellMaterial data[];
+} cell_materials;
+
+#define LIGHT_TYPE_DIRECTIONAL 0
+#define LIGHT_TYPE_OMNI 1
+#define LIGHT_TYPE_SPOT 2
+
+// One light source. In this shader energy, max_length, pad0, pad2, visible and
+// clip_planes are declared but never read.
+struct Light {
+
+	uint type; // one of the LIGHT_TYPE_* values
+	float energy;
+	float radius; // omni/spot: cells at this distance or further are not lit
+	float attenuation; // omni/spot: exponent of the distance falloff
+
+	vec3 color;
+	float spot_angle_radians; // spot: largest angle from 'direction' that is still lit
+
+	float advance; // step length used when raymarching towards a cell
+	float max_length;
+	uint pad0;
+	uint pad2;
+
+	vec3 position; // omni/spot: source position, compared directly against cell grid positions
+	float spot_attenuation; // spot: exponent of the cone falloff
+
+
+	vec3 direction; // directional: direction the light travels in; spot: cone axis
+	bool visible;
+
+	vec4 clip_planes[3];
+};
+
+// main() reads the first params.light_count entries.
+layout(set=0,binding=3,std140) buffer Lights {
+    Light data[];
+} lights;
+
+
+// Per-dispatch parameters.
+layout(set=0,binding=4,std140) uniform Params {
+	vec3 limits; // not read by this shader
+	float max_length; // distance at which the virtual source of a directional light is placed
+	uint size; // grid extent per axis; raymarch() rejects positions outside [0, size)
+	uint stack_size; // raymarch() descends at most stack_size - 1 octree levels
+	uint light_count; // number of entries of lights.data that main() processes
+	float emission_scale; // multiplier applied to the unpacked cell emission
+} params;
+
+
+layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex;
+
+
+// Steps a ray through the cell octree in increments of distance_adv.
+// At every step the octree is descended (at most params.stack_size - 1 levels)
+// towards the sample position; the first step whose descent does not hit a
+// missing child returns the node index that was reached.
+// Returns NO_CHILDREN when the sample leaves the grid or the distance runs out.
+uint raymarch(float distance,float distance_adv,vec3 from,vec3 direction) {
+
+	//keep stepping slightly past zero to absorb precision errors
+	while (distance > -distance_adv) {
+
+		ivec3 sample_pos = ivec3(from);
+		if (any(lessThan(sample_pos,ivec3(0))) || any(greaterThanEqual(sample_pos,ivec3(params.size)))) {
+			return NO_CHILDREN; //sample left the grid
+		}
+
+		uint node = 0; //every descent starts at the root
+		ivec3 node_origin = ivec3(0);
+		ivec3 half_extent = ivec3(params.size) / 2;
+
+		for (int level = 0; level < params.stack_size - 1; level++) {
+
+			bvec3 upper = greaterThanEqual(sample_pos,node_origin+half_extent);
+
+			//move the origin into the selected half on each axis
+			node_origin += half_extent * ivec3(upper);
+
+			//one bit per axis: x -> 1, y -> 2, z -> 4
+			uint octant = uint(upper.x) | (uint(upper.y) << 1) | (uint(upper.z) << 2);
+
+			node = cell_positions.data[node].children[octant];
+			if (node == NO_CHILDREN) {
+				break;
+			}
+
+			half_extent >>= 1;
+		}
+
+		if (node != NO_CHILDREN) {
+			return node; //found cell!
+		}
+
+		from += direction * distance_adv;
+		distance -= distance_adv;
+	}
+
+	return NO_CHILDREN;
+}
+
+// Works out where the light reaching 'pos' comes from and how much it is attenuated.
+//   light       index into lights.data
+//   cell        not used by this function
+//   pos         position being lit
+//   attenuation (out) multiplier for the light colour; only meaningful when true is returned
+//   light_pos   (out) source position; for directional lights a virtual source placed
+//               params.max_length away from pos, against the light direction
+// Returns false when pos lies outside the light's radius or outside its spot cone.
+bool compute_light_vector(uint light,uint cell, vec3 pos,out float attenuation, out vec3 light_pos) {
+
+	if (lights.data[light].type==LIGHT_TYPE_DIRECTIONAL) {
+
+		light_pos = pos - lights.data[light].direction * params.max_length;
+		attenuation = 1.0;
+
+	} else {
+
+		light_pos = lights.data[light].position;
+		float distance = length(pos - light_pos);
+		if (distance >= lights.data[light].radius) {
+			return false;
+		}
+
+		//falloff is strongest at the source and fades out towards the radius;
+		//the lower clamp keeps pow() away from a zero base
+		attenuation = pow( clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation );
+
+
+		if (lights.data[light].type==LIGHT_TYPE_SPOT) {
+
+			vec3 rel = normalize(pos - light_pos);
+			//clamp so rounding cannot push the cosine outside the domain of acos()
+			float angle = acos(clamp(dot(rel,lights.data[light].direction), -1.0, 1.0));
+			if (angle > lights.data[light].spot_angle_radians) {
+				return false;
+			}
+
+			float d = clamp(angle / lights.data[light].spot_angle_radians, 0.0, 1.0);
+			attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation);
+		}
+	}
+
+	return true;
+}
+
+// One invocation per cell: accumulates the direct light of every light that
+// reaches the cell unoccluded and, in the default (non-anisotropic) mode, stores
+// the result at the cell's grid position in color_tex.
+void main() {
+
+	//NOTE(review): there is no bounds check against the number of cells; presumably the
+	//dispatch covers exactly the buffer -- confirm, since work groups are 64 wide
+	uint cell_index = gl_GlobalInvocationID.x;
+
+	uvec3 posu = uvec3(cell_materials.data[cell_index].position&0x3FF,(cell_materials.data[cell_index].position>>10)&0x3FF,cell_materials.data[cell_index].position>>20);
+	vec3 pos = vec3(posu);
+
+	vec3 emission = vec3(ivec3(cell_materials.data[cell_index].emission&0x3FF,(cell_materials.data[cell_index].emission>>10)&0x7FF,cell_materials.data[cell_index].emission>>21)) * params.emission_scale;
+	vec4 albedo = unpackUnorm4x8(cell_materials.data[cell_index].albedo);
+	vec4 normal = unpackSnorm4x8(cell_materials.data[cell_index].normal); //w >0.5 means, all directions
+
+#ifdef MODE_ANISOTROPIC
+	vec3 accum[6]=vec3[](vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0));
+	const vec3 accum_dirs[6]=vec3[](vec3(1.0,0.0,0.0),vec3(-1.0,0.0,0.0),vec3(0.0,1.0,0.0),vec3(0.0,-1.0,0.0),vec3(0.0,0.0,1.0),vec3(0.0,0.0,-1.0));
+#else
+	vec3 accum = vec3(0);
+#endif
+
+	for(uint i=0;i<params.light_count;i++) {
+
+		float attenuation;
+		vec3 light_pos;
+
+		if (!compute_light_vector(i,cell_index,pos,attenuation,light_pos)) {
+			continue;
+		}
+
+		float distance_adv = lights.data[i].advance;
+
+		//direction the light travels in to reach this cell
+		vec3 light_dir = pos - light_pos;
+		float distance = length(light_dir);
+
+		light_dir=normalize(light_dir);
+
+		distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always
+
+		vec3 from = pos - light_dir * distance; //approximate
+
+		if (normal.w < 0.5 && dot(normal.xyz,light_dir)>=0) {
+			continue; //not facing the light
+		}
+
+		//march along the light-to-cell direction: the light's own 'direction' is only the
+		//spot axis (and means nothing for omni lights), so it would miss the cell
+		uint result = raymarch(distance,distance_adv,from,light_dir);
+
+		if (result != cell_index) {
+			continue; //was occluded
+		}
+
+		vec3 light = lights.data[i].color * albedo.rgb * attenuation;
+
+		//NOTE(review): emission is added once per contributing light and never for
+		//cells that no light reaches -- confirm this is intended
+#ifdef MODE_ANISOTROPIC
+		for(uint j=0;j<6;j++) {
+			accum[j]+=max(0.0,dot(accum_dirs[j],-light_dir))*light+emission;
+		}
+#else
+		if (normal.w < 0.5) {
+			accum+=max(0.0,dot(normal.xyz,-light_dir))*light+emission;
+		} else {
+			//all directions
+			accum+=light+emission;
+		}
+#endif
+
+	}
+
+#ifdef MODE_ANISOTROPIC
+
+	//relative energy towards the positive and negative side of each axis
+	vec3 accum_total = accum[0]+accum[1]+accum[2]+accum[3]+accum[4]+accum[5];
+	float accum_total_energy = max(dot(accum_total,GREY_VEC),0.00001);
+	vec3 iso_positive = vec3(dot(accum[0],GREY_VEC),dot(accum[2],GREY_VEC),dot(accum[4],GREY_VEC))/vec3(accum_total_energy);
+	vec3 iso_negative = vec3(dot(accum[1],GREY_VEC),dot(accum[3],GREY_VEC),dot(accum[5],GREY_VEC))/vec3(accum_total_energy);
+
+	//store in 3D textures, total color, and isotropic magnitudes
+	//TODO: nothing is written in this mode yet
+#else
+	//store in 3D texture pos, accum
+	imageStore(color_tex,ivec3(posu),vec4(accum,albedo.a));
+#endif
+
+}

+ 20 - 0
servers/visual/rendering_device.h

@@ -882,6 +882,13 @@ public:
 	virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0) = 0;
 	virtual bool render_pipeline_is_valid(RID p_pipeline) = 0;
 
+	/**************************/
+	/**** COMPUTE PIPELINE ****/
+	/**************************/
+
+	virtual RID compute_pipeline_create(RID p_shader) = 0;
+	virtual bool compute_pipeline_is_valid(RID p_pipeline) = 0;
+
 	/****************/
 	/**** SCREEN ****/
 	/****************/
@@ -930,6 +937,19 @@ public:
 
 	virtual void draw_list_end() = 0;
 
+	/***********************/
+	/**** COMPUTE LISTS ****/
+	/***********************/
+
+	typedef int64_t ComputeListID;
+
+	virtual ComputeListID compute_list_begin() = 0;
+	virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0;
+	virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0;
+	virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) = 0;
+	virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;
+	virtual void compute_list_end() = 0;
+
 	/***************/
 	/**** FREE! ****/
 	/***************/