Kaynağa Gözat

Merge pull request #102552 from DarioSamo/shader-baker

Add shader baker to project exporter.
Thaddeus Crews 2 ay önce
ebeveyn
işleme
de37627404
100 değiştirilmiş dosya ile 4437 ekleme ve 3633 silme
  1. 55 879
      drivers/d3d12/rendering_device_driver_d3d12.cpp
  2. 5 88
      drivers/d3d12/rendering_device_driver_d3d12.h
  3. 912 0
      drivers/d3d12/rendering_shader_container_d3d12.cpp
  4. 179 0
      drivers/d3d12/rendering_shader_container_d3d12.h
  5. 0 6
      drivers/gles3/shader_gles3.cpp
  6. 1 1
      drivers/gles3/storage/material_storage.cpp
  7. 5 1
      drivers/gles3/storage/material_storage.h
  8. 2 1
      drivers/metal/metal_device_properties.h
  9. 2 45
      drivers/metal/metal_device_properties.mm
  10. 12 59
      drivers/metal/metal_objects.h
  11. 89 4
      drivers/metal/metal_objects.mm
  12. 6 15
      drivers/metal/rendering_device_driver_metal.h
  13. 1 1343
      drivers/metal/rendering_device_driver_metal.mm
  14. 265 0
      drivers/metal/rendering_shader_container_metal.h
  15. 699 0
      drivers/metal/rendering_shader_container_metal.mm
  16. 75 0
      drivers/metal/sha256_digest.h
  17. 79 332
      drivers/vulkan/rendering_device_driver_vulkan.cpp
  18. 4 40
      drivers/vulkan/rendering_device_driver_vulkan.h
  19. 103 0
      drivers/vulkan/rendering_shader_container_vulkan.cpp
  20. 57 0
      drivers/vulkan/rendering_shader_container_vulkan.h
  21. 36 0
      editor/editor_node.cpp
  22. 36 15
      editor/export/editor_export_platform.cpp
  23. 13 0
      editor/export/editor_export_platform_apple_embedded.cpp
  24. 18 0
      editor/export/editor_export_platform_pc.cpp
  25. 1 1
      editor/export/editor_export_platform_pc.h
  26. 8 0
      editor/export/editor_export_plugin.cpp
  27. 3 0
      editor/export/editor_export_plugin.h
  28. 0 5
      editor/import/resource_importer_shader_file.cpp
  29. 1 0
      editor/plugins/SCsub
  30. 13 0
      editor/plugins/shader_baker/SCsub
  31. 57 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.cpp
  32. 45 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h
  33. 51 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.cpp
  34. 39 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h
  35. 41 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.cpp
  36. 41 0
      editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h
  37. 459 0
      editor/plugins/shader_baker_export_plugin.cpp
  38. 102 0
      editor/plugins/shader_baker_export_plugin.h
  39. 6 82
      modules/glslang/register_types.cpp
  40. 35 0
      modules/glslang/shader_compile.h
  41. 3 0
      platform/android/doc_classes/EditorExportPlatformAndroid.xml
  42. 13 0
      platform/android/export/export_plugin.cpp
  43. 3 0
      platform/ios/doc_classes/EditorExportPlatformIOS.xml
  44. 3 0
      platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml
  45. 3 0
      platform/macos/doc_classes/EditorExportPlatformMacOS.xml
  46. 15 0
      platform/macos/export/export_plugin.cpp
  47. 3 0
      platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml
  48. 3 0
      platform/windows/doc_classes/EditorExportPlatformWindows.xml
  49. 46 48
      scene/resources/3d/sky_material.cpp
  50. 3 3
      scene/resources/3d/sky_material.h
  51. 4 2
      scene/resources/material.cpp
  52. 56 26
      scene/resources/particle_process_material.cpp
  53. 4 1
      scene/resources/particle_process_material.h
  54. 1 1
      servers/rendering/dummy/storage/material_storage.cpp
  55. 5 1
      servers/rendering/dummy/storage/material_storage.h
  56. 14 10
      servers/rendering/renderer_rd/cluster_builder_rd.cpp
  57. 5 0
      servers/rendering/renderer_rd/cluster_builder_rd.h
  58. 9 6
      servers/rendering/renderer_rd/effects/fsr.cpp
  59. 5 0
      servers/rendering/renderer_rd/effects/fsr.h
  60. 32 39
      servers/rendering/renderer_rd/effects/fsr2.cpp
  61. 5 0
      servers/rendering/renderer_rd/environment/fog.cpp
  62. 1 0
      servers/rendering/renderer_rd/environment/fog.h
  63. 60 38
      servers/rendering/renderer_rd/environment/gi.cpp
  64. 9 0
      servers/rendering/renderer_rd/environment/gi.h
  65. 5 0
      servers/rendering/renderer_rd/environment/sky.cpp
  66. 1 0
      servers/rendering/renderer_rd/environment/sky.h
  67. 24 4
      servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
  68. 5 0
      servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
  69. 14 2
      servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
  70. 2 0
      servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h
  71. 15 1
      servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp
  72. 5 0
      servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h
  73. 29 20
      servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp
  74. 8 1
      servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h
  75. 19 24
      servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp
  76. 3 4
      servers/rendering/renderer_rd/renderer_canvas_render_rd.h
  77. 33 28
      servers/rendering/renderer_rd/renderer_compositor_rd.cpp
  78. 2 0
      servers/rendering/renderer_rd/renderer_compositor_rd.h
  79. 2 1
      servers/rendering/renderer_rd/renderer_scene_render_rd.h
  80. 252 120
      servers/rendering/renderer_rd/shader_rd.cpp
  81. 37 7
      servers/rendering/renderer_rd/shader_rd.h
  82. 2 2
      servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl
  83. 1 28
      servers/rendering/renderer_rd/shaders/cluster_render.glsl
  84. 1 14
      servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl
  85. 2 29
      servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl
  86. 0 23
      servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
  87. 0 10
      servers/rendering/renderer_rd/shaders/effects/tonemap.glsl
  88. 0 13
      servers/rendering/renderer_rd/shaders/effects/vrs.glsl
  89. 2 27
      servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl
  90. 1 16
      servers/rendering/renderer_rd/shaders/environment/sky.glsl
  91. 1 37
      servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
  92. 15 52
      servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
  93. 0 11
      servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl
  94. 6 18
      servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
  95. 0 4
      servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl
  96. 33 1
      servers/rendering/renderer_rd/storage_rd/material_storage.cpp
  97. 10 2
      servers/rendering/renderer_rd/storage_rd/material_storage.h
  98. 4 0
      servers/rendering/renderer_rd/storage_rd/particles_storage.cpp
  99. 1 0
      servers/rendering/renderer_rd/storage_rd/particles_storage.h
  100. 41 42
      servers/rendering/rendering_device.cpp

Dosya farkı çok büyük olduğundan ihmal edildi
+ 55 - 879
drivers/d3d12/rendering_device_driver_d3d12.cpp


+ 5 - 88
drivers/d3d12/rendering_device_driver_d3d12.h

@@ -33,6 +33,7 @@
 #include "core/templates/hash_map.h"
 #include "core/templates/paged_allocator.h"
 #include "core/templates/self_list.h"
+#include "rendering_shader_container_d3d12.h"
 #include "servers/rendering/rendering_device_driver.h"
 
 #ifndef _MSC_VER
@@ -54,8 +55,6 @@
 
 using Microsoft::WRL::ComPtr;
 
-#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3
-
 #ifdef DEV_ENABLED
 #define CUSTOM_INFO_QUEUE_ENABLED 0
 #endif
@@ -131,6 +130,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 	FormatCapabilities format_capabilities;
 	BarrierCapabilities barrier_capabilities;
 	MiscFeaturesSupport misc_features_support;
+	RenderingShaderContainerFormatD3D12 shader_container_format;
 	String pipeline_cache_id;
 
 	class DescriptorsHeap {
@@ -518,6 +518,7 @@ public:
 	/****************/
 	/**** SHADER ****/
 	/****************/
+
 private:
 	static const uint32_t ROOT_SIGNATURE_SIZE = 256;
 	static const uint32_t PUSH_CONSTANT_SIZE = 128; // Mimicking Vulkan.
@@ -535,82 +536,6 @@ private:
 		MAX_UNIFORM_SETS = (ROOT_SIGNATURE_SIZE - PUSH_CONSTANT_SIZE) / sizeof(uint32_t),
 	};
 
-	enum RootSignatureLocationType {
-		RS_LOC_TYPE_RESOURCE,
-		RS_LOC_TYPE_SAMPLER,
-	};
-
-	enum ResourceClass {
-		RES_CLASS_INVALID,
-		RES_CLASS_CBV,
-		RES_CLASS_SRV,
-		RES_CLASS_UAV,
-	};
-
-	struct ShaderBinary {
-		// Version 1: Initial.
-		// Version 2: 64-bit vertex input mask.
-		// Version 3: Added SC stage mask.
-		static const uint32_t VERSION = 3;
-
-		// Phase 1: SPIR-V reflection, where the Vulkan/RD interface of the shader is discovered.
-		// Phase 2: SPIR-V to DXIL translation, where the DXIL interface is discovered, which may have gaps due to optimizations.
-
-		struct DataBinding {
-			// - Phase 1.
-			uint32_t type = 0;
-			uint32_t binding = 0;
-			uint32_t stages = 0;
-			uint32_t length = 0; // Size of arrays (in total elements), or ubos (in bytes * total elements).
-			uint32_t writable = 0;
-			// - Phase 2.
-			uint32_t res_class = 0;
-			uint32_t has_sampler = 0;
-			uint32_t dxil_stages = 0;
-			struct RootSignatureLocation {
-				uint32_t root_param_idx = UINT32_MAX; // UINT32_MAX if unused.
-				uint32_t range_idx = UINT32_MAX; // UINT32_MAX if unused.
-			};
-			RootSignatureLocation root_sig_locations[2]; // Index is RootSignatureLocationType.
-
-			// We need to sort these to fill the root signature locations properly.
-			bool operator<(const DataBinding &p_other) const {
-				return binding < p_other.binding;
-			}
-		};
-
-		struct SpecializationConstant {
-			// - Phase 1.
-			uint32_t type = 0;
-			uint32_t constant_id = 0;
-			union {
-				uint32_t int_value = 0;
-				float float_value;
-				bool bool_value;
-			};
-			uint32_t stage_flags = 0;
-			// - Phase 2.
-			uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {};
-		};
-
-		struct Data {
-			uint64_t vertex_input_mask = 0;
-			uint32_t fragment_output_mask = 0;
-			uint32_t specialization_constants_count = 0;
-			uint32_t spirv_specialization_constants_ids_mask = 0;
-			uint32_t is_compute = 0;
-			uint32_t compute_local_size[3] = {};
-			uint32_t set_count = 0;
-			uint32_t push_constant_size = 0;
-			uint32_t dxil_push_constant_stages = 0; // Phase 2.
-			uint32_t nir_runtime_data_root_param_idx = 0; // Phase 2.
-			uint32_t stage_count = 0;
-			uint32_t shader_name_len = 0;
-			uint32_t root_signature_len = 0;
-			uint32_t root_signature_crc = 0;
-		};
-	};
-
 	struct ShaderInfo {
 		uint32_t dxil_push_constant_size = 0;
 		uint32_t nir_runtime_data_root_param_idx = UINT32_MAX;
@@ -661,22 +586,13 @@ private:
 		uint32_t root_signature_crc = 0;
 	};
 
-	uint32_t _shader_patch_dxil_specialization_constant(
-			PipelineSpecializationConstantType p_type,
-			const void *p_value,
-			const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES],
-			HashMap<ShaderStage, Vector<uint8_t>> &r_stages_bytecodes,
-			bool p_is_first_patch);
 	bool _shader_apply_specialization_constants(
 			const ShaderInfo *p_shader_info,
 			VectorView<PipelineSpecializationConstant> p_specialization_constants,
 			HashMap<ShaderStage, Vector<uint8_t>> &r_final_stages_bytecode);
-	void _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob);
 
 public:
-	virtual String shader_get_binary_cache_key() override final;
-	virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
-	virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
+	virtual ShaderID shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
 	virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final;
 	virtual void shader_free(ShaderID p_shader) override final;
 	virtual void shader_destroy_modules(ShaderID p_shader) override final;
@@ -979,6 +895,7 @@ public:
 	virtual String get_api_version() const override final;
 	virtual String get_pipeline_cache_uuid() const override final;
 	virtual const Capabilities &get_capabilities() const override final;
+	virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final;
 
 	virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
 

+ 912 - 0
drivers/d3d12/rendering_shader_container_d3d12.cpp

@@ -0,0 +1,912 @@
+/**************************************************************************/
+/*  rendering_shader_container_d3d12.cpp                                  */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "rendering_shader_container_d3d12.h"
+
+#include "core/templates/sort_array.h"
+
+#include "dxil_hash.h"
+
+#include <zlib.h>
+
+#ifndef _MSC_VER
+// Match current version used by MinGW, MSVC and Direct3D 12 headers use 500.
+#define __REQUIRED_RPCNDR_H_VERSION__ 475
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wswitch"
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
+#endif
+
+#include "d3dx12.h"
+#include <dxgi1_6.h>
+#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
+#include "D3D12MemAlloc.h"
+
+#include <wrl/client.h>
+
+#if defined(_MSC_VER) && defined(MemoryBarrier)
+// Annoying define from winnt.h. Reintroduced by some of the headers above.
+#undef MemoryBarrier
+#endif
+
+// No point in fighting warnings in Mesa.
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4200) // "nonstandard extension used: zero-sized array in struct/union".
+#pragma warning(disable : 4806) // "'&': unsafe operation: no value of type 'bool' promoted to type 'uint32_t' can equal the given constant".
+#endif
+
+#include "nir_spirv.h"
+#include "nir_to_dxil.h"
+#include "spirv_to_dxil.h"
+extern "C" {
+#include "dxil_spirv_nir.h"
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#elif defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) {
+	switch (p_stages_mask) {
+		case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT:
+			return D3D12_SHADER_VISIBILITY_VERTEX;
+		case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT:
+			return D3D12_SHADER_VISIBILITY_PIXEL;
+		default:
+			return D3D12_SHADER_VISIBILITY_ALL;
+	}
+}
+
+uint32_t RenderingDXIL::patch_specialization_constant(
+		RenderingDeviceCommons::PipelineSpecializationConstantType p_type,
+		const void *p_value,
+		const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES],
+		HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_stages_bytecodes,
+		bool p_is_first_patch) {
+	uint32_t patch_val = 0;
+	switch (p_type) {
+		case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: {
+			uint32_t int_value = *((const int *)p_value);
+			ERR_FAIL_COND_V(int_value & (1 << 31), 0);
+			patch_val = int_value;
+		} break;
+		case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: {
+			bool bool_value = *((const bool *)p_value);
+			patch_val = (uint32_t)bool_value;
+		} break;
+		case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: {
+			uint32_t int_value = *((const int *)p_value);
+			ERR_FAIL_COND_V(int_value & (1 << 31), 0);
+			patch_val = (int_value >> 1);
+		} break;
+	}
+	// For VBR encoding to encode the number of bits we expect (32), we need to set the MSB unconditionally.
+	// However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore,
+	// the bit we set to 1 is the one at index 30.
+	patch_val |= (1 << 30);
+	patch_val <<= 1; // What signed VBR does.
+
+	auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t {
+		uint64_t original = 0;
+		uint32_t curr_input_byte = p_bit_offset / 8;
+		uint8_t curr_input_bit = p_bit_offset % 8;
+		auto get_curr_input_bit = [&]() -> bool {
+			return ((p_start[curr_input_byte] >> curr_input_bit) & 1);
+		};
+		auto move_to_next_input_bit = [&]() {
+			if (curr_input_bit == 7) {
+				curr_input_bit = 0;
+				curr_input_byte++;
+			} else {
+				curr_input_bit++;
+			}
+		};
+		auto tamper_input_bit = [&](bool p_new_bit) {
+			p_start[curr_input_byte] &= ~((uint8_t)1 << curr_input_bit);
+			if (p_new_bit) {
+				p_start[curr_input_byte] |= (uint8_t)1 << curr_input_bit;
+			}
+		};
+		uint8_t value_bit_idx = 0;
+		for (uint32_t i = 0; i < 5; i++) { // 32 bits take 5 full bytes in VBR.
+			for (uint32_t j = 0; j < 7; j++) {
+				bool input_bit = get_curr_input_bit();
+				original |= (uint64_t)(input_bit ? 1 : 0) << value_bit_idx;
+				tamper_input_bit((p_tb_value >> value_bit_idx) & 1);
+				move_to_next_input_bit();
+				value_bit_idx++;
+			}
+#ifdef DEV_ENABLED
+			bool input_bit = get_curr_input_bit();
+			DEV_ASSERT((i < 4 && input_bit) || (i == 4 && !input_bit));
+#endif
+			move_to_next_input_bit();
+		}
+		return original;
+	};
+	uint32_t stages_patched_mask = 0;
+	for (int stage = 0; stage < RenderingDeviceCommons::SHADER_STAGE_MAX; stage++) {
+		if (!r_stages_bytecodes.has((RenderingDeviceCommons::ShaderStage)stage)) {
+			continue;
+		}
+
+		uint64_t offset = p_stages_bit_offsets[RenderingShaderContainerD3D12::SHADER_STAGES_BIT_OFFSET_INDICES[stage]];
+		if (offset == 0) {
+			// This constant does not appear at this stage.
+			continue;
+		}
+
+		Vector<uint8_t> &bytecode = r_stages_bytecodes[(RenderingDeviceCommons::ShaderStage)stage];
+#ifdef DEV_ENABLED
+		uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val);
+		// Checking against the value the NIR patch should have set.
+		DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC);
+		uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val);
+		DEV_ASSERT(readback_patch_val == patch_val);
+#else
+		tamper_bits(bytecode.ptrw(), offset, patch_val);
+#endif
+
+		stages_patched_mask |= (1 << stage);
+	}
+
+	return stages_patched_mask;
+}
+
+void RenderingDXIL::sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) {
+	uint8_t *w = r_dxil_blob.ptrw();
+	compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4);
+}
+
+// RenderingShaderContainerD3D12
+
+uint32_t RenderingShaderContainerD3D12::_format() const {
+	return 0x43443344;
+}
+
+uint32_t RenderingShaderContainerD3D12::_format_version() const {
+	return FORMAT_VERSION;
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
+	reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes);
+	return sizeof(ReflectionDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
+	reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
+	return 0;
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
+	reflection_binding_set_uniforms_data_d3d12.ptrw()[p_index] = *(const ReflectionBindingDataD3D12 *)(p_bytes);
+	return sizeof(ReflectionBindingDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) {
+	reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
+	return 0;
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
+	reflection_specialization_data_d3d12.ptrw()[p_index] = *(const ReflectionSpecializationDataD3D12 *)(p_bytes);
+	return sizeof(ReflectionSpecializationDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint8_t *p_bytes) {
+	ContainerFooterD3D12 footer = *(const ContainerFooterD3D12 *)(p_bytes);
+	root_signature_crc = footer.root_signature_crc;
+	root_signature_bytes.resize(footer.root_signature_length);
+	memcpy(root_signature_bytes.ptrw(), p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.size());
+	return sizeof(ContainerFooterD3D12) + footer.root_signature_length;
+}
+
+uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
+	if (p_bytes != nullptr) {
+		*(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12;
+	}
+
+	return sizeof(ReflectionDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
+	if (p_bytes != nullptr) {
+		*(ReflectionBindingDataD3D12 *)(p_bytes) = reflection_binding_set_uniforms_data_d3d12[p_index];
+	}
+
+	return sizeof(ReflectionBindingDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
+	if (p_bytes != nullptr) {
+		*(ReflectionSpecializationDataD3D12 *)(p_bytes) = reflection_specialization_data_d3d12[p_index];
+	}
+
+	return sizeof(ReflectionSpecializationDataD3D12);
+}
+
+uint32_t RenderingShaderContainerD3D12::_to_bytes_footer_extra_data(uint8_t *p_bytes) const {
+	if (p_bytes != nullptr) {
+		ContainerFooterD3D12 &footer = *(ContainerFooterD3D12 *)(p_bytes);
+		footer.root_signature_length = root_signature_bytes.size();
+		footer.root_signature_crc = root_signature_crc;
+		memcpy(p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.ptr(), root_signature_bytes.size());
+	}
+
+	return sizeof(ContainerFooterD3D12) + root_signature_bytes.size();
+}
+
+#if NIR_ENABLED
+bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
+	r_stages_processed.clear();
+
+	dxil_spirv_runtime_conf dxil_runtime_conf = {};
+	dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER;
+	dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
+	dxil_runtime_conf.zero_based_vertex_instance_id = true;
+	dxil_runtime_conf.zero_based_compute_workgroup_id = true;
+	dxil_runtime_conf.declared_read_only_images_as_srvs = true;
+
+	// Making this explicit to let maintainers know that in practice this didn't improve performance,
+	// probably because data generated by one shader and consumed by another one forces the resource
+	// to transition from UAV to SRV, and back, instead of being an UAV all the time.
+	// In case someone wants to try, care must be taken so in case of incompatible bindings across stages
+	// happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only
+	// uses an allegedly writable resource only for reading but the next stage doesn't.
+	dxil_runtime_conf.inferred_read_only_images_as_srvs = false;
+
+	// Translate SPIR-V to NIR.
+	for (int64_t i = 0; i < p_spirv.size(); i++) {
+		RenderingDeviceCommons::ShaderStage stage = p_spirv[i].shader_stage;
+		RenderingDeviceCommons::ShaderStage stage_flag = (RenderingDeviceCommons::ShaderStage)(1 << stage);
+		r_stages.push_back(stage);
+		r_stages_processed.set_flag(stage_flag);
+
+		const char *entry_point = "main";
+		static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
+			MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX
+			MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT
+			MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL
+			MESA_SHADER_TESS_EVAL, // SHADER_STAGE_TESSELATION_EVALUATION
+			MESA_SHADER_COMPUTE, // SHADER_STAGE_COMPUTE
+		};
+
+		nir_shader *shader = spirv_to_nir(
+				(const uint32_t *)(p_spirv[i].spirv.ptr()),
+				p_spirv[i].spirv.size() / sizeof(uint32_t),
+				nullptr,
+				0,
+				SPIRV_TO_MESA_STAGES[stage],
+				entry_point,
+				dxil_spirv_nir_get_spirv_options(),
+				p_compiler_options);
+
+		ERR_FAIL_NULL_V_MSG(shader, false, "Shader translation (step 1) at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed.");
+
+#ifdef DEV_ENABLED
+		nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler");
+#endif
+
+		if (stage == RenderingDeviceCommons::SHADER_STAGE_VERTEX) {
+			dxil_runtime_conf.yz_flip.y_mask = 0xffff;
+			dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
+		} else {
+			dxil_runtime_conf.yz_flip.y_mask = 0;
+			dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_YZ_FLIP_NONE;
+		}
+
+		dxil_spirv_nir_prep(shader);
+		bool requires_runtime_data = false;
+		dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data);
+
+		r_stages_nir_shaders[stage] = shader;
+	}
+
+	// Link NIR shaders.
+	for (int i = RenderingDeviceCommons::SHADER_STAGE_MAX - 1; i >= 0; i--) {
+		if (!r_stages_nir_shaders.has(i)) {
+			continue;
+		}
+		nir_shader *shader = r_stages_nir_shaders[i];
+		nir_shader *prev_shader = nullptr;
+		for (int j = i - 1; j >= 0; j--) {
+			if (r_stages_nir_shaders.has(j)) {
+				prev_shader = r_stages_nir_shaders[j];
+				break;
+			}
+		}
+		// There is a bug in the Direct3D runtime during creation of a PSO with view instancing. If a fragment
+		// shader uses front/back face detection (SV_IsFrontFace), its signature must include the pixel position
+		// builtin variable (SV_Position), otherwise an Internal Runtime error will occur.
+		if (i == RenderingDeviceCommons::SHADER_STAGE_FRAGMENT) {
+			const bool use_front_face =
+					nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_FACE) ||
+					(shader->info.inputs_read & VARYING_BIT_FACE) ||
+					nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRONT_FACE) ||
+					BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
+			const bool use_position =
+					nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_POS) ||
+					(shader->info.inputs_read & VARYING_BIT_POS) ||
+					nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRAG_COORD) ||
+					BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
+			if (use_front_face && !use_position) {
+				nir_variable *const pos = nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord");
+				pos->data.location = VARYING_SLOT_POS;
+				shader->info.inputs_read |= VARYING_BIT_POS;
+			}
+		}
+		if (prev_shader) {
+			bool requires_runtime_data = {};
+			dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data);
+		}
+	}
+
+	return true;
+}
+
+struct GodotNirCallbackUserData {
+	RenderingShaderContainerD3D12 *container;
+	RenderingDeviceCommons::ShaderStage stage;
+};
+
+static dxil_shader_model shader_model_d3d_to_dxil(D3D_SHADER_MODEL p_d3d_shader_model) {
+	static_assert(SHADER_MODEL_6_0 == 0x60000);
+	static_assert(SHADER_MODEL_6_3 == 0x60003);
+	static_assert(D3D_SHADER_MODEL_6_0 == 0x60);
+	static_assert(D3D_SHADER_MODEL_6_3 == 0x63);
+	return (dxil_shader_model)((p_d3d_shader_model >> 4) * 0x10000 + (p_d3d_shader_model & 0xf));
+}
+
+// Lowers each NIR shader in p_stages_nir_shaders to a DXIL blob via Mesa's
+// nir_to_dxil(). The Godot callbacks registered here feed resource usage and
+// specialization-constant bit offsets back into this container's reflection
+// data as translation runs. Returns false if any stage fails to translate;
+// successfully translated blobs are copied into r_dxil_blobs keyed by stage.
+bool RenderingShaderContainerD3D12::_convert_nir_to_dxil(const HashMap<int, nir_shader *> &p_stages_nir_shaders, BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs) {
+	// Translate NIR to DXIL.
+	for (KeyValue<int, nir_shader *> it : p_stages_nir_shaders) {
+		RenderingDeviceCommons::ShaderStage stage = (RenderingDeviceCommons::ShaderStage)(it.key);
+		GodotNirCallbackUserData godot_nir_callback_user_data;
+		godot_nir_callback_user_data.container = this;
+		godot_nir_callback_user_data.stage = stage;
+
+		GodotNirCallbacks godot_nir_callbacks = {};
+		godot_nir_callbacks.data = &godot_nir_callback_user_data;
+		godot_nir_callbacks.report_resource = _nir_report_resource;
+		godot_nir_callbacks.report_sc_bit_offset_fn = _nir_report_sc_bit_offset;
+		godot_nir_callbacks.report_bitcode_bit_offset_fn = _nir_report_bitcode_bit_offset;
+
+		nir_to_dxil_options nir_to_dxil_options = {};
+		nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN;
+		nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL));
+		// No DXC validation here; the blobs are signed later by
+		// RenderingDXIL::sign_bytecode before being stored.
+		nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION;
+		nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks;
+
+		dxil_logger logger = {};
+		logger.log = [](void *p_priv, const char *p_msg) {
+#ifdef DEBUG_ENABLED
+			print_verbose(p_msg);
+#endif
+		};
+
+		blob dxil_blob = {};
+		bool ok = nir_to_dxil(it.value, &nir_to_dxil_options, &logger, &dxil_blob);
+		ERR_FAIL_COND_V_MSG(!ok, false, "Shader translation at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed.");
+
+		// Copy the blob into engine-managed memory, then release Mesa's copy.
+		Vector<uint8_t> blob_copy;
+		blob_copy.resize(dxil_blob.size);
+		memcpy(blob_copy.ptrw(), dxil_blob.data, dxil_blob.size);
+		blob_finish(&dxil_blob);
+		r_dxil_blobs.insert(stage, blob_copy);
+	}
+
+	return true;
+}
+
+// Runs the full SPIR-V -> NIR -> DXIL pipeline for every supplied stage.
+// On success r_dxil_blobs holds one DXIL blob per stage, r_stages lists the
+// stages in input order and r_stages_processed carries the stage bitmask.
+// The intermediate NIR shaders are always freed before returning, whether the
+// conversion succeeded or not.
+bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
+	r_dxil_blobs.clear();
+
+	HashMap<int, nir_shader *> stages_nir_shaders;
+	auto free_nir_shaders = [&]() {
+		for (KeyValue<int, nir_shader *> &E : stages_nir_shaders) {
+			ralloc_free(E.value);
+		}
+		stages_nir_shaders.clear();
+	};
+
+	// This structure must live as long as the shaders are alive.
+	nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options();
+	compiler_options.lower_base_vertex = false;
+
+	// This is based on spirv2dxil.c. May need updates when it changes.
+	// Also, this has to stay around until after linking.
+	if (!_convert_spirv_to_nir(p_spirv, &compiler_options, stages_nir_shaders, r_stages, r_stages_processed)) {
+		free_nir_shaders();
+		return false;
+	}
+
+	if (!_convert_nir_to_dxil(stages_nir_shaders, r_stages_processed, r_dxil_blobs)) {
+		free_nir_shaders();
+		return false;
+	}
+
+	free_nir_shaders();
+	return true;
+}
+
+// Builds the D3D12 root signature from the reflected uniform sets, serializes
+// it as version 1.1 and stores the blob plus its CRC32 in root_signature_bytes
+// and root_signature_crc. Root parameter order is: push constants, optional
+// NIR runtime data, then one descriptor table per set for resources and one
+// per set for samplers. Must run after the NIR callbacks have filled in the
+// per-uniform DXIL usage data (dxil_stages / resource_class / has_sampler).
+// Returns false if serialization fails.
+bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed) {
+	// Root (push) constants.
+	LocalVector<D3D12_ROOT_PARAMETER1> root_params;
+	if (reflection_data_d3d12.dxil_push_constant_stages) {
+		CD3DX12_ROOT_PARAMETER1 push_constant;
+		push_constant.InitAsConstants(
+				reflection_data.push_constant_size / sizeof(uint32_t),
+				ROOT_CONSTANT_REGISTER,
+				0,
+				stages_to_d3d12_visibility(reflection_data_d3d12.dxil_push_constant_stages));
+
+		root_params.push_back(push_constant);
+	}
+
+	// NIR-DXIL runtime data.
+	if (reflection_data_d3d12.nir_runtime_data_root_param_idx == 1) { // Set above to 1 when discovering runtime data is needed.
+		DEV_ASSERT(!reflection_data.is_compute); // Could be supported if needed, but it's pointless as of now.
+		reflection_data_d3d12.nir_runtime_data_root_param_idx = root_params.size();
+		CD3DX12_ROOT_PARAMETER1 nir_runtime_data;
+		nir_runtime_data.InitAsConstants(
+				sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t),
+				RUNTIME_DATA_REGISTER,
+				0,
+				D3D12_SHADER_VISIBILITY_VERTEX);
+		root_params.push_back(nir_runtime_data);
+	}
+
+	// Descriptor tables (up to two per uniform set, for resources and/or samplers).
+	// These have to stay around until serialization!
+	struct TraceableDescriptorTable {
+		uint32_t stages_mask = {};
+		Vector<D3D12_DESCRIPTOR_RANGE1> ranges;
+		Vector<RootSignatureLocation *> root_signature_locations;
+	};
+
+	uint32_t binding_start = 0;
+	Vector<TraceableDescriptorTable> resource_tables_maps;
+	Vector<TraceableDescriptorTable> sampler_tables_maps;
+	for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) {
+		bool first_resource_in_set = true;
+		bool first_sampler_in_set = true;
+		uint32_t uniform_count = reflection_binding_set_uniforms_count[i];
+		for (uint32_t j = 0; j < uniform_count; j++) {
+			const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j];
+			ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j];
+			bool really_used = uniform_d3d12.dxil_stages != 0;
+#ifdef DEV_ENABLED
+			bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler;
+			DEV_ASSERT(anybody_home == really_used);
+#endif
+			if (!really_used) {
+				continue; // Existed in SPIR-V; went away in DXIL.
+			}
+
+			// Appends one descriptor range to the current table for this set,
+			// starting a new table the first time a set contributes a range.
+			// The RootSignatureLocation pointer is remembered so the final
+			// root parameter/range indices can be written back afterwards.
+			auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
+										uint32_t p_num_descriptors,
+										uint32_t p_dxil_register,
+										uint32_t p_dxil_stages_mask,
+										RootSignatureLocation *p_root_sig_locations,
+										Vector<TraceableDescriptorTable> &r_tables,
+										bool &r_first_in_set) {
+				if (r_first_in_set) {
+					r_tables.resize(r_tables.size() + 1);
+					r_first_in_set = false;
+				}
+
+				TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1];
+				table.stages_mask |= p_dxil_stages_mask;
+
+				CD3DX12_DESCRIPTOR_RANGE1 range;
+				// Due to the aliasing hack for SRV-UAV of different families,
+				// we can be causing an unintended change of data (sometimes the validation layers catch it).
+				D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
+				if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_SRV || p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
+					flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
+				} else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) {
+					flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE;
+				}
+				range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags);
+
+				table.ranges.push_back(range);
+				table.root_signature_locations.push_back(p_root_sig_locations);
+			};
+
+			uint32_t num_descriptors = 1;
+			D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {};
+			switch ((ResourceClass)(uniform_d3d12.resource_class)) {
+				case RES_CLASS_INVALID: {
+					num_descriptors = uniform.length;
+					DEV_ASSERT(uniform_d3d12.has_sampler);
+				} break;
+				case RES_CLASS_CBV: {
+					resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+					DEV_ASSERT(!uniform_d3d12.has_sampler);
+				} break;
+				case RES_CLASS_SRV: {
+					resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+					num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size.
+				} break;
+				case RES_CLASS_UAV: {
+					resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+					num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size.
+					DEV_ASSERT(!uniform_d3d12.has_sampler);
+				} break;
+			}
+
+			// Same set/binding -> register mapping used on the NIR side.
+			uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER;
+			if (uniform_d3d12.resource_class != RES_CLASS_INVALID) {
+				insert_range(
+						resource_range_type,
+						num_descriptors,
+						dxil_register,
+						uniform_d3d12.dxil_stages,
+						&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE],
+						resource_tables_maps,
+						first_resource_in_set);
+			}
+
+			if (uniform_d3d12.has_sampler) {
+				insert_range(
+						D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
+						num_descriptors,
+						dxil_register,
+						uniform_d3d12.dxil_stages,
+						&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER],
+						sampler_tables_maps,
+						first_sampler_in_set);
+			}
+		}
+
+		binding_start += uniform_count;
+	}
+
+	auto make_descriptor_tables = [&root_params](const Vector<TraceableDescriptorTable> &p_tables) {
+		for (const TraceableDescriptorTable &table : p_tables) {
+			D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask);
+			DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size());
+			for (int i = 0; i < table.ranges.size(); i++) {
+				// By now we know very well which root signature location corresponds to the pointed uniform.
+				table.root_signature_locations[i]->root_param_index = root_params.size();
+				table.root_signature_locations[i]->range_index = i;
+			}
+
+			CD3DX12_ROOT_PARAMETER1 root_table;
+			root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility);
+			root_params.push_back(root_table);
+		}
+	};
+
+	make_descriptor_tables(resource_tables_maps);
+	make_descriptor_tables(sampler_tables_maps);
+
+	CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {};
+	// Deny access from stages the engine never uses; additionally deny vertex
+	// or pixel access when that stage is absent from this shader.
+	D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+
+	if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT)) {
+		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
+	}
+
+	if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT)) {
+		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
+	}
+
+	if (reflection_data.vertex_input_mask) {
+		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
+	}
+
+	root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags);
+
+	// Create and store the root signature and its CRC32.
+	ID3DBlob *error_blob = nullptr;
+	ID3DBlob *root_sig_blob = nullptr;
+	HRESULT res = D3DX12SerializeVersionedRootSignature(HMODULE(lib_d3d12), &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, &root_sig_blob, &error_blob);
+	if (SUCCEEDED(res)) {
+		root_signature_bytes.resize(root_sig_blob->GetBufferSize());
+		memcpy(root_signature_bytes.ptrw(), root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize());
+
+		root_signature_crc = crc32(0, nullptr, 0);
+		root_signature_crc = crc32(root_signature_crc, (const Bytef *)root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize());
+
+		return true;
+	} else {
+		if (root_sig_blob != nullptr) {
+			root_sig_blob->Release();
+		}
+
+		String error_string;
+		if (error_blob != nullptr) {
+			error_string = vformat("Serialization of root signature failed with error 0x%08ux and the following message:\n%s", uint32_t(res), String::ascii(Span((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize())));
+			error_blob->Release();
+		} else {
+			error_string = vformat("Serialization of root signature failed with error 0x%08ux", uint32_t(res));
+		}
+
+		ERR_FAIL_V_MSG(false, error_string);
+	}
+}
+
+// NIR-to-DXIL callback, invoked once per resource the translated DXIL shader
+// actually uses. Routes the report to one of three destinations: the push
+// constant stage mask, the runtime-data marker, or the matching reflected
+// uniform (recording which stages use it, its D3D12 resource class and
+// whether it carries a sampler). p_data is the GodotNirCallbackUserData set
+// up in _convert_nir_to_dxil.
+void RenderingShaderContainerD3D12::_nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data) {
+	const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data;
+
+	// Types based on Mesa's dxil_container.h.
+	static const uint32_t DXIL_RES_SAMPLER = 1;
+	static const ResourceClass DXIL_TYPE_TO_CLASS[] = {
+		RES_CLASS_INVALID, // DXIL_RES_INVALID
+		RES_CLASS_INVALID, // DXIL_RES_SAMPLER
+		RES_CLASS_CBV, // DXIL_RES_CBV
+		RES_CLASS_SRV, // DXIL_RES_SRV_TYPED
+		RES_CLASS_SRV, // DXIL_RES_SRV_RAW
+		RES_CLASS_SRV, // DXIL_RES_SRV_STRUCTURED
+		RES_CLASS_UAV, // DXIL_RES_UAV_TYPED
+		RES_CLASS_UAV, // DXIL_RES_UAV_RAW
+		RES_CLASS_UAV, // DXIL_RES_UAV_STRUCTURED
+		RES_CLASS_INVALID, // DXIL_RES_UAV_STRUCTURED_WITH_COUNTER
+	};
+
+	DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS));
+	ResourceClass resource_class = DXIL_TYPE_TO_CLASS[p_dxil_type];
+
+	if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) {
+		DEV_ASSERT(resource_class == RES_CLASS_CBV);
+		user_data.container->reflection_data_d3d12.dxil_push_constant_stages |= (1 << user_data.stage);
+	} else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) {
+		DEV_ASSERT(resource_class == RES_CLASS_CBV);
+		user_data.container->reflection_data_d3d12.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later.
+	} else {
+		DEV_ASSERT(p_space == 0);
+
+		// Undo the register packing applied when assigning the NIR registers
+		// (see dxil_register in _generate_root_signature).
+		uint32_t set = p_register / GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER;
+		uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER;
+
+		DEV_ASSERT(set < (uint32_t)user_data.container->reflection_binding_set_uniforms_count.size());
+
+		uint32_t binding_start = 0;
+		for (uint32_t i = 0; i < set; i++) {
+			binding_start += user_data.container->reflection_binding_set_uniforms_count[i];
+		}
+
+		[[maybe_unused]] bool found = false;
+		for (uint32_t i = 0; i < user_data.container->reflection_binding_set_uniforms_count[set]; i++) {
+			const ReflectionBindingData &uniform = user_data.container->reflection_binding_set_uniforms_data[binding_start + i];
+			ReflectionBindingDataD3D12 &uniform_d3d12 = user_data.container->reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + i];
+			if (uniform.binding != binding) {
+				continue;
+			}
+
+			uniform_d3d12.dxil_stages |= (1 << user_data.stage);
+			if (resource_class != RES_CLASS_INVALID) {
+				DEV_ASSERT(uniform_d3d12.resource_class == (uint32_t)RES_CLASS_INVALID || uniform_d3d12.resource_class == (uint32_t)resource_class);
+				uniform_d3d12.resource_class = resource_class;
+			} else if (p_dxil_type == DXIL_RES_SAMPLER) {
+				uniform_d3d12.has_sampler = (uint32_t)true;
+			} else {
+				DEV_ASSERT(false && "Unknown resource class.");
+			}
+			found = true;
+		}
+
+		DEV_ASSERT(found);
+	}
+}
+
+// NIR-to-DXIL callback: records the bit offset of specialization constant
+// p_sc_id within the current stage's bitcode, so its value can be patched in
+// place later (see RenderingDXIL::patch_specialization_constant) without
+// recompiling the shader.
+void RenderingShaderContainerD3D12::_nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) {
+	const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data;
+	[[maybe_unused]] bool found = false;
+	for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) {
+		const ReflectionSpecializationData &sc = user_data.container->reflection_specialization_data[i];
+		ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i];
+		if (sc.constant_id != p_sc_id) {
+			continue;
+		}
+
+		// Each stage stores its offset in a fixed slot (vertex/fragment/compute).
+		uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage];
+		DEV_ASSERT(sc_d3d12.stages_bit_offsets[offset_idx] == 0);
+		sc_d3d12.stages_bit_offsets[offset_idx] = p_bit_offset;
+		found = true;
+		break;
+	}
+
+	DEV_ASSERT(found);
+}
+
+// NIR-to-DXIL callback: once the byte-aligned position of the bitcode inside
+// the final DXIL container is known, rebases every previously recorded
+// specialization-constant bit offset for this stage from bitcode-relative to
+// container-relative. Offsets still at 0 belong to constants optimized out of
+// this stage and are left untouched.
+void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data) {
+	DEV_ASSERT(p_bit_offset % 8 == 0);
+
+	const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data;
+	uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage];
+	for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) {
+		ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i];
+		if (sc_d3d12.stages_bit_offsets[offset_idx] == 0) {
+			// This SC has been optimized out from this stage.
+			continue;
+		}
+
+		sc_d3d12.stages_bit_offsets[offset_idx] += p_bit_offset;
+	}
+}
+#endif
+
+// Post-reflection hook: sizes the D3D12-specific arrays to mirror the common
+// reflection data, then sorts the uniforms of each set by binding so the root
+// signature descriptor ranges are always generated in a deterministic order.
+void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) {
+	reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
+	reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
+
+	// Sort bindings inside each uniform set. This guarantees the root signature will be generated in the correct order.
+	SortArray<ReflectionBindingData> sorter;
+	uint32_t binding_start = 0;
+	for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) {
+		uint32_t uniform_count = reflection_binding_set_uniforms_count[i];
+		if (uniform_count > 0) {
+			sorter.sort(&reflection_binding_set_uniforms_data.ptrw()[binding_start], uniform_count);
+			binding_start += uniform_count;
+		}
+	}
+}
+
+// Compiles the supplied SPIR-V stages into signed, compressed DXIL blobs and
+// generates the matching root signature. Returns false if translation,
+// compression or root signature serialization fails. Only available when NIR
+// support is compiled in; shader-baker runtime builds fail with an explicit
+// error instead.
+bool RenderingShaderContainerD3D12::_set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) {
+#if NIR_ENABLED
+	reflection_data_d3d12.nir_runtime_data_root_param_idx = UINT32_MAX;
+
+	// Build the mask of specialization constant IDs (IDs must fit in 32 bits).
+	for (int64_t i = 0; i < reflection_specialization_data.size(); i++) {
+		DEV_ASSERT(reflection_specialization_data[i].constant_id < (sizeof(reflection_data_d3d12.spirv_specialization_constants_ids_mask) * 8) && "Constant IDs with values above 31 are not supported.");
+		reflection_data_d3d12.spirv_specialization_constants_ids_mask |= (1 << reflection_specialization_data[i].constant_id);
+	}
+
+	// Translate SPIR-V shaders to DXIL, and collect shader info from the new representation.
+	HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> dxil_blobs;
+	Vector<RenderingDeviceCommons::ShaderStage> stages;
+	BitField<RenderingDeviceCommons::ShaderStage> stages_processed = {};
+	if (!_convert_spirv_to_dxil(p_spirv, dxil_blobs, stages, stages_processed)) {
+		return false;
+	}
+
+	// Patch with default values of specialization constants.
+	DEV_ASSERT(reflection_specialization_data.size() == reflection_specialization_data_d3d12.size());
+	for (int32_t i = 0; i < reflection_specialization_data.size(); i++) {
+		const ReflectionSpecializationData &sc = reflection_specialization_data[i];
+		const ReflectionSpecializationDataD3D12 &sc_d3d12 = reflection_specialization_data_d3d12[i];
+		RenderingDXIL::patch_specialization_constant((RenderingDeviceCommons::PipelineSpecializationConstantType)(sc.type), &sc.int_value, sc_d3d12.stages_bit_offsets, dxil_blobs, true);
+	}
+
+	// Sign.
+	for (KeyValue<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &E : dxil_blobs) {
+		RenderingDXIL::sign_bytecode(E.key, E.value);
+	}
+
+	// Store compressed DXIL blobs as the shaders.
+	shaders.resize(p_spirv.size());
+	for (int64_t i = 0; i < shaders.size(); i++) {
+		const PackedByteArray &dxil_bytes = dxil_blobs[stages[i]];
+		RenderingShaderContainer::Shader &shader = shaders.ptrw()[i];
+		uint32_t compressed_size = 0;
+		shader.shader_stage = stages[i];
+		shader.code_decompressed_size = dxil_bytes.size();
+		shader.code_compressed_bytes.resize(dxil_bytes.size());
+
+		bool compressed = compress_code(dxil_bytes.ptr(), dxil_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
+		// Report the loop index directly: the previous "shader_index" counter
+		// was never incremented, so failures always reported "SPIR-V #0".
+		ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i));
+
+		shader.code_compressed_bytes.resize(compressed_size);
+	}
+
+	if (!_generate_root_signature(stages_processed)) {
+		return false;
+	}
+
+	return true;
+#else
+	ERR_FAIL_V_MSG(false, "Shader compilation is not supported at runtime without NIR.");
+#endif
+}
+
+RenderingShaderContainerD3D12::RenderingShaderContainerD3D12() {
+	// Default empty constructor.
+}
+
+// p_lib_d3d12 is the handle of the loaded D3D12 library, later cast to HMODULE
+// by _generate_root_signature when serializing root signatures.
+RenderingShaderContainerD3D12::RenderingShaderContainerD3D12(void *p_lib_d3d12) {
+	lib_d3d12 = p_lib_d3d12;
+}
+
+// Returns a copy of the D3D12-specific reflection state (specialization
+// constant mask, push constant stages, root signature blob/CRC, and the
+// per-set binding data regrouped per uniform set for easier consumption by
+// the driver).
+RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D12::get_shader_reflection_d3d12() const {
+	ShaderReflectionD3D12 reflection;
+	reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask;
+	reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages;
+	reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx;
+	reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12;
+	reflection.root_signature_bytes = root_signature_bytes;
+	reflection.root_signature_crc = root_signature_crc;
+
+	// Transform the flat data vector into a vector of vectors that's easier to use.
+	uint32_t uniform_index = 0;
+	reflection.reflection_binding_set_uniforms_d3d12.resize(reflection_binding_set_uniforms_count.size());
+	for (int64_t i = 0; i < reflection.reflection_binding_set_uniforms_d3d12.size(); i++) {
+		Vector<ReflectionBindingDataD3D12> &uniforms = reflection.reflection_binding_set_uniforms_d3d12.ptrw()[i];
+		uniforms.resize(reflection_binding_set_uniforms_count[i]);
+		for (int64_t j = 0; j < uniforms.size(); j++) {
+			uniforms.ptrw()[j] = reflection_binding_set_uniforms_data_d3d12[uniform_index];
+			uniform_index++;
+		}
+	}
+
+	return reflection;
+}
+
+// RenderingShaderContainerFormatD3D12
+
+// Stores the D3D12 library handle that is forwarded to every container this
+// format creates (required for root signature serialization).
+void RenderingShaderContainerFormatD3D12::set_lib_d3d12(void *p_lib_d3d12) {
+	lib_d3d12 = p_lib_d3d12;
+}
+
+// Factory: creates a container bound to the configured D3D12 library handle.
+Ref<RenderingShaderContainer> RenderingShaderContainerFormatD3D12::create_container() const {
+	return memnew(RenderingShaderContainerD3D12(lib_d3d12));
+}
+
+RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatD3D12::get_shader_language_version() const {
+	// NIR-DXIL is Vulkan 1.1-conformant.
+	return SHADER_LANGUAGE_VULKAN_VERSION_1_1;
+}
+
+RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatD3D12::get_shader_spirv_version() const {
+	// The SPIR-V part of Mesa supports 1.6, but:
+	// - SPIRV-Reflect won't be able to parse the compute workgroup size.
+	// - We want to play it safe with NIR-DXIL.
+	return SHADER_SPIRV_VERSION_1_5;
+}
+
+RenderingShaderContainerFormatD3D12::RenderingShaderContainerFormatD3D12() {}
+
+RenderingShaderContainerFormatD3D12::~RenderingShaderContainerFormatD3D12() {}

+ 179 - 0
drivers/d3d12/rendering_shader_container_d3d12.h

@@ -0,0 +1,179 @@
+/**************************************************************************/
+/*  rendering_shader_container_d3d12.h                                    */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "servers/rendering/rendering_shader_container.h"
+
+#define NIR_ENABLED 1
+
+#ifdef SHADER_BAKER_RUNTIME_ENABLED
+#undef NIR_ENABLED
+#endif
+
+#include "d3d12_godot_nir_bridge.h"
+
+#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3
+
+#if NIR_ENABLED
+struct nir_shader;
+struct nir_shader_compiler_options;
+#endif
+
+// Selects which of a uniform's two possible root signature locations is
+// meant: the resource descriptor table or the sampler descriptor table.
+enum RootSignatureLocationType {
+	RS_LOC_TYPE_RESOURCE,
+	RS_LOC_TYPE_SAMPLER,
+};
+
+// D3D12 resource class a reflected binding maps to after DXIL translation.
+// RES_CLASS_INVALID marks bindings that are sampler-only or unused.
+enum ResourceClass {
+	RES_CLASS_INVALID,
+	RES_CLASS_CBV,
+	RES_CLASS_SRV,
+	RES_CLASS_UAV,
+};
+
+// Helpers for post-processing DXIL bytecode: patching specialization-constant
+// values at previously recorded bit offsets, and signing the final blob so
+// the D3D12 runtime accepts it.
+struct RenderingDXIL {
+	static uint32_t patch_specialization_constant(
+			RenderingDeviceCommons::PipelineSpecializationConstantType p_type,
+			const void *p_value,
+			const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES],
+			HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_stages_bytecodes,
+			bool p_is_first_patch);
+
+	static void sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob);
+};
+
+// Shader container for the D3D12 driver: translates SPIR-V to DXIL through
+// Mesa's NIR bridge (when NIR_ENABLED), stores the signed/compressed blobs
+// plus the serialized root signature, and exposes the D3D12-specific
+// reflection needed by the rendering device driver at pipeline creation time.
+class RenderingShaderContainerD3D12 : public RenderingShaderContainer {
+	GDSOFTCLASS(RenderingShaderContainerD3D12, RenderingShaderContainer);
+
+public:
+	static constexpr uint32_t REQUIRED_SHADER_MODEL = 0x62; // D3D_SHADER_MODEL_6_2
+	// Registers reserved past the uniform-set range for push constants and the
+	// NIR runtime data, mirroring the packing used on the NIR side.
+	static constexpr uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 1);
+	static constexpr uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 2);
+	static constexpr uint32_t FORMAT_VERSION = 1;
+	// Maps each shader stage to its slot in stages_bit_offsets (only vertex,
+	// fragment and compute are supported; tessellation has no slot).
+	static constexpr uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
+		0, // SHADER_STAGE_VERTEX
+		1, // SHADER_STAGE_FRAGMENT
+		UINT32_MAX, // SHADER_STAGE_TESSELATION_CONTROL
+		UINT32_MAX, // SHADER_STAGE_TESSELATION_EVALUATION
+		2, // SHADER_STAGE_COMPUTE
+	};
+
+	// Where a uniform ended up in the generated root signature.
+	struct RootSignatureLocation {
+		uint32_t root_param_index = UINT32_MAX;
+		uint32_t range_index = UINT32_MAX;
+	};
+
+	// Per-binding data gathered from the DXIL translation callbacks.
+	struct ReflectionBindingDataD3D12 {
+		uint32_t resource_class = 0;
+		uint32_t has_sampler = 0;
+		uint32_t dxil_stages = 0;
+		// Indexed by RootSignatureLocationType (resource, sampler).
+		RootSignatureLocation root_signature_locations[2];
+	};
+
+	// Bit offsets of one specialization constant inside each stage's bitcode.
+	struct ReflectionSpecializationDataD3D12 {
+		uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {};
+	};
+
+protected:
+	struct ReflectionDataD3D12 {
+		uint32_t spirv_specialization_constants_ids_mask = 0;
+		uint32_t dxil_push_constant_stages = 0;
+		uint32_t nir_runtime_data_root_param_idx = 0;
+	};
+
+	struct ContainerFooterD3D12 {
+		uint32_t root_signature_length = 0;
+		uint32_t root_signature_crc = 0;
+	};
+
+	void *lib_d3d12 = nullptr;
+	ReflectionDataD3D12 reflection_data_d3d12;
+	Vector<ReflectionBindingDataD3D12> reflection_binding_set_uniforms_data_d3d12;
+	Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
+	Vector<uint8_t> root_signature_bytes;
+	uint32_t root_signature_crc = 0;
+
+#if NIR_ENABLED
+	bool _convert_spirv_to_nir(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
+	bool _convert_nir_to_dxil(const HashMap<int, nir_shader *> &p_stages_nir_shaders, BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs);
+	bool _convert_spirv_to_dxil(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
+	bool _generate_root_signature(BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed);
+
+	// GodotNirCallbacks.
+	static void _nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data);
+	static void _nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data);
+	static void _nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data);
+#endif
+
+	// RenderingShaderContainer overrides.
+	virtual uint32_t _format() const override;
+	virtual uint32_t _format_version() const override;
+	virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_footer_extra_data(const uint8_t *p_bytes) override;
+	virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override;
+	virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const override;
+	virtual void _set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) override;
+	virtual bool _set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) override;
+
+public:
+	// Aggregated D3D12 reflection returned to the driver (see
+	// get_shader_reflection_d3d12 in the implementation file).
+	struct ShaderReflectionD3D12 {
+		uint32_t spirv_specialization_constants_ids_mask = 0;
+		uint32_t dxil_push_constant_stages = 0;
+		uint32_t nir_runtime_data_root_param_idx = 0;
+		Vector<Vector<ReflectionBindingDataD3D12>> reflection_binding_set_uniforms_d3d12;
+		Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
+		Vector<uint8_t> root_signature_bytes;
+		uint32_t root_signature_crc = 0;
+	};
+
+	RenderingShaderContainerD3D12();
+	RenderingShaderContainerD3D12(void *p_lib_d3d12);
+	ShaderReflectionD3D12 get_shader_reflection_d3d12() const;
+};
+
+// Factory for D3D12 shader containers; also reports the shader language and
+// SPIR-V versions that baked shaders must target for this backend.
+class RenderingShaderContainerFormatD3D12 : public RenderingShaderContainerFormat {
+protected:
+	// Handle of the loaded D3D12 library, forwarded to created containers.
+	void *lib_d3d12 = nullptr;
+
+public:
+	void set_lib_d3d12(void *p_lib_d3d12);
+	virtual Ref<RenderingShaderContainer> create_container() const override;
+	virtual ShaderLanguageVersion get_shader_language_version() const override;
+	virtual ShaderSpirvVersion get_shader_spirv_version() const override;
+	RenderingShaderContainerFormatD3D12();
+	virtual ~RenderingShaderContainerFormatD3D12();
+};

+ 0 - 6
drivers/gles3/shader_gles3.cpp

@@ -129,12 +129,6 @@ void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code,
 	feedback_count = p_feedback_count;
 
 	StringBuilder tohash;
-	/*
-	tohash.append("[SpirvCacheKey]");
-	tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key());
-	tohash.append("[BinaryCacheKey]");
-	tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key());
-	*/
 	tohash.append("[Vertex]");
 	tohash.append(p_vertex_code ? p_vertex_code : "");
 	tohash.append("[Fragment]");

+ 1 - 1
drivers/gles3/storage/material_storage.cpp

@@ -2164,7 +2164,7 @@ RID MaterialStorage::shader_allocate() {
 	return shader_owner.allocate_rid();
 }
 
-void MaterialStorage::shader_initialize(RID p_rid) {
+void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) {
 	Shader shader;
 	shader.data = nullptr;
 	shader.mode = RS::SHADER_MAX;

+ 5 - 1
drivers/gles3/storage/material_storage.h

@@ -482,6 +482,7 @@ private:
 	mutable RID_Owner<Material, true> material_owner;
 
 	SelfList<Material>::List material_update_list;
+	HashSet<RID> dummy_embedded_set;
 
 public:
 	static MaterialStorage *get_singleton();
@@ -574,7 +575,7 @@ public:
 	void _shader_make_dirty(Shader *p_shader);
 
 	virtual RID shader_allocate() override;
-	virtual void shader_initialize(RID p_rid) override;
+	virtual void shader_initialize(RID p_rid, bool p_embedded = true) override;
 	virtual void shader_free(RID p_rid) override;
 
 	virtual void shader_set_code(RID p_shader, const String &p_code) override;
@@ -587,6 +588,9 @@ public:
 	virtual Variant shader_get_parameter_default(RID p_shader, const StringName &p_name) const override;
 
 	virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override;
+	virtual void shader_embedded_set_lock() override {}
+	virtual const HashSet<RID> &shader_embedded_set_get() const override { return dummy_embedded_set; }
+	virtual void shader_embedded_set_unlock() override {}
 
 	/* MATERIAL API */
 

+ 2 - 1
drivers/metal/metal_device_properties.h

@@ -70,7 +70,8 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) {
 };
 
 struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
-	uint32_t mslVersion = 0;
+	uint32_t mslVersionMajor = 0;
+	uint32_t mslVersionMinor = 0;
 	MTLGPUFamily highestFamily = MTLGPUFamilyApple4;
 	bool supportsBCTextureCompression = false;
 	bool supportsDepth24Stencil8 = false;

+ 2 - 45
drivers/metal/metal_device_properties.mm

@@ -137,51 +137,8 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
 
 	MTLCompileOptions *opts = [MTLCompileOptions new];
 	features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
-
-#define setMSLVersion(m_maj, m_min) \
-	features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min)
-
-	switch (features.mslVersionEnum) {
-#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || __TV_OS_VERSION_MAX_ALLOWED >= 180000 || __VISION_OS_VERSION_MAX_ALLOWED >= 20000
-		case MTLLanguageVersion3_2:
-			setMSLVersion(3, 2);
-			break;
-#endif
-#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 || __TV_OS_VERSION_MAX_ALLOWED >= 170000
-		case MTLLanguageVersion3_1:
-			setMSLVersion(3, 1);
-			break;
-#endif
-		case MTLLanguageVersion3_0:
-			setMSLVersion(3, 0);
-			break;
-		case MTLLanguageVersion2_4:
-			setMSLVersion(2, 4);
-			break;
-		case MTLLanguageVersion2_3:
-			setMSLVersion(2, 3);
-			break;
-		case MTLLanguageVersion2_2:
-			setMSLVersion(2, 2);
-			break;
-		case MTLLanguageVersion2_1:
-			setMSLVersion(2, 1);
-			break;
-		case MTLLanguageVersion2_0:
-			setMSLVersion(2, 0);
-			break;
-		case MTLLanguageVersion1_2:
-			setMSLVersion(1, 2);
-			break;
-		case MTLLanguageVersion1_1:
-			setMSLVersion(1, 1);
-			break;
-#if TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST && !TARGET_OS_VISION
-		case MTLLanguageVersion1_0:
-			setMSLVersion(1, 0);
-			break;
-#endif
-	}
+	features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff;
+	features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff;
 }
 
 void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {

+ 12 - 59
drivers/metal/metal_objects.h

@@ -53,6 +53,7 @@
 #import "metal_device_properties.h"
 #import "metal_utils.h"
 #import "pixel_formats.h"
+#import "sha256_digest.h"
 
 #include "servers/rendering/rendering_device_driver.h"
 
@@ -82,9 +83,6 @@ MTL_CLASS(Texture)
 
 } //namespace MTL
 
-/// Metal buffer index for the view mask when rendering multi-view.
-const uint32_t VIEW_MASK_BUFFER_INDEX = 24;
-
 enum ShaderStageUsage : uint32_t {
 	None = 0,
 	Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
@@ -574,34 +572,6 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo {
 		desc.arrayLength = arrayLength;
 		return desc;
 	}
-
-	size_t serialize_size() const {
-		return sizeof(uint32_t) * 8 /* 8 uint32_t fields */;
-	}
-
-	template <typename W>
-	void serialize(W &p_writer) const {
-		p_writer.write((uint32_t)dataType);
-		p_writer.write(index);
-		p_writer.write((uint32_t)access);
-		p_writer.write((uint32_t)usage);
-		p_writer.write((uint32_t)textureType);
-		p_writer.write(imageFormat);
-		p_writer.write(arrayLength);
-		p_writer.write(isMultisampled);
-	}
-
-	template <typename R>
-	void deserialize(R &p_reader) {
-		p_reader.read((uint32_t &)dataType);
-		p_reader.read(index);
-		p_reader.read((uint32_t &)access);
-		p_reader.read((uint32_t &)usage);
-		p_reader.read((uint32_t &)textureType);
-		p_reader.read((uint32_t &)imageFormat);
-		p_reader.read(arrayLength);
-		p_reader.read(isMultisampled);
-	}
 };
 
 using RDC = RenderingDeviceCommons;
@@ -635,38 +605,28 @@ enum class ShaderLoadStrategy {
 /// A Metal shader library.
 @interface MDLibrary : NSObject {
 	ShaderCacheEntry *_entry;
+	NSString *_original_source;
 };
 - (id<MTLLibrary>)library;
 - (NSError *)error;
 - (void)setLabel:(NSString *)label;
+#ifdef DEV_ENABLED
+- (NSString *)originalSource;
+#endif
 
 + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
 								  device:(id<MTLDevice>)device
 								  source:(NSString *)source
 								 options:(MTLCompileOptions *)options
 								strategy:(ShaderLoadStrategy)strategy;
-@end
-
-struct SHA256Digest {
-	unsigned char data[CC_SHA256_DIGEST_LENGTH];
-
-	uint32_t hash() const {
-		uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH);
-		return c;
-	}
 
-	SHA256Digest() {
-		bzero(data, CC_SHA256_DIGEST_LENGTH);
-	}
-
-	SHA256Digest(const char *p_data, size_t p_length) {
-		CC_SHA256(p_data, (CC_LONG)p_length, data);
-	}
-
-	_FORCE_INLINE_ uint32_t short_sha() const {
-		return __builtin_bswap32(*(uint32_t *)&data[0]);
-	}
-};
++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
+								  device:(id<MTLDevice>)device
+#ifdef DEV_ENABLED
+								  source:(NSString *)source
+#endif
+									data:(dispatch_data_t)data;
+@end
 
 template <>
 struct HashMapComparatorDefault<SHA256Digest> {
@@ -717,9 +677,6 @@ public:
 	MTLSize local = {};
 
 	MDLibrary *kernel;
-#if DEV_ENABLED
-	CharString kernel_source;
-#endif
 
 	void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
 
@@ -742,10 +699,6 @@ public:
 
 	MDLibrary *vert;
 	MDLibrary *frag;
-#if DEV_ENABLED
-	CharString vert_source;
-	CharString frag_source;
-#endif
 
 	void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
 

+ 89 - 4
drivers/metal/metal_objects.mm

@@ -53,6 +53,7 @@
 #import "metal_utils.h"
 #import "pixel_formats.h"
 #import "rendering_device_driver_metal.h"
+#import "rendering_shader_container_metal.h"
 
 #import <os/signpost.h>
 
@@ -1941,7 +1942,11 @@ void ShaderCacheEntry::notify_free() const {
 }
 
 @interface MDLibrary ()
-- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry;
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+#ifdef DEV_ENABLED
+							source:(NSString *)source;
+#endif
+;
 @end
 
 /// Loads the MTLLibrary when the library is first accessed.
@@ -1975,6 +1980,18 @@ void ShaderCacheEntry::notify_free() const {
 						   options:(MTLCompileOptions *)options;
 @end
 
+@interface MDBinaryLibrary : MDLibrary {
+	id<MTLLibrary> _library;
+	NSError *_error;
+}
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+							device:(id<MTLDevice>)device
+#ifdef DEV_ENABLED
+							source:(NSString *)source
+#endif
+							  data:(dispatch_data_t)data;
+@end
+
 @implementation MDLibrary
 
 + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
@@ -1992,6 +2009,26 @@ void ShaderCacheEntry::notify_free() const {
 	}
 }
 
++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
+								  device:(id<MTLDevice>)device
+#ifdef DEV_ENABLED
+								  source:(NSString *)source
+#endif
+									data:(dispatch_data_t)data {
+	return [[MDBinaryLibrary alloc] initWithCacheEntry:entry
+												device:device
+#ifdef DEV_ENABLED
+												source:source
+#endif
+												  data:data];
+}
+
+#ifdef DEV_ENABLED
+- (NSString *)originalSource {
+	return _original_source;
+}
+#endif
+
 - (id<MTLLibrary>)library {
 	CRASH_NOW_MSG("Not implemented");
 	return nil;
@@ -2005,10 +2042,17 @@ void ShaderCacheEntry::notify_free() const {
 - (void)setLabel:(NSString *)label {
 }
 
-- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry {
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+#ifdef DEV_ENABLED
+							source:(NSString *)source
+#endif
+{
 	self = [super init];
 	_entry = entry;
 	_entry->library = self;
+#ifdef DEV_ENABLED
+	_original_source = source;
+#endif
 	return self;
 }
 
@@ -2024,7 +2068,11 @@ void ShaderCacheEntry::notify_free() const {
 							device:(id<MTLDevice>)device
 							source:(NSString *)source
 						   options:(MTLCompileOptions *)options {
-	self = [super initWithCacheEntry:entry];
+	self = [super initWithCacheEntry:entry
+#ifdef DEV_ENABLED
+							  source:source
+#endif
+	];
 	_complete = false;
 	_ready = false;
 
@@ -2076,7 +2124,11 @@ void ShaderCacheEntry::notify_free() const {
 							device:(id<MTLDevice>)device
 							source:(NSString *)source
 						   options:(MTLCompileOptions *)options {
-	self = [super initWithCacheEntry:entry];
+	self = [super initWithCacheEntry:entry
+#ifdef DEV_ENABLED
+							  source:source
+#endif
+	];
 	_device = device;
 	_source = source;
 	_options = options;
@@ -2121,3 +2173,36 @@ void ShaderCacheEntry::notify_free() const {
 }
 
 @end
+
+@implementation MDBinaryLibrary
+
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+							device:(id<MTLDevice>)device
+#ifdef DEV_ENABLED
+							source:(NSString *)source
+#endif
+							  data:(dispatch_data_t)data {
+	self = [super initWithCacheEntry:entry
+#ifdef DEV_ENABLED
+							  source:source
+#endif
+	];
+	NSError *error = nil;
+	_library = [device newLibraryWithData:data error:&error];
+	if (error != nil) {
+		_error = error;
+		NSString *desc = [error description];
+		ERR_PRINT(vformat("Unable to load shader library: %s", desc.UTF8String));
+	}
+	return self;
+}
+
+- (id<MTLLibrary>)library {
+	return _library;
+}
+
+- (NSError *)error {
+	return _error;
+}
+
+@end

+ 6 - 15
drivers/metal/rendering_device_driver_metal.h

@@ -31,6 +31,7 @@
 #pragma once
 
 #import "metal_objects.h"
+#import "rendering_shader_container_metal.h"
 
 #include "servers/rendering/rendering_device_driver.h"
 
@@ -57,9 +58,9 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
 	RenderingContextDriver::Device context_device;
 	id<MTLDevice> device = nil;
 
-	uint32_t version_major = 2;
-	uint32_t version_minor = 0;
 	MetalDeviceProperties *device_properties = nullptr;
+	MetalDeviceProfile device_profile;
+	RenderingShaderContainerFormatMetal *shader_container_format = nullptr;
 	PixelFormats *pixel_formats = nullptr;
 	std::unique_ptr<MDResourceCache> resource_cache;
 
@@ -77,7 +78,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
 	String pipeline_cache_id;
 
 	Error _create_device();
-	Error _check_capabilities();
+	void _check_capabilities();
 
 #pragma mark - Shader Cache
 
@@ -241,21 +242,11 @@ private:
 	friend struct ShaderBinaryData;
 	friend struct PushConstantData;
 
-private:
-	/// Contains additional metadata about the shader.
-	struct ShaderMeta {
-		/// Indicates whether the shader uses multiview.
-		bool has_multiview = false;
-	};
-
-	Error _reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta);
-
 public:
-	virtual String shader_get_binary_cache_key() override final;
-	virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
-	virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
+	virtual ShaderID shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
 	virtual void shader_free(ShaderID p_shader) override final;
 	virtual void shader_destroy_modules(ShaderID p_shader) override final;
+	virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final;
 
 #pragma mark - Uniform Set
 

Dosya farkı çok büyük olduğundan ihmal edildi
+ 1 - 1343
drivers/metal/rendering_device_driver_metal.mm


+ 265 - 0
drivers/metal/rendering_shader_container_metal.h

@@ -0,0 +1,265 @@
+/**************************************************************************/
+/*  rendering_shader_container_metal.h                                    */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#import "sha256_digest.h"
+
+#import "servers/rendering/rendering_device_driver.h"
+#import "servers/rendering/rendering_shader_container.h"
+
+constexpr uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535;
+/// Metal buffer index for the view mask when rendering multi-view.
+const uint32_t VIEW_MASK_BUFFER_INDEX = 24;
+
+class RenderingShaderContainerFormatMetal;
+
+/// @brief A minimal structure that defines a device profile for Metal.
+///
+/// This structure is used by the `RenderingShaderContainerMetal` class to
+/// determine options for compiling SPIR-V to Metal source. It currently only
+/// contains the minimum properties required to transform shaders from SPIR-V to Metal
+/// and potentially compile to a `.metallib`.
+struct MetalDeviceProfile {
+	enum class Platform : uint32_t {
+		macOS = 0,
+		iOS = 1,
+	};
+
+	/// @brief The GPU family.
+	enum class GPU : uint32_t {
+		Apple1,
+		Apple2,
+		Apple3,
+		Apple4,
+		Apple5,
+		Apple6,
+		Apple7,
+		Apple8,
+		Apple9,
+	};
+
+	enum class ArgumentBuffersTier : uint32_t {
+		Tier1 = 0,
+		Tier2 = 1,
+	};
+
+	struct Features {
+		uint32_t mslVersionMajor = 0;
+		uint32_t mslVersionMinor = 0;
+		ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;
+		bool simdPermute = false;
+	};
+
+	Platform platform = Platform::macOS;
+	GPU gpu = GPU::Apple4;
+	Features features;
+
+	static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu);
+
+	MetalDeviceProfile() = default;
+
+private:
+	static Mutex profiles_lock; ///< Mutex to protect access to the profiles map.
+	static HashMap<uint32_t, MetalDeviceProfile> profiles;
+};
+
+class RenderingShaderContainerMetal : public RenderingShaderContainer {
+	GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer);
+
+public:
+	struct HeaderData {
+		enum Flags : uint32_t {
+			NONE = 0,
+			NEEDS_VIEW_MASK_BUFFER = 1 << 0,
+			USES_ARGUMENT_BUFFERS = 1 << 1,
+		};
+
+		/// The base profile that was used to generate this shader.
+		MetalDeviceProfile profile;
+
+		/// The Metal language version specified when compiling SPIR-V to MSL.
+		/// Format is major * 10000 + minor * 100 + patch.
+		uint32_t msl_version = UINT32_MAX;
+		uint32_t flags = NONE;
+
+		/// @brief Returns `true` if the shader is compiled with multi-view support.
+		bool needs_view_mask_buffer() const {
+			return flags & NEEDS_VIEW_MASK_BUFFER;
+		}
+
+		void set_needs_view_mask_buffer(bool p_value) {
+			if (p_value) {
+				flags |= NEEDS_VIEW_MASK_BUFFER;
+			} else {
+				flags &= ~NEEDS_VIEW_MASK_BUFFER;
+			}
+		}
+
+		/// @brief Returns `true` if the shader was compiled with argument buffer support.
+		bool uses_argument_buffers() const {
+			return flags & USES_ARGUMENT_BUFFERS;
+		}
+
+		void set_uses_argument_buffers(bool p_value) {
+			if (p_value) {
+				flags |= USES_ARGUMENT_BUFFERS;
+			} else {
+				flags &= ~USES_ARGUMENT_BUFFERS;
+			}
+		}
+	};
+
+	struct StageData {
+		uint32_t vertex_input_binding_mask = 0;
+		uint32_t is_position_invariant = 0; ///< <c>true</c> if the position output is invariant
+		uint32_t supports_fast_math = 0;
+		SHA256Digest hash; ///< SHA 256 hash of the shader code
+		uint32_t source_size = 0; ///< size of the source code in the returned bytes
+		uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled
+		uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data
+	};
+
+	struct BindingInfoData {
+		uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used.
+		uint32_t data_type = 0; // MTLDataTypeNone
+		uint32_t index = 0;
+		uint32_t access = 0; // MTLBindingAccessReadOnly
+		uint32_t usage = 0; // MTLResourceUsage (none)
+		uint32_t texture_type = 2; // MTLTextureType2D
+		uint32_t image_format = 0;
+		uint32_t array_length = 0;
+		uint32_t is_multisampled = 0;
+	};
+
+	struct UniformData {
+		/// Specifies the index into the `bindings` array for the shader stage.
+		///
+		/// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays.
+		static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
+			0, // SHADER_STAGE_VERTEX
+			1, // SHADER_STAGE_FRAGMENT
+			0, // SHADER_STAGE_TESSELATION_CONTROL
+			1, // SHADER_STAGE_TESSELATION_EVALUATION
+			0, // SHADER_STAGE_COMPUTE
+		};
+
+		/// Specifies the stages the uniform data is
+		/// used by the Metal shader.
+		uint32_t active_stages = 0;
+		/// The primary binding information for the uniform data.
+		///
+		/// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so
+		/// the array size is fixed to 2.
+		BindingInfoData bindings[2];
+		/// The secondary binding information for the uniform data.
+		///
+	/// This is typically a sampler for an image-sampler uniform.
+		BindingInfoData bindings_secondary[2];
+
+		_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const {
+			return STAGE_INDEX[p_stage];
+		}
+
+		_FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
+			BindingInfoData &info = bindings[get_index_for_stage(p_stage)];
+			DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
+			info.shader_stage = p_stage;
+			return info;
+		}
+
+		_FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
+			BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)];
+			DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
+			info.shader_stage = p_stage;
+			return info;
+		}
+	};
+
+	struct SpecializationData {
+		uint32_t used_stages = 0;
+	};
+
+	HeaderData mtl_reflection_data; // complement to reflection_data
+	Vector<StageData> mtl_shaders; // complement to shaders
+
+private:
+	const MetalDeviceProfile *device_profile = nullptr;
+	bool export_mode = false;
+
+	Vector<UniformData> mtl_reflection_binding_set_uniforms_data; // complement to reflection_binding_set_uniforms_data
+	Vector<SpecializationData> mtl_reflection_specialization_data; // complement to reflection_specialization_data
+
+	Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data);
+
+public:
+	static constexpr uint32_t FORMAT_VERSION = 1;
+
+	void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; }
+	void set_device_profile(const MetalDeviceProfile *p_device_profile) { device_profile = p_device_profile; }
+
+	struct MetalShaderReflection {
+		Vector<Vector<UniformData>> uniform_sets;
+		Vector<SpecializationData> specialization_constants;
+	};
+
+	MetalShaderReflection get_metal_shader_reflection() const;
+
+protected:
+	virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+
+	virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override;
+	virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+
+	virtual uint32_t _format() const override;
+	virtual uint32_t _format_version() const override;
+	virtual bool _set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) override;
+};
+
+class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat {
+	bool export_mode = false;
+
+	const MetalDeviceProfile *device_profile = nullptr;
+
+public:
+	virtual Ref<RenderingShaderContainer> create_container() const override;
+	virtual ShaderLanguageVersion get_shader_language_version() const override;
+	virtual ShaderSpirvVersion get_shader_spirv_version() const override;
+	RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false);
+	virtual ~RenderingShaderContainerFormatMetal() = default;
+};

+ 699 - 0
drivers/metal/rendering_shader_container_metal.mm

@@ -0,0 +1,699 @@
+/**************************************************************************/
+/*  rendering_shader_container_metal.mm                                   */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "rendering_shader_container_metal.h"
+
+#include "servers/rendering/rendering_device.h"
+
+#import "core/io/marshalls.h"
+
+#import <Metal/Metal.h>
+#import <spirv.hpp>
+#import <spirv_msl.hpp>
+#import <spirv_parser.hpp>
+
+Mutex MetalDeviceProfile::profiles_lock;
+HashMap<uint32_t, MetalDeviceProfile> MetalDeviceProfile::profiles;
+
+const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) {
+	DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS);
+
+	MutexLock lock(profiles_lock);
+
+	uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu;
+	if (MetalDeviceProfile *profile = profiles.getptr(key)) {
+		return profile;
+	}
+
+	MetalDeviceProfile res;
+	res.platform = p_platform;
+	res.gpu = p_gpu;
+	if (p_platform == Platform::macOS) {
+		res.features.mslVersionMajor = 3;
+		res.features.mslVersionMinor = 2;
+		res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
+		res.features.simdPermute = true;
+	} else if (p_platform == Platform::iOS) {
+		switch (p_gpu) {
+			case GPU::Apple1:
+			case GPU::Apple2:
+			case GPU::Apple3:
+			case GPU::Apple4:
+			case GPU::Apple5: {
+				res.features.simdPermute = false;
+				res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1;
+			} break;
+			case GPU::Apple6:
+			case GPU::Apple7:
+			case GPU::Apple8:
+			case GPU::Apple9: {
+				res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
+				res.features.simdPermute = true;
+			} break;
+		}
+		res.features.mslVersionMajor = 3;
+		res.features.mslVersionMinor = 2;
+	}
+
+	return &profiles.insert(key, res)->value;
+}
+
+Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data) {
+	String name(shader_name.ptr());
+	if (name.contains_char(':')) {
+		name = name.replace_char(':', '_');
+	}
+	Error r_error;
+	Ref<FileAccess> source_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE,
+			name + "_" + itos(p_stage_data.hash.short_sha()),
+			"metal", false, &r_error);
+	ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary source file.");
+	if (!source_file->store_buffer((const uint8_t *)p_source, strlen(p_source))) {
+		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unable to write temporary source file");
+	}
+	source_file->flush();
+	Ref<FileAccess> result_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE,
+			name + "_" + itos(p_stage_data.hash.short_sha()),
+			"metallib", false, &r_error);
+
+	ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary target file");
+
+	String sdk;
+	switch (device_profile->platform) {
+		case MetalDeviceProfile::Platform::macOS:
+			sdk = "macosx";
+			break;
+		case MetalDeviceProfile::Platform::iOS:
+			sdk = "iphoneos";
+			break;
+	}
+
+	// Build the metallib binary.
+	{
+		List<String> args{ "-sdk", sdk, "metal", "-O3" };
+		if (p_stage_data.is_position_invariant) {
+			args.push_back("-fpreserve-invariance");
+		}
+		args.push_back("-fmetal-math-mode=fast");
+		args.push_back(source_file->get_path_absolute());
+		args.push_back("-o");
+		args.push_back(result_file->get_path_absolute());
+		String r_pipe;
+		int exit_code;
+		Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true);
+		if (!r_pipe.is_empty()) {
+			print_line(r_pipe);
+		}
+		if (err != OK) {
+			ERR_PRINT(vformat("Metal compiler returned error code: %d", err));
+		}
+
+		if (exit_code != 0) {
+			ERR_PRINT(vformat("Metal compiler exited with error code: %d", exit_code));
+		}
+		int len = result_file->get_length();
+		ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "Metal compiler created empty library");
+	}
+
+	// Strip the source from the binary.
+	{
+		List<String> args{ "-sdk", sdk, "metal-dsymutil", "--remove-source", result_file->get_path_absolute() };
+		String r_pipe;
+		int exit_code;
+		Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true);
+		if (!r_pipe.is_empty()) {
+			print_line(r_pipe);
+		}
+		if (err != OK) {
+			ERR_PRINT(vformat("metal-dsymutil tool returned error code: %d", err));
+		}
+
+		if (exit_code != 0) {
+			ERR_PRINT(vformat("metal-dsymutil Compiler exited with error code: %d", exit_code));
+		}
+		int len = result_file->get_length();
+		ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "metal-dsymutil tool created empty library");
+	}
+
+	r_binary_data = result_file->get_buffer(result_file->get_length());
+
+	return OK;
+}
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunguarded-availability"
+
+bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) {
+	using namespace spirv_cross;
+	using spirv_cross::CompilerMSL;
+	using spirv_cross::Resource;
+
+	// initialize Metal-specific reflection data
+	shaders.resize(p_spirv.size());
+	mtl_shaders.resize(p_spirv.size());
+	mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size());
+	mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
+
+	mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview);
+
+	// set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data
+	// including the last one, which is the size of reflection_binding_set_uniforms_count.
+	LocalVector<uint32_t> set_indexes;
+	uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1;
+	{
+		// calculate the starting offsets of each descriptor set in the binding set uniforms data
+		uint32_t size = reflection_binding_set_uniforms_count.size();
+		set_indexes.resize(set_indexes_size);
+		uint32_t offset = 0;
+		for (uint32_t i = 0; i < size; i++) {
+			set_indexes[i] = offset;
+			offset += reflection_binding_set_uniforms_count.get(i);
+		}
+		set_indexes[set_indexes_size - 1] = offset;
+	}
+	CompilerMSL::Options msl_options{};
+	msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
+	mtl_reflection_data.msl_version = msl_options.msl_version;
+	msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
+
+	if (device_profile->platform == MetalDeviceProfile::Platform::iOS) {
+		msl_options.ios_use_simdgroup_functions = device_profile->features.simdPermute;
+		msl_options.ios_support_base_vertex_instance = true;
+	}
+
+	bool disable_argument_buffers = false;
+	if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
+		disable_argument_buffers = true;
+	}
+
+	if (device_profile->features.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && !disable_argument_buffers) {
+		msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
+		msl_options.argument_buffers = true;
+		mtl_reflection_data.set_uses_argument_buffers(true);
+	} else {
+		msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1;
+		// Tier 1 argument buffers don't support writable textures, so we disable them completely.
+		msl_options.argument_buffers = false;
+		mtl_reflection_data.set_uses_argument_buffers(false);
+	}
+	msl_options.force_active_argument_buffer_resources = true;
+	// We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding.
+	// msl_options.pad_argument_buffer_resources = true;
+	msl_options.texture_buffer_native = true; // Enable texture buffer support.
+	msl_options.use_framebuffer_fetch_subpasses = false;
+	msl_options.pad_fragment_output_components = true;
+	msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID;
+	msl_options.agx_manual_cube_grad_fixup = true;
+	if (reflection_data.has_multiview) {
+		msl_options.multiview = true;
+		msl_options.multiview_layered_rendering = true;
+		msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
+	}
+
+	CompilerGLSL::Options options{};
+	options.vertex.flip_vert_y = true;
+#if DEV_ENABLED
+	options.emit_line_directives = true;
+#endif
+
+	for (uint32_t i = 0; i < p_spirv.size(); i++) {
+		StageData &stage_data = mtl_shaders.write[i];
+		RD::ShaderStageSPIRVData const &v = p_spirv[i];
+		RD::ShaderStage stage = v.shader_stage;
+		char const *stage_name = RD::SHADER_STAGE_NAMES[stage];
+		uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr());
+		size_t word_count = v.spirv.size() / sizeof(uint32_t);
+		Parser parser(ir, word_count);
+		try {
+			parser.parse();
+		} catch (CompilerError &e) {
+			ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what());
+		}
+
+		CompilerMSL compiler(std::move(parser.get_parsed_ir()));
+		compiler.set_msl_options(msl_options);
+		compiler.set_common_options(options);
+
+		std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
+		ShaderResources resources = compiler.get_shader_resources();
+
+		std::string source;
+		try {
+			source = compiler.compile();
+		} catch (CompilerError &e) {
+			ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what());
+		}
+
+		ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage.");
+
+		SmallVector<EntryPoint> entry_pts_stages = compiler.get_entry_points_and_stages();
+		EntryPoint &entry_point_stage = entry_pts_stages.front();
+		SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model);
+
+		// Process specialization constants.
+		if (!compiler.get_specialization_constants().empty()) {
+			uint32_t size = reflection_specialization_data.size();
+			for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
+				uint32_t j = 0;
+				while (j < size) {
+					const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j];
+					if (res.constant_id == constant.constant_id) {
+						mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage;
+						// emulate labeled for loop and continue
+						goto outer_continue;
+					}
+					++j;
+				}
+				if (j == size) {
+					WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id));
+				}
+			outer_continue:;
+			}
+		}
+
+		// Process bindings.
+		uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size();
+		using BT = SPIRType::BaseType;
+
+		// Always clearer than a boolean.
+		enum class Writable {
+			No,
+			Maybe,
+		};
+
+		// Returns a std::optional containing the value of the
+		// decoration, if it exists.
+		auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
+			uint32_t res = -1;
+			if (compiler.has_decoration(id, decoration)) {
+				res = compiler.get_decoration(id, decoration);
+			}
+			return res;
+		};
+
+		auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector<Resource> &p_resources, Writable p_writable) {
+			for (Resource const &res : p_resources) {
+				uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
+				uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
+				UniformData *found = nullptr;
+				if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) {
+					uint32_t begin = set_indexes[dset];
+					uint32_t end = set_indexes[dset + 1];
+					for (uint32_t j = begin; j < end; j++) {
+						const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j];
+						if (dbin == ref_bind.binding) {
+							found = &mtl_reflection_binding_set_uniforms_data.write[j];
+							break;
+						}
+					}
+				}
+
+				ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found");
+
+				bool is_active = active.find(res.id) != active.end();
+				if (is_active) {
+					found->active_stages |= 1 << stage;
+				}
+
+				BindingInfoData &primary = found->get_binding_for_stage(stage);
+
+				SPIRType const &a_type = compiler.get_type(res.type_id);
+				BT basetype = a_type.basetype;
+
+				switch (basetype) {
+					case BT::Struct: {
+						primary.data_type = MTLDataTypePointer;
+					} break;
+
+					case BT::Image:
+					case BT::SampledImage: {
+						primary.data_type = MTLDataTypeTexture;
+					} break;
+
+					case BT::Sampler: {
+						primary.data_type = MTLDataTypeSampler;
+						primary.array_length = 1;
+						for (uint32_t const &a : a_type.array) {
+							primary.array_length *= a;
+						}
+					} break;
+
+					default: {
+						ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType");
+					} break;
+				}
+
+				// Find array length of image.
+				if (basetype == BT::Image || basetype == BT::SampledImage) {
+					primary.array_length = 1;
+					for (uint32_t const &a : a_type.array) {
+						primary.array_length *= a;
+					}
+					primary.is_multisampled = a_type.image.ms;
+
+					SPIRType::ImageType const &image = a_type.image;
+					primary.image_format = image.format;
+
+					switch (image.dim) {
+						case spv::Dim1D: {
+							if (image.arrayed) {
+								primary.texture_type = MTLTextureType1DArray;
+							} else {
+								primary.texture_type = MTLTextureType1D;
+							}
+						} break;
+						case spv::DimSubpassData: {
+							[[fallthrough]];
+						}
+						case spv::Dim2D: {
+							if (image.arrayed && image.ms) {
+								primary.texture_type = MTLTextureType2DMultisampleArray;
+							} else if (image.arrayed) {
+								primary.texture_type = MTLTextureType2DArray;
+							} else if (image.ms) {
+								primary.texture_type = MTLTextureType2DMultisample;
+							} else {
+								primary.texture_type = MTLTextureType2D;
+							}
+						} break;
+						case spv::Dim3D: {
+							primary.texture_type = MTLTextureType3D;
+						} break;
+						case spv::DimCube: {
+							if (image.arrayed) {
+								primary.texture_type = MTLTextureTypeCube;
+							}
+						} break;
+						case spv::DimRect: {
+						} break;
+						case spv::DimBuffer: {
+							// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
+							primary.texture_type = MTLTextureTypeTextureBuffer;
+						} break;
+						case spv::DimMax: {
+							// Add all enumerations to silence the compiler warning
+							// and generate future warnings, should a new one be added.
+						} break;
+					}
+				}
+
+				// Update writable.
+				if (p_writable == Writable::Maybe) {
+					if (basetype == BT::Struct) {
+						Bitset flags = compiler.get_buffer_block_flags(res.id);
+						if (!flags.get(spv::DecorationNonWritable)) {
+							if (flags.get(spv::DecorationNonReadable)) {
+								primary.access = MTLBindingAccessWriteOnly;
+							} else {
+								primary.access = MTLBindingAccessReadWrite;
+							}
+						}
+					} else if (basetype == BT::Image) {
+						switch (a_type.image.access) {
+							case spv::AccessQualifierWriteOnly:
+								primary.access = MTLBindingAccessWriteOnly;
+								break;
+							case spv::AccessQualifierReadWrite:
+								primary.access = MTLBindingAccessReadWrite;
+								break;
+							case spv::AccessQualifierReadOnly:
+								break;
+							case spv::AccessQualifierMax:
+								[[fallthrough]];
+							default:
+								if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) {
+									if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) {
+										primary.access = MTLBindingAccessWriteOnly;
+									} else {
+										primary.access = MTLBindingAccessReadWrite;
+									}
+								}
+								break;
+						}
+					}
+				}
+
+				switch (primary.access) {
+					case MTLBindingAccessReadOnly:
+						primary.usage = MTLResourceUsageRead;
+						break;
+					case MTLBindingAccessWriteOnly:
+						primary.usage = MTLResourceUsageWrite;
+						break;
+					case MTLBindingAccessReadWrite:
+						primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite;
+						break;
+				}
+
+				primary.index = compiler.get_automatic_msl_resource_binding(res.id);
+
+				// A sampled image contains two bindings, the primary
+				// is to the image, and the secondary is to the associated sampler.
+				if (basetype == BT::SampledImage) {
+					uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
+					if (binding != (uint32_t)-1) {
+						BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
+						secondary.data_type = MTLDataTypeSampler;
+						secondary.index = binding;
+						secondary.access = MTLBindingAccessReadOnly;
+					}
+				}
+
+				// An image may have a secondary binding if it is used
+				// for atomic operations.
+				if (basetype == BT::Image) {
+					uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
+					if (binding != (uint32_t)-1) {
+						BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
+						secondary.data_type = MTLDataTypePointer;
+						secondary.index = binding;
+						secondary.access = MTLBindingAccessReadWrite;
+					}
+				}
+			}
+			return Error::OK;
+		};
+
+		if (!resources.uniform_buffers.empty()) {
+			Error err = descriptor_bindings(resources.uniform_buffers, Writable::No);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.storage_buffers.empty()) {
+			Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.storage_images.empty()) {
+			Error err = descriptor_bindings(resources.storage_images, Writable::Maybe);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.sampled_images.empty()) {
+			Error err = descriptor_bindings(resources.sampled_images, Writable::No);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.separate_images.empty()) {
+			Error err = descriptor_bindings(resources.separate_images, Writable::No);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.separate_samplers.empty()) {
+			Error err = descriptor_bindings(resources.separate_samplers, Writable::No);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+		if (!resources.subpass_inputs.empty()) {
+			Error err = descriptor_bindings(resources.subpass_inputs, Writable::No);
+			ERR_FAIL_COND_V(err != OK, false);
+		}
+
+		if (!resources.push_constant_buffers.empty()) {
+			for (Resource const &res : resources.push_constant_buffers) {
+				uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
+				if (binding != (uint32_t)-1) {
+					stage_data.push_constant_binding = binding;
+				}
+			}
+		}
+
+		ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported");
+		ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures not supported");
+		ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported");
+
+		if (!resources.stage_inputs.empty()) {
+			for (Resource const &res : resources.stage_inputs) {
+				uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
+				if (binding != (uint32_t)-1) {
+					stage_data.vertex_input_binding_mask |= 1 << binding;
+				}
+			}
+		}
+
+		stage_data.is_position_invariant = compiler.is_position_invariant();
+		stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve);
+		stage_data.hash = SHA256Digest(source.c_str(), source.length());
+		stage_data.source_size = source.length();
+		::Vector<uint8_t> binary_data;
+		binary_data.resize(stage_data.source_size);
+		memcpy(binary_data.ptrw(), source.c_str(), stage_data.source_size);
+
+		if (export_mode) {
+			// Try to compile the Metal source code
+			::Vector<uint8_t> library_data;
+			Error compile_err = compile_metal_source(source.c_str(), stage_data, library_data);
+			if (compile_err == OK) {
+				stage_data.library_size = library_data.size();
+				binary_data.resize(stage_data.source_size + stage_data.library_size);
+				memcpy(binary_data.ptrw() + stage_data.source_size, library_data.ptr(), stage_data.library_size);
+			}
+		}
+
+		uint32_t binary_data_size = binary_data.size();
+		Shader &shader = shaders.write[i];
+		shader.shader_stage = stage;
+		shader.code_decompressed_size = binary_data_size;
+		shader.code_compressed_bytes.resize(binary_data_size);
+
+		uint32_t compressed_size = 0;
+		bool compressed = compress_code(binary_data.ptr(), binary_data_size, shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
+		ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i));
+
+		shader.code_compressed_bytes.resize(compressed_size);
+	}
+
+	return true;
+}
+
+#pragma clang diagnostic pop
+
+uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
+	if (p_bytes != nullptr) {
+		*(HeaderData *)p_bytes = mtl_reflection_data;
+	}
+	return sizeof(HeaderData);
+}
+
+uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
+	if (p_bytes != nullptr) {
+		*(UniformData *)p_bytes = mtl_reflection_binding_set_uniforms_data[p_index];
+	}
+	return sizeof(UniformData);
+}
+
+uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
+	if (p_bytes != nullptr) {
+		*(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index];
+	}
+	return sizeof(SpecializationData);
+}
+
+uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
+	if (p_bytes != nullptr) {
+		*(StageData *)p_bytes = mtl_shaders[p_index];
+	}
+	return sizeof(StageData);
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
+	mtl_reflection_data = *(HeaderData *)p_bytes;
+	return sizeof(HeaderData);
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
+	mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size());
+	return 0;
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
+	mtl_reflection_binding_set_uniforms_data.ptrw()[p_index] = *(UniformData *)p_bytes;
+	return sizeof(UniformData);
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) {
+	mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
+	return 0;
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
+	mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes;
+	return sizeof(SpecializationData);
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) {
+	mtl_shaders.resize(shaders.size());
+	return 0;
+}
+
+uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
+	mtl_shaders.ptrw()[p_index] = *(StageData *)p_bytes;
+	return sizeof(StageData);
+}
+
+RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const {
+	MetalShaderReflection res;
+
+	res.specialization_constants = mtl_reflection_specialization_data;
+	uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size();
+	uint32_t start = 0;
+	res.uniform_sets.resize(uniform_set_count);
+	for (uint32_t i = 0; i < uniform_set_count; i++) {
+		Vector<UniformData> &set = res.uniform_sets.ptrw()[i];
+		uint32_t count = reflection_binding_set_uniforms_count.get(i);
+		set.resize(count);
+		memcpy(set.ptrw(), &mtl_reflection_binding_set_uniforms_data.ptr()[start], count * sizeof(UniformData));
+		start += count;
+	}
+
+	return res;
+}
+
+uint32_t RenderingShaderContainerMetal::_format() const {
+	return 0x42424242;
+}
+
+uint32_t RenderingShaderContainerMetal::_format_version() const {
+	return FORMAT_VERSION;
+}
+
+Ref<RenderingShaderContainer> RenderingShaderContainerFormatMetal::create_container() const {
+	Ref<RenderingShaderContainerMetal> result;
+	result.instantiate();
+	result->set_export_mode(export_mode);
+	result->set_device_profile(device_profile);
+	return result;
+}
+
+RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatMetal::get_shader_language_version() const {
+	return SHADER_LANGUAGE_VULKAN_VERSION_1_1;
+}
+
+RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatMetal::get_shader_spirv_version() const {
+	return SHADER_SPIRV_VERSION_1_6;
+}
+
+RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export) :
+		export_mode(p_export), device_profile(p_device_profile) {
+}

+ 75 - 0
drivers/metal/sha256_digest.h

@@ -0,0 +1,75 @@
+/**************************************************************************/
+/*  sha256_digest.h                                                       */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#import <CommonCrypto/CommonDigest.h>
+#import <simd/simd.h>
+#import <zlib.h>
+
+#include "core/templates/local_vector.h"
+
+struct SHA256Digest {
+	unsigned char data[CC_SHA256_DIGEST_LENGTH];
+
+	static constexpr size_t serialized_size() { return CC_SHA256_DIGEST_LENGTH; }
+
+	uint32_t hash() const {
+		uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH);
+		return c;
+	}
+
+	SHA256Digest() {
+		bzero(data, CC_SHA256_DIGEST_LENGTH);
+	}
+
+	SHA256Digest(const char *p_hash) {
+		memcpy(data, p_hash, CC_SHA256_DIGEST_LENGTH);
+	}
+
+	SHA256Digest(const char *p_data, size_t p_length) {
+		CC_SHA256(p_data, (CC_LONG)p_length, data);
+	}
+
+	_FORCE_INLINE_ uint32_t short_sha() const {
+		return __builtin_bswap32(*(uint32_t *)&data[0]);
+	}
+
+	LocalVector<uint8_t> serialize() const {
+		LocalVector<uint8_t> result;
+		result.resize(CC_SHA256_DIGEST_LENGTH);
+		memcpy(result.ptr(), data, CC_SHA256_DIGEST_LENGTH);
+		return result;
+	}
+
+	static SHA256Digest deserialize(LocalVector<uint8_t> p_ser) {
+		return SHA256Digest((const char *)p_ser.ptr());
+	}
+};

+ 79 - 332
drivers/vulkan/rendering_device_driver_vulkan.cpp

@@ -32,9 +32,12 @@
 
 #include "core/config/project_settings.h"
 #include "core/io/marshalls.h"
-#include "thirdparty/misc/smolv.h"
 #include "vulkan_hooks.h"
 
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+#include "thirdparty/misc/smolv.h"
+#endif
+
 #if defined(ANDROID_ENABLED)
 #include "platform/android/java_godot_wrapper.h"
 #include "platform/android/os_android.h"
@@ -3550,260 +3553,34 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_
 	VK_SHADER_STAGE_COMPUTE_BIT,
 };
 
-String RenderingDeviceDriverVulkan::shader_get_binary_cache_key() {
-	return "Vulkan-SV" + uitos(ShaderBinary::VERSION);
-}
-
-Vector<uint8_t> RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) {
-	ShaderReflection shader_refl;
-	if (_reflect_spirv(p_spirv, shader_refl) != OK) {
-		return Vector<uint8_t>();
-	}
-
-	ERR_FAIL_COND_V_MSG((uint32_t)shader_refl.uniform_sets.size() > physical_device_properties.limits.maxBoundDescriptorSets, Vector<uint8_t>(),
-			"Number of uniform sets is larger than what is supported by the hardware (" + itos(physical_device_properties.limits.maxBoundDescriptorSets) + ").");
-
-	// Collect reflection data into binary data.
-	ShaderBinary::Data binary_data;
-	Vector<Vector<ShaderBinary::DataBinding>> uniforms; // Set bindings.
-	Vector<ShaderBinary::SpecializationConstant> specialization_constants;
-	{
-		binary_data.vertex_input_mask = shader_refl.vertex_input_mask;
-		binary_data.fragment_output_mask = shader_refl.fragment_output_mask;
-		binary_data.specialization_constants_count = shader_refl.specialization_constants.size();
-		binary_data.is_compute = shader_refl.is_compute;
-		binary_data.compute_local_size[0] = shader_refl.compute_local_size[0];
-		binary_data.compute_local_size[1] = shader_refl.compute_local_size[1];
-		binary_data.compute_local_size[2] = shader_refl.compute_local_size[2];
-		binary_data.set_count = shader_refl.uniform_sets.size();
-		binary_data.push_constant_size = shader_refl.push_constant_size;
-		for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) {
-			if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) {
-				binary_data.vk_push_constant_stages_mask |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i];
-			}
-		}
-
-		for (const Vector<ShaderUniform> &set_refl : shader_refl.uniform_sets) {
-			Vector<ShaderBinary::DataBinding> set_bindings;
-			for (const ShaderUniform &uniform_refl : set_refl) {
-				ShaderBinary::DataBinding binding;
-				binding.type = (uint32_t)uniform_refl.type;
-				binding.binding = uniform_refl.binding;
-				binding.stages = (uint32_t)uniform_refl.stages;
-				binding.length = uniform_refl.length;
-				binding.writable = (uint32_t)uniform_refl.writable;
-				set_bindings.push_back(binding);
-			}
-			uniforms.push_back(set_bindings);
-		}
-
-		for (const ShaderSpecializationConstant &refl_sc : shader_refl.specialization_constants) {
-			ShaderBinary::SpecializationConstant spec_constant;
-			spec_constant.type = (uint32_t)refl_sc.type;
-			spec_constant.constant_id = refl_sc.constant_id;
-			spec_constant.int_value = refl_sc.int_value;
-			spec_constant.stage_flags = (uint32_t)refl_sc.stages;
-			specialization_constants.push_back(spec_constant);
+RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {
+	ShaderReflection shader_refl = p_shader_container->get_shader_reflection();
+	ShaderInfo shader_info;
+	for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) {
+		if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) {
+			shader_info.vk_push_constant_stages |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i];
 		}
 	}
 
-	Vector<Vector<uint8_t>> compressed_stages;
-	Vector<uint32_t> smolv_size;
-	Vector<uint32_t> zstd_size; // If 0, zstd not used.
-
-	uint32_t stages_binary_size = 0;
-
-	bool strip_debug = false;
-
-	for (uint32_t i = 0; i < p_spirv.size(); i++) {
-		smolv::ByteArray smolv;
-		if (!smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) {
-			ERR_FAIL_V_MSG(Vector<uint8_t>(), "Error compressing shader stage :" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]));
-		} else {
-			smolv_size.push_back(smolv.size());
-			{ // zstd.
-				Vector<uint8_t> zstd;
-				zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD));
-				int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD);
-
-				if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) {
-					zstd_size.push_back(dst_size);
-					zstd.resize(dst_size);
-					compressed_stages.push_back(zstd);
-				} else {
-					Vector<uint8_t> smv;
-					smv.resize(smolv.size());
-					memcpy(smv.ptrw(), &smolv[0], smolv.size());
-					zstd_size.push_back(0); // Not using zstd.
-					compressed_stages.push_back(smv);
-				}
-			}
-		}
-		uint32_t s = compressed_stages[i].size();
-		stages_binary_size += STEPIFY(s, 4);
-	}
-
-	binary_data.specialization_constants_count = specialization_constants.size();
-	binary_data.set_count = uniforms.size();
-	binary_data.stage_count = p_spirv.size();
-
-	CharString shader_name_utf = p_shader_name.utf8();
-
-	binary_data.shader_name_len = shader_name_utf.length();
-
-	uint32_t total_size = sizeof(uint32_t) * 4; // Header + version + pad + main datasize;.
-	total_size += sizeof(ShaderBinary::Data);
-
-	total_size += STEPIFY(binary_data.shader_name_len, 4);
-
-	for (int i = 0; i < uniforms.size(); i++) {
-		total_size += sizeof(uint32_t);
-		total_size += uniforms[i].size() * sizeof(ShaderBinary::DataBinding);
-	}
-
-	total_size += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size();
-
-	total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes.
-	total_size += stages_binary_size;
-
-	Vector<uint8_t> ret;
-	ret.resize(total_size);
-	{
-		uint32_t offset = 0;
-		uint8_t *binptr = ret.ptrw();
-		binptr[0] = 'G';
-		binptr[1] = 'S';
-		binptr[2] = 'B';
-		binptr[3] = 'D'; // Godot Shader Binary Data.
-		offset += 4;
-		encode_uint32(ShaderBinary::VERSION, binptr + offset);
-		offset += sizeof(uint32_t);
-		encode_uint32(sizeof(ShaderBinary::Data), binptr + offset);
-		offset += sizeof(uint32_t);
-		encode_uint32(0, binptr + offset); // Pad to align ShaderBinary::Data to 8 bytes.
-		offset += sizeof(uint32_t);
-		memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data));
-		offset += sizeof(ShaderBinary::Data);
-
-#define ADVANCE_OFFSET_WITH_ALIGNMENT(m_bytes)                         \
-	{                                                                  \
-		offset += m_bytes;                                             \
-		uint32_t padding = STEPIFY(m_bytes, 4) - m_bytes;              \
-		memset(binptr + offset, 0, padding); /* Avoid garbage data. */ \
-		offset += padding;                                             \
-	}
-
-		if (binary_data.shader_name_len > 0) {
-			memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len);
-			ADVANCE_OFFSET_WITH_ALIGNMENT(binary_data.shader_name_len);
-		}
-
-		for (int i = 0; i < uniforms.size(); i++) {
-			int count = uniforms[i].size();
-			encode_uint32(count, binptr + offset);
-			offset += sizeof(uint32_t);
-			if (count > 0) {
-				memcpy(binptr + offset, uniforms[i].ptr(), sizeof(ShaderBinary::DataBinding) * count);
-				offset += sizeof(ShaderBinary::DataBinding) * count;
-			}
-		}
-
-		if (specialization_constants.size()) {
-			memcpy(binptr + offset, specialization_constants.ptr(), sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size());
-			offset += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size();
-		}
-
-		for (int i = 0; i < compressed_stages.size(); i++) {
-			encode_uint32(p_spirv[i].shader_stage, binptr + offset);
-			offset += sizeof(uint32_t);
-			encode_uint32(smolv_size[i], binptr + offset);
-			offset += sizeof(uint32_t);
-			encode_uint32(zstd_size[i], binptr + offset);
-			offset += sizeof(uint32_t);
-			memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size());
-			ADVANCE_OFFSET_WITH_ALIGNMENT(compressed_stages[i].size());
-		}
-
-		DEV_ASSERT(offset == (uint32_t)ret.size());
-	}
-
-	return ret;
-}
-
-RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) {
-	r_shader_desc = {}; // Driver-agnostic.
-	ShaderInfo shader_info; // Driver-specific.
-
-	const uint8_t *binptr = p_shader_binary.ptr();
-	uint32_t binsize = p_shader_binary.size();
-
-	uint32_t read_offset = 0;
-
-	// Consistency check.
-	ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 4 + sizeof(ShaderBinary::Data), ShaderID());
-	ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID());
-
-	uint32_t bin_version = decode_uint32(binptr + 4);
-	ERR_FAIL_COND_V(bin_version != ShaderBinary::VERSION, ShaderID());
-
-	uint32_t bin_data_size = decode_uint32(binptr + 8);
-
-	// 16, not 12, to skip alignment padding.
-	const ShaderBinary::Data &binary_data = *(reinterpret_cast<const ShaderBinary::Data *>(binptr + 16));
-
-	r_shader_desc.push_constant_size = binary_data.push_constant_size;
-	shader_info.vk_push_constant_stages = binary_data.vk_push_constant_stages_mask;
-
-	r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask;
-	r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask;
-
-	r_shader_desc.is_compute = binary_data.is_compute;
-	r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0];
-	r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1];
-	r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2];
-
-	read_offset += sizeof(uint32_t) * 4 + bin_data_size;
-
-	if (binary_data.shader_name_len) {
-		r_name.clear();
-		r_name.append_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len);
-		read_offset += STEPIFY(binary_data.shader_name_len, 4);
-	}
-
+	// Set bindings.
 	Vector<Vector<VkDescriptorSetLayoutBinding>> vk_set_bindings;
-
-	r_shader_desc.uniform_sets.resize(binary_data.set_count);
-	vk_set_bindings.resize(binary_data.set_count);
-
-	for (uint32_t i = 0; i < binary_data.set_count; i++) {
-		ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, ShaderID());
-		uint32_t set_count = decode_uint32(binptr + read_offset);
-		read_offset += sizeof(uint32_t);
-		const ShaderBinary::DataBinding *set_ptr = reinterpret_cast<const ShaderBinary::DataBinding *>(binptr + read_offset);
-		uint32_t set_size = set_count * sizeof(ShaderBinary::DataBinding);
-		ERR_FAIL_COND_V(read_offset + set_size >= binsize, ShaderID());
-
-		for (uint32_t j = 0; j < set_count; j++) {
-			ShaderUniform info;
-			info.type = UniformType(set_ptr[j].type);
-			info.writable = set_ptr[j].writable;
-			info.length = set_ptr[j].length;
-			info.binding = set_ptr[j].binding;
-			info.stages = set_ptr[j].stages;
-
+	vk_set_bindings.resize(shader_refl.uniform_sets.size());
+	for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) {
+		for (uint32_t j = 0; j < shader_refl.uniform_sets[i].size(); j++) {
+			const ShaderUniform &uniform = shader_refl.uniform_sets[i][j];
 			VkDescriptorSetLayoutBinding layout_binding = {};
-			layout_binding.binding = set_ptr[j].binding;
+			layout_binding.binding = uniform.binding;
 			layout_binding.descriptorCount = 1;
 			for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) {
-				if ((set_ptr[j].stages & (1 << k))) {
+				if ((uniform.stages.has_flag(ShaderStage(1U << k)))) {
 					layout_binding.stageFlags |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[k];
 				}
 			}
 
-			switch (info.type) {
+			switch (uniform.type) {
 				case UNIFORM_TYPE_SAMPLER: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
-					layout_binding.descriptorCount = set_ptr[j].length;
+					layout_binding.descriptorCount = uniform.length;
 					// Immutable samplers: here they get set in the layoutbinding, given that they will not be changed later.
 					int immutable_bind_index = -1;
 					if (immutable_samplers_enabled && p_immutable_samplers.size() > 0) {
@@ -3820,19 +3597,19 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 				} break;
 				case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-					layout_binding.descriptorCount = set_ptr[j].length;
+					layout_binding.descriptorCount = uniform.length;
 				} break;
 				case UNIFORM_TYPE_TEXTURE: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
-					layout_binding.descriptorCount = set_ptr[j].length;
+					layout_binding.descriptorCount = uniform.length;
 				} break;
 				case UNIFORM_TYPE_IMAGE: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-					layout_binding.descriptorCount = set_ptr[j].length;
+					layout_binding.descriptorCount = uniform.length;
 				} break;
 				case UNIFORM_TYPE_TEXTURE_BUFFER: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
-					layout_binding.descriptorCount = set_ptr[j].length;
+					layout_binding.descriptorCount = uniform.length;
 				} break;
 				case UNIFORM_TYPE_IMAGE_BUFFER: {
 					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
@@ -3851,104 +3628,72 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 				}
 			}
 
-			r_shader_desc.uniform_sets.write[i].push_back(info);
 			vk_set_bindings.write[i].push_back(layout_binding);
 		}
-
-		read_offset += set_size;
-	}
-
-	ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(ShaderBinary::SpecializationConstant) >= binsize, ShaderID());
-
-	r_shader_desc.specialization_constants.resize(binary_data.specialization_constants_count);
-	for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) {
-		const ShaderBinary::SpecializationConstant &src_sc = *(reinterpret_cast<const ShaderBinary::SpecializationConstant *>(binptr + read_offset));
-		ShaderSpecializationConstant sc;
-		sc.type = PipelineSpecializationConstantType(src_sc.type);
-		sc.constant_id = src_sc.constant_id;
-		sc.int_value = src_sc.int_value;
-		sc.stages = src_sc.stage_flags;
-		r_shader_desc.specialization_constants.write[i] = sc;
-
-		read_offset += sizeof(ShaderBinary::SpecializationConstant);
 	}
 
-	Vector<Vector<uint8_t>> stages_spirv;
-	stages_spirv.resize(binary_data.stage_count);
-	r_shader_desc.stages.resize(binary_data.stage_count);
-
-	for (uint32_t i = 0; i < binary_data.stage_count; i++) {
-		ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID());
-
-		uint32_t stage = decode_uint32(binptr + read_offset);
-		read_offset += sizeof(uint32_t);
-		uint32_t smolv_size = decode_uint32(binptr + read_offset);
-		read_offset += sizeof(uint32_t);
-		uint32_t zstd_size = decode_uint32(binptr + read_offset);
-		read_offset += sizeof(uint32_t);
-
-		uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size;
-
-		Vector<uint8_t> smolv;
-		const uint8_t *src_smolv = nullptr;
-
-		if (zstd_size > 0) {
-			// Decompress to smolv.
-			smolv.resize(smolv_size);
-			int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD);
-			ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, ShaderID());
-			src_smolv = smolv.ptr();
-		} else {
-			src_smolv = binptr + read_offset;
+	// Modules.
+	VkResult res;
+	String error_text;
+	Vector<uint8_t> decompressed_code;
+	Vector<uint8_t> decoded_spirv;
+	VkShaderModule vk_module;
+	for (int i = 0; i < shader_refl.stages_vector.size(); i++) {
+		const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i];
+#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION
+		bool requires_decompression = (shader.code_decompressed_size > 0);
+		if (requires_decompression) {
+			decompressed_code.resize(shader.code_decompressed_size);
+			bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size());
+			if (!decompressed) {
+				error_text = vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
+				break;
+			}
 		}
+#else
+		bool requires_decompression = false;
+#endif
 
-		Vector<uint8_t> &spirv = stages_spirv.ptrw()[i];
-		uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size);
-		spirv.resize(spirv_size);
-		if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) {
-			ERR_FAIL_V_MSG(ShaderID(), "Malformed smolv input uncompressing shader stage:" + String(SHADER_STAGE_NAMES[stage]));
+		const uint8_t *smolv_input = requires_decompression ? decompressed_code.ptr() : shader.code_compressed_bytes.ptr();
+		uint32_t smolv_input_size = requires_decompression ? decompressed_code.size() : shader.code_compressed_bytes.size();
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+		decoded_spirv.resize(smolv::GetDecodedBufferSize(smolv_input, smolv_input_size));
+		if (decoded_spirv.is_empty()) {
+			error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
+			break;
 		}
 
-		r_shader_desc.stages.set(i, ShaderStage(stage));
-
-		buf_size = STEPIFY(buf_size, 4);
-		read_offset += buf_size;
-		ERR_FAIL_COND_V(read_offset > binsize, ShaderID());
-	}
-
-	ERR_FAIL_COND_V(read_offset != binsize, ShaderID());
-
-	// Modules.
-
-	String error_text;
+		if (!smolv::Decode(smolv_input, smolv_input_size, decoded_spirv.ptrw(), decoded_spirv.size())) {
+			error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
+			break;
+		}
+#else
+		decoded_spirv.resize(smolv_input_size);
+		memcpy(decoded_spirv.ptrw(), smolv_input, decoded_spirv.size());
+#endif
 
-	for (int i = 0; i < r_shader_desc.stages.size(); i++) {
 		VkShaderModuleCreateInfo shader_module_create_info = {};
 		shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-		shader_module_create_info.codeSize = stages_spirv[i].size();
-		shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr();
+		shader_module_create_info.codeSize = decoded_spirv.size();
+		shader_module_create_info.pCode = (const uint32_t *)(decoded_spirv.ptr());
 
-		VkShaderModule vk_module = VK_NULL_HANDLE;
-		VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module);
-		if (res) {
-			error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]);
+		res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module);
+		if (res != VK_SUCCESS) {
+			error_text = vformat("Error (%d) creating module for shader stage %s.", res, String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
 			break;
 		}
 
 		VkPipelineShaderStageCreateInfo create_info = {};
 		create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-		create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[r_shader_desc.stages[i]];
+		create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[shader_refl.stages_vector[i]];
 		create_info.module = vk_module;
 		create_info.pName = "main";
-
 		shader_info.vk_stages_create_info.push_back(create_info);
 	}
 
 	// Descriptor sets.
-
 	if (error_text.is_empty()) {
-		DEV_ASSERT((uint32_t)vk_set_bindings.size() == binary_data.set_count);
-		for (uint32_t i = 0; i < binary_data.set_count; i++) {
+		for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) {
 			// Empty ones are fine if they were not used according to spec (binding count will be 0).
 			VkDescriptorSetLayoutCreateInfo layout_create_info = {};
 			layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
@@ -3956,9 +3701,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 			layout_create_info.pBindings = vk_set_bindings[i].ptr();
 
 			VkDescriptorSetLayout layout = VK_NULL_HANDLE;
-			VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout);
+			res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout);
 			if (res) {
-				error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i);
+				error_text = vformat("Error (%d) creating descriptor set layout for set %d.", res, i);
 				break;
 			}
 
@@ -3968,24 +3713,23 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 
 	if (error_text.is_empty()) {
 		// Pipeline layout.
-
 		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {};
 		pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
-		pipeline_layout_create_info.setLayoutCount = binary_data.set_count;
+		pipeline_layout_create_info.setLayoutCount = shader_info.vk_descriptor_set_layouts.size();
 		pipeline_layout_create_info.pSetLayouts = shader_info.vk_descriptor_set_layouts.ptr();
 
-		if (binary_data.push_constant_size) {
+		if (shader_refl.push_constant_size > 0) {
 			VkPushConstantRange *push_constant_range = ALLOCA_SINGLE(VkPushConstantRange);
 			*push_constant_range = {};
-			push_constant_range->stageFlags = binary_data.vk_push_constant_stages_mask;
-			push_constant_range->size = binary_data.push_constant_size;
+			push_constant_range->stageFlags = shader_info.vk_push_constant_stages;
+			push_constant_range->size = shader_refl.push_constant_size;
 			pipeline_layout_create_info.pushConstantRangeCount = 1;
 			pipeline_layout_create_info.pPushConstantRanges = push_constant_range;
 		}
 
-		VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout);
-		if (err) {
-			error_text = "Error (" + itos(err) + ") creating pipeline layout.";
+		res = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout);
+		if (res != VK_SUCCESS) {
+			error_text = vformat("Error (%d) creating pipeline layout.", res);
 		}
 	}
 
@@ -3994,7 +3738,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 		for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) {
 			vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
 		}
-		for (uint32_t i = 0; i < binary_data.set_count; i++) {
+		for (uint32_t i = 0; i < shader_info.vk_descriptor_set_layouts.size(); i++) {
 			vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT));
 		}
 
@@ -4002,7 +3746,6 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
 	}
 
 	// Bookkeep.
-
 	ShaderInfo *shader_info_ptr = VersatileResource::allocate<ShaderInfo>(resources_allocator);
 	*shader_info_ptr = shader_info;
 	return ShaderID(shader_info_ptr);
@@ -6185,6 +5928,10 @@ const RDD::Capabilities &RenderingDeviceDriverVulkan::get_capabilities() const {
 	return device_capabilities;
 }
 
+const RenderingShaderContainerFormat &RenderingDeviceDriverVulkan::get_shader_container_format() const {
+	return shader_container_format;
+}
+
 bool RenderingDeviceDriverVulkan::is_composite_alpha_supported(CommandQueueID p_queue) const {
 	if (has_comp_alpha.has((uint64_t)p_queue.id)) {
 		return has_comp_alpha[(uint64_t)p_queue.id];

+ 4 - 40
drivers/vulkan/rendering_device_driver_vulkan.h

@@ -33,6 +33,7 @@
 #include "core/templates/hash_map.h"
 #include "core/templates/paged_allocator.h"
 #include "drivers/vulkan/rendering_context_driver_vulkan.h"
+#include "drivers/vulkan/rendering_shader_container_vulkan.h"
 #include "servers/rendering/rendering_device_driver.h"
 
 #ifdef DEBUG_ENABLED
@@ -130,6 +131,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	FragmentDensityMapCapabilities fdm_capabilities;
 	ShaderCapabilities shader_capabilities;
 	StorageBufferCapabilities storage_buffer_capabilities;
+	RenderingShaderContainerFormatVulkan shader_container_format;
 	bool buffer_device_address_support = false;
 	bool pipeline_cache_control_support = false;
 	bool device_fault_support = false;
@@ -408,43 +410,6 @@ public:
 	/**** SHADER ****/
 	/****************/
 private:
-	struct ShaderBinary {
-		// Version 1: initial.
-		// Version 2: Added shader name.
-		// Version 3: Added writable.
-		// Version 4: 64-bit vertex input mask.
-		// Version 5: Add 4 bytes padding to align the Data struct after the change in version 4.
-		static const uint32_t VERSION = 5;
-
-		struct DataBinding {
-			uint32_t type = 0;
-			uint32_t binding = 0;
-			uint32_t stages = 0;
-			uint32_t length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements).
-			uint32_t writable = 0;
-		};
-
-		struct SpecializationConstant {
-			uint32_t type = 0;
-			uint32_t constant_id = 0;
-			uint32_t int_value = 0;
-			uint32_t stage_flags = 0;
-		};
-
-		struct Data {
-			uint64_t vertex_input_mask = 0;
-			uint32_t fragment_output_mask = 0;
-			uint32_t specialization_constants_count = 0;
-			uint32_t is_compute = 0;
-			uint32_t compute_local_size[3] = {};
-			uint32_t set_count = 0;
-			uint32_t push_constant_size = 0;
-			uint32_t vk_push_constant_stages_mask = 0;
-			uint32_t stage_count = 0;
-			uint32_t shader_name_len = 0;
-		};
-	};
-
 	struct ShaderInfo {
 		VkShaderStageFlags vk_push_constant_stages = 0;
 		TightLocalVector<VkPipelineShaderStageCreateInfo> vk_stages_create_info;
@@ -453,9 +418,7 @@ private:
 	};
 
 public:
-	virtual String shader_get_binary_cache_key() override final;
-	virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
-	virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
+	virtual ShaderID shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
 	virtual void shader_free(ShaderID p_shader) override final;
 
 	virtual void shader_destroy_modules(ShaderID p_shader) override final;
@@ -711,6 +674,7 @@ public:
 	virtual String get_api_version() const override final;
 	virtual String get_pipeline_cache_uuid() const override final;
 	virtual const Capabilities &get_capabilities() const override final;
+	virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final;
 
 	virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
 

+ 103 - 0
drivers/vulkan/rendering_shader_container_vulkan.cpp

@@ -0,0 +1,103 @@
+/**************************************************************************/
+/*  rendering_shader_container_vulkan.cpp                                 */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "rendering_shader_container_vulkan.h"
+
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+#include "thirdparty/misc/smolv.h"
+#endif
+
+// RenderingShaderContainerVulkan
+
+const uint32_t RenderingShaderContainerVulkan::FORMAT_VERSION = 1;
+
+uint32_t RenderingShaderContainerVulkan::_format() const {
+	return 0x43565053;
+}
+
+uint32_t RenderingShaderContainerVulkan::_format_version() const {
+	return FORMAT_VERSION;
+}
+
+bool RenderingShaderContainerVulkan::_set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) {
+	PackedByteArray code_bytes;
+	shaders.resize(p_spirv.size());
+	for (int64_t i = 0; i < p_spirv.size(); i++) {
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+		// Encode into smolv.
+		smolv::ByteArray smolv_bytes;
+		bool smolv_encoded = smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv_bytes, smolv::kEncodeFlagStripDebugInfo);
+		ERR_FAIL_COND_V_MSG(!smolv_encoded, false, "Failed to compress SPIR-V into smolv.");
+
+		code_bytes.resize(smolv_bytes.size());
+		memcpy(code_bytes.ptrw(), smolv_bytes.data(), code_bytes.size());
+#else
+		code_bytes.resize(p_spirv[i].spirv.size());
+		memcpy(code_bytes.ptrw(), p_spirv[i].spirv.ptr(), code_bytes.size());
+#endif
+
+		RenderingShaderContainer::Shader &shader = shaders.ptrw()[i];
+#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION
+		uint32_t compressed_size = 0;
+		shader.code_decompressed_size = code_bytes.size();
+		shader.code_compressed_bytes.resize(code_bytes.size());
+
+		bool compressed = compress_code(code_bytes.ptr(), code_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
+		ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i));
+
+		shader.code_compressed_bytes.resize(compressed_size);
+#else
+		shader.code_decompressed_size = 0;
+		shader.code_compression_flags = 0;
+		shader.code_compressed_bytes = code_bytes;
+#endif
+		shader.shader_stage = p_spirv[i].shader_stage;
+	}
+
+	return true;
+}
+
+// RenderingShaderContainerFormatVulkan
+
+Ref<RenderingShaderContainer> RenderingShaderContainerFormatVulkan::create_container() const {
+	return memnew(RenderingShaderContainerVulkan);
+}
+
+RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatVulkan::get_shader_language_version() const {
+	return SHADER_LANGUAGE_VULKAN_VERSION_1_1;
+}
+
+RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatVulkan::get_shader_spirv_version() const {
+	return SHADER_SPIRV_VERSION_1_3;
+}
+
+RenderingShaderContainerFormatVulkan::RenderingShaderContainerFormatVulkan() {}
+
+RenderingShaderContainerFormatVulkan::~RenderingShaderContainerFormatVulkan() {}

+ 57 - 0
drivers/vulkan/rendering_shader_container_vulkan.h

@@ -0,0 +1,57 @@
+/**************************************************************************/
+/*  rendering_shader_container_vulkan.h                                   */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "servers/rendering/rendering_shader_container.h"
+
+#define RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION 1
+#define RENDERING_SHADER_CONTAINER_VULKAN_SMOLV 1
+
+class RenderingShaderContainerVulkan : public RenderingShaderContainer {
+	GDSOFTCLASS(RenderingShaderContainerVulkan, RenderingShaderContainer);
+
+public:
+	static const uint32_t FORMAT_VERSION;
+
+protected:
+	virtual uint32_t _format() const override;
+	virtual uint32_t _format_version() const override;
+	virtual bool _set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) override;
+};
+
+class RenderingShaderContainerFormatVulkan : public RenderingShaderContainerFormat {
+public:
+	virtual Ref<RenderingShaderContainer> create_container() const override;
+	virtual ShaderLanguageVersion get_shader_language_version() const override;
+	virtual ShaderSpirvVersion get_shader_spirv_version() const override;
+	RenderingShaderContainerFormatVulkan();
+	virtual ~RenderingShaderContainerFormatVulkan();
+};

+ 36 - 0
editor/editor_node.cpp

@@ -155,6 +155,7 @@
 #include "editor/plugins/plugin_config_dialog.h"
 #include "editor/plugins/root_motion_editor_plugin.h"
 #include "editor/plugins/script_text_editor.h"
+#include "editor/plugins/shader_baker_export_plugin.h"
 #include "editor/plugins/text_editor.h"
 #include "editor/plugins/version_control_editor_plugin.h"
 #include "editor/plugins/visual_shader_editor_plugin.h"
@@ -167,6 +168,18 @@
 #include "editor/themes/editor_theme_manager.h"
 #include "editor/window_wrapper.h"
 
+#ifdef VULKAN_ENABLED
+#include "editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h"
+#endif
+
+#ifdef D3D12_ENABLED
+#include "editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h"
+#endif
+
+#ifdef METAL_ENABLED
+#include "editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h"
+#endif
+
 #include "modules/modules_enabled.gen.h" // For gdscript, mono.
 
 #ifndef PHYSICS_2D_DISABLED
@@ -8541,6 +8554,29 @@ EditorNode::EditorNode() {
 
 	EditorExport::get_singleton()->add_export_plugin(dedicated_server_export_plugin);
 
+	Ref<ShaderBakerExportPlugin> shader_baker_export_plugin;
+	shader_baker_export_plugin.instantiate();
+
+#ifdef VULKAN_ENABLED
+	Ref<ShaderBakerExportPluginPlatformVulkan> shader_baker_export_plugin_platform_vulkan;
+	shader_baker_export_plugin_platform_vulkan.instantiate();
+	shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_vulkan);
+#endif
+
+#ifdef D3D12_ENABLED
+	Ref<ShaderBakerExportPluginPlatformD3D12> shader_baker_export_plugin_platform_d3d12;
+	shader_baker_export_plugin_platform_d3d12.instantiate();
+	shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_d3d12);
+#endif
+
+#ifdef METAL_ENABLED
+	Ref<ShaderBakerExportPluginPlatformMetal> shader_baker_export_plugin_platform_metal;
+	shader_baker_export_plugin_platform_metal.instantiate();
+	shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_metal);
+#endif
+
+	EditorExport::get_singleton()->add_export_plugin(shader_baker_export_plugin);
+
 	Ref<PackedSceneEditorTranslationParserPlugin> packed_scene_translation_parser_plugin;
 	packed_scene_translation_parser_plugin.instantiate();
 	EditorTranslationParser::get_singleton()->add_parser(packed_scene_translation_parser_plugin, EditorTranslationParser::STANDARD);

+ 36 - 15
editor/export/editor_export_platform.cpp

@@ -1200,27 +1200,31 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 		}
 	};
 
-	// Always sort by name, to so if for some reason they are re-arranged, it still works.
-	export_plugins.sort_custom<SortByName>();
-
-	for (int i = 0; i < export_plugins.size(); i++) {
-		if (p_so_func) {
-			for (int j = 0; j < export_plugins[i]->shared_objects.size(); j++) {
-				err = p_so_func(p_udata, export_plugins[i]->shared_objects[j]);
+	auto add_shared_objects_and_extra_files_from_export_plugins = [&]() {
+		for (int i = 0; i < export_plugins.size(); i++) {
+			if (p_so_func) {
+				for (int j = 0; j < export_plugins[i]->shared_objects.size(); j++) {
+					err = p_so_func(p_udata, export_plugins[i]->shared_objects[j]);
+					if (err != OK) {
+						return err;
+					}
+				}
+			}
+			for (int j = 0; j < export_plugins[i]->extra_files.size(); j++) {
+				err = save_proxy.save_file(p_udata, export_plugins[i]->extra_files[j].path, export_plugins[i]->extra_files[j].data, 0, paths.size(), enc_in_filters, enc_ex_filters, key, seed);
 				if (err != OK) {
 					return err;
 				}
 			}
-		}
-		for (int j = 0; j < export_plugins[i]->extra_files.size(); j++) {
-			err = save_proxy.save_file(p_udata, export_plugins[i]->extra_files[j].path, export_plugins[i]->extra_files[j].data, 0, paths.size(), enc_in_filters, enc_ex_filters, key, seed);
-			if (err != OK) {
-				return err;
-			}
+
+			export_plugins.write[i]->_clear();
 		}
 
-		export_plugins.write[i]->_clear();
-	}
+		return OK;
+	};
+
+	// Always sort by name, so that if for some reason they are re-arranged, it still works.
+	export_plugins.sort_custom<SortByName>();
 
 	HashSet<String> features = get_features(p_preset, p_debug);
 	PackedStringArray features_psa;
@@ -1252,6 +1256,12 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 		}
 	}
 
+	// Add any files that might've been defined during the initial steps of the export plugins.
+	err = add_shared_objects_and_extra_files_from_export_plugins();
+	if (err != OK) {
+		return err;
+	}
+
 	HashMap<String, FileExportCache> export_cache;
 	String export_base_path = ProjectSettings::get_singleton()->get_project_data_path().path_join("exported/") + itos(custom_resources_hash);
 
@@ -1283,6 +1293,10 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 		}
 	}
 
+	for (int i = 0; i < export_plugins.size(); i++) {
+		export_plugins.write[i]->set_export_base_path(export_base_path);
+	}
+
 	//store everything in the export medium
 	int total = paths.size();
 	// idx is incremented at the beginning of the paths loop to easily allow
@@ -1521,6 +1535,13 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 			plugin->_end_customize_scenes();
 		}
 	}
+
+	// Add any files that might've been defined during the final steps of the export plugins.
+	err = add_shared_objects_and_extra_files_from_export_plugins();
+	if (err != OK) {
+		return err;
+	}
+
 	//save config!
 
 	Vector<String> custom_list;

+ 13 - 0
editor/export/editor_export_platform_apple_embedded.cpp

@@ -51,6 +51,10 @@ void EditorExportPlatformAppleEmbedded::get_preset_features(const Ref<EditorExpo
 	r_features->push_back("etc2");
 	r_features->push_back("astc");
 
+	if (p_preset->get("shader_baker/enabled")) {
+		r_features->push_back("shader_baker");
+	}
+
 	Vector<String> architectures = _get_preset_architectures(p_preset);
 	for (int i = 0; i < architectures.size(); ++i) {
 		r_features->push_back(architectures[i]);
@@ -186,6 +190,13 @@ String EditorExportPlatformAppleEmbedded::get_export_option_warning(const Editor
 			if (access == 0) {
 				return TTR("At least one system boot time access reason should be selected.");
 			}
+		} else if (p_name == "shader_baker/enabled") {
+			String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method.mobile");
+			if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") {
+				return TTR("\"Shader Baker\" doesn't work with the Compatibility renderer.");
+			} else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) {
+				return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to \"%s\" renderer temporarily to fix this."), export_renderer);
+			}
 		}
 	}
 	return String();
@@ -297,6 +308,8 @@ void EditorExportPlatformAppleEmbedded::get_export_options(List<ExportOption> *r
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "capabilities/performance_a12"), false));
 	r_options->push_back(ExportOption(PropertyInfo(Variant::PACKED_STRING_ARRAY, "capabilities/additional"), PackedStringArray()));
 
+	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false));
+
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "user_data/accessible_from_files_app"), false));
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "user_data/accessible_from_itunes_sharing"), false));
 

+ 18 - 0
editor/export/editor_export_platform_pc.cpp

@@ -42,6 +42,9 @@ void EditorExportPlatformPC::get_preset_features(const Ref<EditorExportPreset> &
 		r_features->push_back("etc2");
 		r_features->push_back("astc");
 	}
+	if (p_preset->get("shader_baker/enabled")) {
+		r_features->push_back("shader_baker");
+	}
 	// PC platforms only have one architecture per export, since
 	// we export a single executable instead of a bundle.
 	r_features->push_back(p_preset->get("binary_format/architecture"));
@@ -58,6 +61,21 @@ void EditorExportPlatformPC::get_export_options(List<ExportOption> *r_options) c
 
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "texture_format/s3tc_bptc"), true));
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "texture_format/etc2_astc"), false));
+
+	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false));
+}
+
+String EditorExportPlatformPC::get_export_option_warning(const EditorExportPreset *p_preset, const StringName &p_name) const {
+	if (p_name == "shader_baker/enabled") {
+		String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method");
+		if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") {
+			return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer.");
+		} else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) {
+			return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to the \"%s\" renderer temporarily to fix this."), export_renderer);
+		}
+	}
+
+	return String();
 }
 
 String EditorExportPlatformPC::get_name() const {

+ 1 - 1
editor/export/editor_export_platform_pc.h

@@ -44,8 +44,8 @@ private:
 
 public:
 	virtual void get_preset_features(const Ref<EditorExportPreset> &p_preset, List<String> *r_features) const override;
-
 	virtual void get_export_options(List<ExportOption> *r_options) const override;
+	virtual String get_export_option_warning(const EditorExportPreset *p_preset, const StringName &p_name) const override;
 
 	virtual String get_name() const override;
 	virtual String get_os_name() const override;

+ 8 - 0
editor/export/editor_export_plugin.cpp

@@ -33,6 +33,14 @@
 #include "core/config/project_settings.h"
 #include "editor/export/editor_export_platform.h"
 
+void EditorExportPlugin::set_export_base_path(const String &p_export_base_path) {
+	export_base_path = p_export_base_path;
+}
+
+const String &EditorExportPlugin::get_export_base_path() const {
+	return export_base_path;
+}
+
 void EditorExportPlugin::set_export_preset(const Ref<EditorExportPreset> &p_preset) {
 	if (p_preset.is_valid()) {
 		export_preset = p_preset;

+ 3 - 0
editor/export/editor_export_plugin.h

@@ -42,6 +42,7 @@ class EditorExportPlugin : public RefCounted {
 	friend class EditorExportPlatform;
 	friend class EditorExportPreset;
 
+	String export_base_path;
 	Ref<EditorExportPreset> export_preset;
 
 	Vector<SharedObject> shared_objects;
@@ -87,6 +88,8 @@ class EditorExportPlugin : public RefCounted {
 	String _has_valid_export_configuration(const Ref<EditorExportPlatform> &p_export_platform, const Ref<EditorExportPreset> &p_preset);
 
 protected:
+	void set_export_base_path(const String &p_export_base_path);
+	const String &get_export_base_path() const;
 	void set_export_preset(const Ref<EditorExportPreset> &p_preset);
 	Ref<EditorExportPreset> get_export_preset() const;
 	Ref<EditorExportPlatform> get_export_platform() const;

+ 0 - 5
editor/import/resource_importer_shader_file.cpp

@@ -89,11 +89,6 @@ static String _include_function(const String &p_path, void *userpointer) {
 }
 
 Error ResourceImporterShaderFile::import(ResourceUID::ID p_source_id, const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
-	/* STEP 1, Read shader code */
-	ERR_FAIL_COND_V_EDMSG((OS::get_singleton()->get_current_rendering_method() == "gl_compatibility"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when using the Compatibility renderer. Please switch to the Forward+ or Mobile renderer to use custom shaders.");
-	ERR_FAIL_COND_V_EDMSG((OS::get_singleton()->get_current_rendering_method() == "dummy"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when using the Dummy renderer. Please switch to the Forward+ or Mobile renderer to use custom shaders.");
-	ERR_FAIL_COND_V_EDMSG((DisplayServer::get_singleton()->get_name() == "headless"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when running in headless mode.");
-
 	Error err;
 	Ref<FileAccess> file = FileAccess::open(p_source_file, FileAccess::READ, &err);
 	ERR_FAIL_COND_V(err != OK, ERR_CANT_OPEN);

+ 1 - 0
editor/plugins/SCsub

@@ -6,4 +6,5 @@ Import("env")
 env.add_source_files(env.editor_sources, "*.cpp")
 
 SConscript("gizmos/SCsub")
+SConscript("shader_baker/SCsub")
 SConscript("tiles/SCsub")

+ 13 - 0
editor/plugins/shader_baker/SCsub

@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+from misc.utility.scons_hints import *
+
+Import("env")
+
+if env["vulkan"]:
+    env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_vulkan.cpp")
+
+if env["d3d12"]:
+    env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_d3d12.cpp")
+
+if env["metal"]:
+    env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_metal.cpp")

+ 57 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.cpp

@@ -0,0 +1,57 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_d3d12.cpp                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "shader_baker_export_plugin_platform_d3d12.h"
+
+#include "drivers/d3d12/rendering_shader_container_d3d12.h"
+
+#include <windows.h>
+
+RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformD3D12::create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) {
+	if (lib_d3d12 == nullptr) {
+		lib_d3d12 = LoadLibraryW(L"D3D12.dll");
+		ERR_FAIL_NULL_V_MSG(lib_d3d12, nullptr, "Unable to load D3D12.dll.");
+	}
+
+	// Shader Model 6.2 is required to export shaders that have FP16 variants.
+	RenderingShaderContainerFormatD3D12 *shader_container_format_d3d12 = memnew(RenderingShaderContainerFormatD3D12);
+	shader_container_format_d3d12->set_lib_d3d12(lib_d3d12);
+	return shader_container_format_d3d12;
+}
+
+bool ShaderBakerExportPluginPlatformD3D12::matches_driver(const String &p_driver) {
+	return p_driver == "d3d12";
+}
+
+ShaderBakerExportPluginPlatformD3D12 ::~ShaderBakerExportPluginPlatformD3D12() {
+	if (lib_d3d12 != nullptr) {
+		FreeLibrary((HMODULE)(lib_d3d12));
+	}
+}

+ 45 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h

@@ -0,0 +1,45 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_d3d12.h                           */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "editor/plugins/shader_baker_export_plugin.h"
+
+class ShaderBakerExportPluginPlatformD3D12 : public ShaderBakerExportPluginPlatform {
+	GDCLASS(ShaderBakerExportPluginPlatformD3D12, ShaderBakerExportPluginPlatform);
+
+private:
+	void *lib_d3d12 = nullptr;
+
+public:
+	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) override;
+	virtual bool matches_driver(const String &p_driver) override;
+	virtual ~ShaderBakerExportPluginPlatformD3D12() override;
+};

+ 51 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.cpp

@@ -0,0 +1,51 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_metal.cpp                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "shader_baker_export_plugin_platform_metal.h"
+
+#include "drivers/metal/rendering_shader_container_metal.h"
+
+RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformMetal::create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) {
+	const String &os_name = p_platform->get_os_name();
+	const MetalDeviceProfile *profile;
+
+	if (os_name == U"macOS") {
+		profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7);
+	} else if (os_name == U"iOS") {
+		profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, MetalDeviceProfile::GPU::Apple7);
+	} else {
+		ERR_FAIL_V_MSG(nullptr, vformat("Unsupported platform: %s", os_name));
+	}
+	return memnew(RenderingShaderContainerFormatMetal(profile, true));
+}
+
+bool ShaderBakerExportPluginPlatformMetal::matches_driver(const String &p_driver) {
+	return p_driver == "metal";
+}

+ 39 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h

@@ -0,0 +1,39 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_metal.h                           */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "editor/plugins/shader_baker_export_plugin.h"
+
+class ShaderBakerExportPluginPlatformMetal : public ShaderBakerExportPluginPlatform {
+public:
+	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) override;
+	virtual bool matches_driver(const String &p_driver) override;
+};

+ 41 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.cpp

@@ -0,0 +1,41 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_vulkan.cpp                        */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "shader_baker_export_plugin_platform_vulkan.h"
+
+#include "drivers/vulkan/rendering_shader_container_vulkan.h"
+
+RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformVulkan::create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) {
+	return memnew(RenderingShaderContainerFormatVulkan);
+}
+
+bool ShaderBakerExportPluginPlatformVulkan::matches_driver(const String &p_driver) {
+	return p_driver == "vulkan";
+}

+ 41 - 0
editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h

@@ -0,0 +1,41 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin_platform_vulkan.h                          */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "editor/plugins/shader_baker_export_plugin.h"
+
+class ShaderBakerExportPluginPlatformVulkan : public ShaderBakerExportPluginPlatform {
+	GDCLASS(ShaderBakerExportPluginPlatformVulkan, ShaderBakerExportPluginPlatform);
+
+public:
+	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) override;
+	virtual bool matches_driver(const String &p_driver) override;
+};

+ 459 - 0
editor/plugins/shader_baker_export_plugin.cpp

@@ -0,0 +1,459 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin.cpp                                        */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "shader_baker_export_plugin.h"
+
+#include "core/config/project_settings.h"
+#include "core/version.h"
+#include "editor/editor_node.h"
+#include "scene/3d/label_3d.h"
+#include "scene/3d/sprite_3d.h"
+#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h"
+#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
+
+// Ensure that AlphaCut is the same between the two classes so we can share the code to detect transparency.
+static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_DISABLED, Label3D::ALPHA_CUT_DISABLED));
+static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_DISCARD, Label3D::ALPHA_CUT_DISCARD));
+static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_OPAQUE_PREPASS, Label3D::ALPHA_CUT_OPAQUE_PREPASS));
+static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_HASH, Label3D::ALPHA_CUT_HASH));
+static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_MAX, Label3D::ALPHA_CUT_MAX));
+
+String ShaderBakerExportPlugin::get_name() const {
+	return "ShaderBaker";
+}
+
+bool ShaderBakerExportPlugin::_is_active(const Vector<String> &p_features) const {
+	// Shader baker should only work when a RendererRD driver is active, as the embedded shaders won't be found otherwise.
+	return RendererSceneRenderRD::get_singleton() != nullptr && RendererRD::MaterialStorage::get_singleton() != nullptr && p_features.has("shader_baker");
+}
+
+bool ShaderBakerExportPlugin::_initialize_container_format(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features) {
+	Variant driver_variant = GLOBAL_GET("rendering/rendering_device/driver." + p_platform->get_os_name().to_lower());
+	if (!driver_variant.is_string()) {
+		driver_variant = GLOBAL_GET("rendering/rendering_device/driver");
+		if (!driver_variant.is_string()) {
+			return false;
+		}
+	}
+
+	shader_container_driver = driver_variant;
+
+	for (Ref<ShaderBakerExportPluginPlatform> platform : platforms) {
+		if (platform->matches_driver(shader_container_driver)) {
+			shader_container_format = platform->create_shader_container_format(p_platform);
+			ERR_FAIL_NULL_V_MSG(shader_container_format, false, "Unable to create shader container format for the export platform.");
+			return true;
+		}
+	}
+
+	return false;
+}
+
+void ShaderBakerExportPlugin::_cleanup_container_format() {
+	if (shader_container_format != nullptr) {
+		memdelete(shader_container_format);
+		shader_container_format = nullptr;
+	}
+}
+
+bool ShaderBakerExportPlugin::_initialize_cache_directory() {
+	shader_cache_export_path = get_export_base_path().path_join("shader_baker").path_join(shader_cache_platform_name).path_join(shader_container_driver);
+
+	if (!DirAccess::dir_exists_absolute(shader_cache_export_path)) {
+		Error err = DirAccess::make_dir_recursive_absolute(shader_cache_export_path);
+		ERR_FAIL_COND_V_MSG(err != OK, false, "Can't create shader cache folder for exporting.");
+	}
+
+	return true;
+}
+
+bool ShaderBakerExportPlugin::_begin_customize_resources(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features) {
+	if (!_is_active(p_features)) {
+		return false;
+	}
+
+	if (!_initialize_container_format(p_platform, p_features)) {
+		return false;
+	}
+
+	shader_cache_platform_name = p_platform->get_os_name();
+	shader_cache_renderer_name = RendererSceneRenderRD::get_singleton()->get_name();
+	tasks_processed = 0;
+	tasks_total = 0;
+	tasks_cancelled = false;
+
+	StringBuilder to_hash;
+	to_hash.append("[GodotVersionNumber]");
+	to_hash.append(VERSION_NUMBER);
+	to_hash.append("[GodotVersionHash]");
+	to_hash.append(VERSION_HASH);
+	to_hash.append("[Renderer]");
+	to_hash.append(shader_cache_renderer_name);
+	customization_configuration_hash = to_hash.as_string().hash64();
+
+	BitField<RenderingShaderLibrary::FeatureBits> renderer_features = {};
+	bool xr_enabled = GLOBAL_GET("xr/shaders/enabled");
+	renderer_features.set_flag(RenderingShaderLibrary::FEATURE_ADVANCED_BIT);
+	if (xr_enabled) {
+		renderer_features.set_flag(RenderingShaderLibrary::FEATURE_MULTIVIEW_BIT);
+	}
+
+	int vrs_mode = GLOBAL_GET("rendering/vrs/mode");
+	if (vrs_mode != 0) {
+		renderer_features.set_flag(RenderingShaderLibrary::FEATURE_VRS_BIT);
+	}
+
+	RendererSceneRenderRD::get_singleton()->enable_features(renderer_features);
+
+	// Include all shaders created by renderers and effects.
+	ShaderRD::shaders_embedded_set_lock();
+	const ShaderRD::ShaderVersionPairSet &pair_set = ShaderRD::shaders_embedded_set_get();
+	for (Pair<ShaderRD *, RID> pair : pair_set) {
+		_customize_shader_version(pair.first, pair.second);
+	}
+
+	ShaderRD::shaders_embedded_set_unlock();
+
+	// Include all shaders created by embedded materials.
+	RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton();
+	material_storage->shader_embedded_set_lock();
+	const HashSet<RID> &rid_set = material_storage->shader_embedded_set_get();
+	for (RID rid : rid_set) {
+		RendererRD::MaterialStorage::ShaderData *shader_data = material_storage->shader_get_data(rid);
+		if (shader_data != nullptr) {
+			Pair<ShaderRD *, RID> shader_version_pair = shader_data->get_native_shader_and_version();
+			if (shader_version_pair.first != nullptr) {
+				_customize_shader_version(shader_version_pair.first, shader_version_pair.second);
+			}
+		}
+	}
+
+	material_storage->shader_embedded_set_unlock();
+
+	return true;
+}
+
+bool ShaderBakerExportPlugin::_begin_customize_scenes(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features) {
+	if (!_is_active(p_features)) {
+		return false;
+	}
+
+	if (shader_container_format == nullptr) {
+		// Resource customization failed to initialize.
+		return false;
+	}
+
+	return true;
+}
+
+void ShaderBakerExportPlugin::_end_customize_resources() {
+	if (!_initialize_cache_directory()) {
+		return;
+	}
+
+	// Run a progress bar that waits for all shader baking tasks to finish.
+	bool progress_active = true;
+	EditorProgress editor_progress("baking_shaders", TTR("Baking shaders"), tasks_total);
+	editor_progress.step("Baking...", 0);
+	while (progress_active) {
+		uint32_t tasks_for_progress = 0;
+		{
+			MutexLock lock(tasks_mutex);
+			if (tasks_processed >= tasks_total) {
+				progress_active = false;
+			} else {
+				tasks_condition.wait(lock);
+				tasks_for_progress = tasks_processed;
+			}
+		}
+
+		if (progress_active && editor_progress.step("Baking...", tasks_for_progress)) {
+			// User skipped the shader baker; we just don't pack the shaders in the project.
+			tasks_cancelled = true;
+			progress_active = false;
+		}
+	}
+
+	String shader_cache_user_dir = ShaderRD::get_shader_cache_user_dir();
+	for (const ShaderGroupItem &group_item : shader_group_items) {
+		// Wait for all shader compilation tasks of the group to be finished.
+		for (WorkerThreadPool::TaskID task_id : group_item.variant_tasks) {
+			WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
+		}
+
+		if (!tasks_cancelled) {
+			WorkResult work_result;
+			{
+				MutexLock lock(shader_work_results_mutex);
+				work_result = shader_work_results[group_item.cache_path];
+			}
+
+			PackedByteArray cache_file_bytes = ShaderRD::save_shader_cache_bytes(group_item.variants, work_result.variant_data);
+			add_file(shader_cache_user_dir.path_join(group_item.cache_path), cache_file_bytes, false);
+
+			String cache_file_path = shader_cache_export_path.path_join(group_item.cache_path);
+			if (!DirAccess::exists(cache_file_path)) {
+				DirAccess::make_dir_recursive_absolute(cache_file_path.get_base_dir());
+			}
+
+			Ref<FileAccess> cache_file_access = FileAccess::open(cache_file_path, FileAccess::WRITE);
+			if (cache_file_access.is_valid()) {
+				cache_file_access->store_buffer(cache_file_bytes);
+			}
+		}
+	}
+
+	if (!tasks_cancelled) {
+		String file_cache_path = shader_cache_export_path.path_join("file_cache");
+		Ref<FileAccess> cache_list_access = FileAccess::open(file_cache_path, FileAccess::READ_WRITE);
+		if (cache_list_access.is_null()) {
+			cache_list_access = FileAccess::open(file_cache_path, FileAccess::WRITE);
+		}
+
+		if (cache_list_access.is_valid()) {
+			String cache_list_line;
+			while (cache_list_line = cache_list_access->get_line(), !cache_list_line.is_empty()) {
+				PackedByteArray cache_file_bytes = FileAccess::get_file_as_bytes(shader_cache_export_path.path_join(cache_list_line));
+				if (!cache_file_bytes.is_empty()) {
+					add_file(shader_cache_user_dir.path_join(cache_list_line), cache_file_bytes, false);
+				}
+
+				shader_paths_processed.erase(cache_list_line);
+			}
+
+			for (const String &shader_path : shader_paths_processed) {
+				cache_list_access->store_line(shader_path);
+			}
+
+			cache_list_access->close();
+		}
+	}
+
+	shader_paths_processed.clear();
+	shader_work_results.clear();
+	shader_group_items.clear();
+
+	_cleanup_container_format();
+}
+
+Ref<Resource> ShaderBakerExportPlugin::_customize_resource(const Ref<Resource> &p_resource, const String &p_path) {
+	RendererRD::MaterialStorage *singleton = RendererRD::MaterialStorage::get_singleton();
+	DEV_ASSERT(singleton != nullptr);
+
+	Ref<Material> material = p_resource;
+	if (material.is_valid()) {
+		RID material_rid = material->get_rid();
+		if (material_rid.is_valid()) {
+			RendererRD::MaterialStorage::ShaderData *shader_data = singleton->material_get_shader_data(material_rid);
+			if (shader_data != nullptr) {
+				Pair<ShaderRD *, RID> shader_version_pair = shader_data->get_native_shader_and_version();
+				if (shader_version_pair.first != nullptr) {
+					_customize_shader_version(shader_version_pair.first, shader_version_pair.second);
+				}
+			}
+		}
+	}
+
+	return Ref<Resource>();
+}
+
+Node *ShaderBakerExportPlugin::_customize_scene(Node *p_root, const String &p_path) {
+	LocalVector<Node *> nodes_to_visit;
+	nodes_to_visit.push_back(p_root);
+	while (!nodes_to_visit.is_empty()) {
+		// Visit all nodes recursively in the scene to find the Label3Ds and Sprite3Ds.
+		Node *node = nodes_to_visit[nodes_to_visit.size() - 1];
+		nodes_to_visit.remove_at(nodes_to_visit.size() - 1);
+
+		Label3D *label_3d = Object::cast_to<Label3D>(node);
+		Sprite3D *sprite_3d = Object::cast_to<Sprite3D>(node);
+		if (label_3d != nullptr || sprite_3d != nullptr) {
+			// Create materials for Label3D and Sprite3D, which are normally generated at runtime on demand.
+			HashMap<StringName, Variant> properties;
+
+			// These must match the defaults set by Sprite3D/Label3D.
+			properties["transparent"] = true; // Label3D doesn't have this property, but it is always true anyway.
+			properties["shaded"] = false;
+			properties["double_sided"] = true;
+			properties["no_depth_test"] = false;
+			properties["fixed_size"] = false;
+			properties["billboard"] = StandardMaterial3D::BILLBOARD_DISABLED;
+			properties["texture_filter"] = StandardMaterial3D::TEXTURE_FILTER_LINEAR_WITH_MIPMAPS;
+			properties["alpha_antialiasing_mode"] = StandardMaterial3D::ALPHA_ANTIALIASING_OFF;
+			properties["alpha_cut"] = SpriteBase3D::ALPHA_CUT_DISABLED;
+
+			List<PropertyInfo> property_list;
+			node->get_property_list(&property_list);
+			for (const PropertyInfo &info : property_list) {
+				bool valid = false;
+				Variant property = node->get(info.name, &valid);
+				if (valid) {
+					properties[info.name] = property;
+				}
+			}
+
+			// This must follow the logic in Sprite3D::draw_texture_rect().
+			BaseMaterial3D::Transparency mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_DISABLED;
+			if (properties["transparent"]) {
+				SpriteBase3D::AlphaCutMode acm = SpriteBase3D::AlphaCutMode(int(properties["alpha_cut"]));
+				if (acm == SpriteBase3D::ALPHA_CUT_DISCARD) {
+					mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_SCISSOR;
+				} else if (acm == SpriteBase3D::ALPHA_CUT_OPAQUE_PREPASS) {
+					mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_DEPTH_PRE_PASS;
+				} else if (acm == SpriteBase3D::ALPHA_CUT_HASH) {
+					mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_HASH;
+				} else {
+					mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA;
+				}
+			}
+
+			StandardMaterial3D::BillboardMode billboard_mode = StandardMaterial3D::BillboardMode(int(properties["billboard"]));
+			Ref<Material> sprite_3d_material = StandardMaterial3D::get_material_for_2d(bool(properties["shaded"]), mat_transparency, bool(properties["double_sided"]), billboard_mode == StandardMaterial3D::BILLBOARD_ENABLED, billboard_mode == StandardMaterial3D::BILLBOARD_FIXED_Y, false, bool(properties["no_depth_test"]), bool(properties["fixed_size"]), BaseMaterial3D::TextureFilter(int(properties["texture_filter"])), BaseMaterial3D::AlphaAntiAliasing(int(properties["alpha_antialiasing_mode"])));
+			_customize_resource(sprite_3d_material, String());
+
+			if (label_3d != nullptr) {
+				// Generate variants with and without MSDF support since we don't have access to the font here.
+				Ref<Material> label_3d_material = StandardMaterial3D::get_material_for_2d(bool(properties["shaded"]), mat_transparency, bool(properties["double_sided"]), billboard_mode == StandardMaterial3D::BILLBOARD_ENABLED, billboard_mode == StandardMaterial3D::BILLBOARD_FIXED_Y, true, bool(properties["no_depth_test"]), bool(properties["fixed_size"]), BaseMaterial3D::TextureFilter(int(properties["texture_filter"])), BaseMaterial3D::AlphaAntiAliasing(int(properties["alpha_antialiasing_mode"])));
+				_customize_resource(label_3d_material, String());
+			}
+		}
+
+		// Visit children.
+		int child_count = node->get_child_count();
+		for (int i = 0; i < child_count; i++) {
+			nodes_to_visit.push_back(node->get_child(i));
+		}
+	}
+
+	return nullptr;
+}
+
+uint64_t ShaderBakerExportPlugin::_get_customization_configuration_hash() const {
+	return customization_configuration_hash;
+}
+
+void ShaderBakerExportPlugin::_customize_shader_version(ShaderRD *p_shader, RID p_version) {
+	const int64_t variant_count = p_shader->get_variant_count();
+	const int64_t group_count = p_shader->get_group_count();
+	LocalVector<ShaderGroupItem> group_items;
+	group_items.resize(group_count);
+
+	RBSet<uint32_t> groups_to_compile;
+	for (int64_t i = 0; i < group_count; i++) {
+		if (!p_shader->is_group_enabled(i)) {
+			continue;
+		}
+
+		String cache_path = p_shader->version_get_cache_file_relative_path(p_version, i, shader_container_driver);
+		if (shader_paths_processed.has(cache_path)) {
+			continue;
+		}
+
+		shader_paths_processed.insert(cache_path);
+		groups_to_compile.insert(i);
+
+		group_items[i].cache_path = cache_path;
+		group_items[i].variants = p_shader->get_group_to_variants(i);
+
+		{
+			MutexLock lock(shader_work_results_mutex);
+			shader_work_results[cache_path].variant_data.resize(variant_count);
+		}
+	}
+
+	for (int64_t i = 0; i < variant_count; i++) {
+		int group = p_shader->get_variant_to_group(i);
+		if (!p_shader->is_variant_enabled(i) || !groups_to_compile.has(group)) {
+			continue;
+		}
+
+		WorkItem work_item;
+		work_item.cache_path = group_items[group].cache_path;
+		work_item.shader_name = p_shader->get_name();
+		work_item.stage_sources = p_shader->version_build_variant_stage_sources(p_version, i);
+		work_item.variant = i;
+
+		WorkerThreadPool::TaskID task_id = WorkerThreadPool::get_singleton()->add_template_task(this, &ShaderBakerExportPlugin::_process_work_item, work_item);
+		group_items[group].variant_tasks.push_back(task_id);
+		tasks_total++;
+	}
+
+	for (uint32_t i : groups_to_compile) {
+		shader_group_items.push_back(group_items[i]);
+	}
+}
+
+void ShaderBakerExportPlugin::_process_work_item(WorkItem p_work_item) {
+	if (!tasks_cancelled) {
+		// Only process the item if the tasks haven't been cancelled by the user yet.
+		Vector<RD::ShaderStageSPIRVData> spirv_data = ShaderRD::compile_stages(p_work_item.stage_sources);
+		ERR_FAIL_COND_MSG(spirv_data.is_empty(), "Unable to retrieve SPIR-V data for shader");
+
+		RD::ShaderReflection shader_refl;
+		Error err = RenderingDeviceCommons::reflect_spirv(spirv_data, shader_refl);
+		ERR_FAIL_COND_MSG(err != OK, "Unable to reflect SPIR-V data that was compiled");
+
+		Ref<RenderingShaderContainer> shader_container = shader_container_format->create_container();
+		shader_container->set_from_shader_reflection(p_work_item.shader_name, shader_refl);
+
+		// Compile shader binary from SPIR-V.
+		bool code_compiled = shader_container->set_code_from_spirv(spirv_data);
+		ERR_FAIL_COND_MSG(!code_compiled, vformat("Failed to compile code to native for SPIR-V."));
+
+		PackedByteArray shader_bytes = shader_container->to_bytes();
+		{
+			MutexLock lock(shader_work_results_mutex);
+			shader_work_results[p_work_item.cache_path].variant_data.ptrw()[p_work_item.variant] = shader_bytes;
+		}
+	}
+
+	{
+		MutexLock lock(tasks_mutex);
+		tasks_processed++;
+	}
+
+	tasks_condition.notify_one();
+}
+
+ShaderBakerExportPlugin::ShaderBakerExportPlugin() {
+	// Do nothing.
+}
+
+ShaderBakerExportPlugin::~ShaderBakerExportPlugin() {
+	// Do nothing.
+}
+
+void ShaderBakerExportPlugin::add_platform(Ref<ShaderBakerExportPluginPlatform> p_platform) {
+	platforms.push_back(p_platform);
+}
+
+void ShaderBakerExportPlugin::remove_platform(Ref<ShaderBakerExportPluginPlatform> p_platform) {
+	platforms.erase(p_platform);
+}

+ 102 - 0
editor/plugins/shader_baker_export_plugin.h

@@ -0,0 +1,102 @@
+/**************************************************************************/
+/*  shader_baker_export_plugin.h                                          */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "editor/export/editor_export_plugin.h"
+#include "servers/rendering/renderer_rd/shader_rd.h"
+#include "servers/rendering/rendering_shader_container.h"
+
+class ShaderBakerExportPluginPlatform : public RefCounted {
+	GDCLASS(ShaderBakerExportPluginPlatform, RefCounted);
+
+public:
+	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) = 0;
+	virtual bool matches_driver(const String &p_driver) = 0;
+	virtual ~ShaderBakerExportPluginPlatform() {}
+};
+
+class ShaderBakerExportPlugin : public EditorExportPlugin {
+protected:
+	struct WorkItem {
+		String cache_path;
+		String shader_name;
+		Vector<String> stage_sources;
+		int64_t variant = 0;
+	};
+
+	struct WorkResult {
+		// Since this result is per group, this vector will have gaps in the data it covers as the indices must stay relative to all variants.
+		Vector<PackedByteArray> variant_data;
+	};
+
+	struct ShaderGroupItem {
+		String cache_path;
+		LocalVector<int> variants;
+		LocalVector<WorkerThreadPool::TaskID> variant_tasks;
+	};
+
+	String shader_cache_platform_name;
+	String shader_cache_renderer_name;
+	String shader_cache_export_path;
+	RBSet<String> shader_paths_processed;
+	HashMap<String, WorkResult> shader_work_results;
+	Mutex shader_work_results_mutex;
+	LocalVector<ShaderGroupItem> shader_group_items;
+	RenderingShaderContainerFormat *shader_container_format = nullptr;
+	String shader_container_driver;
+	Vector<Ref<ShaderBakerExportPluginPlatform>> platforms;
+	uint64_t customization_configuration_hash = 0;
+	uint32_t tasks_processed = 0;
+	uint32_t tasks_total = 0;
+	std::atomic<bool> tasks_cancelled;
+	BinaryMutex tasks_mutex;
+	ConditionVariable tasks_condition;
+
+	virtual String get_name() const override;
+	virtual bool _is_active(const Vector<String> &p_features) const;
+	virtual bool _initialize_container_format(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features);
+	virtual void _cleanup_container_format();
+	virtual bool _initialize_cache_directory();
+	virtual bool _begin_customize_resources(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features) override;
+	virtual bool _begin_customize_scenes(const Ref<EditorExportPlatform> &p_platform, const Vector<String> &p_features) override;
+	virtual void _end_customize_resources() override;
+	virtual Ref<Resource> _customize_resource(const Ref<Resource> &p_resource, const String &p_path) override;
+	virtual Node *_customize_scene(Node *p_root, const String &p_path) override;
+	virtual uint64_t _get_customization_configuration_hash() const override;
+	virtual void _customize_shader_version(ShaderRD *p_shader, RID p_version);
+	void _process_work_item(WorkItem p_work_item);
+
+public:
+	ShaderBakerExportPlugin();
+	virtual ~ShaderBakerExportPlugin() override;
+	void add_platform(Ref<ShaderBakerExportPluginPlatform> p_platform);
+	void remove_platform(Ref<ShaderBakerExportPluginPlatform> p_platform);
+};

+ 6 - 82
modules/glslang/register_types.cpp

@@ -31,19 +31,15 @@
 #include "register_types.h"
 
 #include "core/config/engine.h"
-#include "servers/rendering/rendering_device.h"
+#include "shader_compile.h"
 
 #include <glslang/Public/ResourceLimits.h>
 #include <glslang/Public/ShaderLang.h>
 #include <glslang/SPIRV/GlslangToSpv.h>
 
-static Vector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_stage, const String &p_source_code, RenderingDevice::ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device) {
-	const RDD::Capabilities &capabilities = p_render_device->get_device_capabilities();
+Vector<uint8_t> compile_glslang_shader(RenderingDeviceCommons::ShaderStage p_stage, const String &p_source_code, RenderingDeviceCommons::ShaderLanguageVersion p_language_version, RenderingDeviceCommons::ShaderSpirvVersion p_spirv_version, String *r_error) {
 	Vector<uint8_t> ret;
-
-	ERR_FAIL_COND_V(p_language == RenderingDevice::SHADER_LANGUAGE_HLSL, ret);
-
-	EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = {
+	EShLanguage stages[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
 		EShLangVertex,
 		EShLangFragment,
 		EShLangTessControl,
@@ -53,36 +49,9 @@ static Vector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_stage
 
 	int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100
 
-	glslang::EShTargetClientVersion ClientVersion = glslang::EShTargetVulkan_1_2;
-	glslang::EShTargetLanguageVersion TargetVersion = glslang::EShTargetSpv_1_5;
-
-	if (capabilities.device_family == RDD::DEVICE_VULKAN) {
-		if (capabilities.version_major == 1 && capabilities.version_minor == 0) {
-			ClientVersion = glslang::EShTargetVulkan_1_0;
-			TargetVersion = glslang::EShTargetSpv_1_0;
-		} else if (capabilities.version_major == 1 && capabilities.version_minor == 1) {
-			ClientVersion = glslang::EShTargetVulkan_1_1;
-			TargetVersion = glslang::EShTargetSpv_1_3;
-		} else {
-			// use defaults
-		}
-	} else if (capabilities.device_family == RDD::DEVICE_DIRECTX) {
-		// NIR-DXIL is Vulkan 1.1-conformant.
-		ClientVersion = glslang::EShTargetVulkan_1_1;
-		// The SPIR-V part of Mesa supports 1.6, but:
-		// - SPIRV-Reflect won't be able to parse the compute workgroup size.
-		// - We want to play it safe with NIR-DXIL.
-		TargetVersion = glslang::EShTargetSpv_1_3;
-	} else if (capabilities.device_family == RDD::DEVICE_METAL) {
-		ClientVersion = glslang::EShTargetVulkan_1_1;
-		TargetVersion = glslang::EShTargetSpv_1_6;
-	} else {
-		// once we support other backends we'll need to do something here
-		if (r_error) {
-			(*r_error) = "GLSLANG - Unsupported device family";
-		}
-		return ret;
-	}
+	// The enum values can be converted directly.
+	glslang::EShTargetClientVersion ClientVersion = (glslang::EShTargetClientVersion)p_language_version;
+	glslang::EShTargetLanguageVersion TargetVersion = (glslang::EShTargetLanguageVersion)p_spirv_version;
 
 	glslang::TShader shader(stages[p_stage]);
 	CharString cs = p_source_code.ascii();
@@ -94,42 +63,6 @@ static Vector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_stage
 	shader.setEnvClient(glslang::EShClientVulkan, ClientVersion);
 	shader.setEnvTarget(glslang::EShTargetSpv, TargetVersion);
 
-	{
-		uint32_t stage_bit = 1 << p_stage;
-
-		uint32_t subgroup_in_shaders = uint32_t(p_render_device->limit_get(RD::LIMIT_SUBGROUP_IN_SHADERS));
-		uint32_t subgroup_operations = uint32_t(p_render_device->limit_get(RD::LIMIT_SUBGROUP_OPERATIONS));
-		if ((subgroup_in_shaders & stage_bit) == stage_bit) {
-			// stage supports subgroups
-			preamble += "#define has_GL_KHR_shader_subgroup_basic 1\n";
-			if (subgroup_operations & RenderingDevice::SUBGROUP_VOTE_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_vote 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_ARITHMETIC_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_arithmetic 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_BALLOT_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_ballot 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_SHUFFLE_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_shuffle 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_SHUFFLE_RELATIVE_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_shuffle_relative 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_CLUSTERED_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_clustered 1\n";
-			}
-			if (subgroup_operations & RenderingDevice::SUBGROUP_QUAD_BIT) {
-				preamble += "#define has_GL_KHR_shader_subgroup_quad 1\n";
-			}
-		}
-	}
-
-	if (p_render_device->has_feature(RD::SUPPORTS_MULTIVIEW)) {
-		preamble += "#define has_VK_KHR_multiview 1\n";
-	}
-
 	if (!preamble.empty()) {
 		shader.setPreamble(preamble.c_str());
 	}
@@ -187,13 +120,6 @@ static Vector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_stage
 	return ret;
 }
 
-static String _get_cache_key_function_glsl(const RenderingDevice *p_render_device) {
-	const RenderingDeviceDriver::Capabilities &capabilities = p_render_device->get_device_capabilities();
-	String version;
-	version = "SpirVGen=" + itos(glslang::GetSpirvGeneratorVersion()) + ", major=" + itos(capabilities.version_major) + ", minor=" + itos(capabilities.version_minor) + " , subgroup_size=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_SIZE)) + " , subgroup_ops=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_OPERATIONS)) + " , subgroup_in_shaders=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_IN_SHADERS)) + " , debug=" + itos(Engine::get_singleton()->is_generate_spirv_debug_info_enabled());
-	return version;
-}
-
 void initialize_glslang_module(ModuleInitializationLevel p_level) {
 	if (p_level != MODULE_INITIALIZATION_LEVEL_CORE) {
 		return;
@@ -202,8 +128,6 @@ void initialize_glslang_module(ModuleInitializationLevel p_level) {
 	// Initialize in case it's not initialized. This is done once per thread
 	// and it's safe to call multiple times.
 	glslang::InitializeProcess();
-	RenderingDevice::shader_set_compile_to_spirv_function(_compile_shader_glsl);
-	RenderingDevice::shader_set_get_cache_key_function(_get_cache_key_function_glsl);
 }
 
 void uninitialize_glslang_module(ModuleInitializationLevel p_level) {

+ 35 - 0
modules/glslang/shader_compile.h

@@ -0,0 +1,35 @@
+/**************************************************************************/
+/*  shader_compile.h                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "servers/rendering/rendering_device_commons.h"
+
+Vector<uint8_t> compile_glslang_shader(RenderingDeviceCommons::ShaderStage p_stage, const String &p_source_code, RenderingDeviceCommons::ShaderLanguageVersion p_language_version, RenderingDeviceCommons::ShaderSpirvVersion p_spirv_version, String *r_error);

+ 3 - 0
platform/android/doc_classes/EditorExportPlatformAndroid.xml

@@ -613,6 +613,9 @@
 		<member name="screen/support_xlarge" type="bool" setter="" getter="">
 			Indicates whether the application supports extra large screen form-factors.
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers.
+		</member>
 		<member name="user_data_backup/allow" type="bool" setter="" getter="">
 			If [code]true[/code], allows the application to participate in the backup and restore infrastructure.
 		</member>

+ 13 - 0
platform/android/export/export_plugin.cpp

@@ -1967,6 +1967,10 @@ void EditorExportPlatformAndroid::get_preset_features(const Ref<EditorExportPres
 	r_features->push_back("etc2");
 	r_features->push_back("astc");
 
+	if (p_preset->get("shader_baker/enabled")) {
+		r_features->push_back("shader_baker");
+	}
+
 	Vector<ABI> abis = get_enabled_abis(p_preset);
 	for (int i = 0; i < abis.size(); ++i) {
 		r_features->push_back(abis[i].arch);
@@ -2063,6 +2067,13 @@ String EditorExportPlatformAndroid::get_export_option_warning(const EditorExport
 			if (!bool(p_preset->get("package/show_in_app_library")) && !gradle_build_enabled) {
 				return TTR("\"Use Gradle Build\" must be enabled to disable \"Show In App Library\".");
 			}
+		} else if (p_name == "shader_baker/enabled") {
+			String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method.mobile");
+			if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") {
+				return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer.");
+			} else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) {
+				return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to the \"%s\" renderer temporarily to fix this."), export_renderer);
+			}
 		}
 	}
 	return String();
@@ -2130,6 +2141,8 @@ void EditorExportPlatformAndroid::get_export_options(List<ExportOption> *r_optio
 
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "graphics/opengl_debug"), false));
 
+	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false));
+
 	r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "xr_features/xr_mode", PROPERTY_HINT_ENUM, "Regular,OpenXR"), XR_MODE_REGULAR, false, true));
 
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "gesture/swipe_to_dismiss"), false));

+ 3 - 0
platform/ios/doc_classes/EditorExportPlatformIOS.xml

@@ -726,6 +726,9 @@
 		<member name="privacy/user_defaults_access_reasons" type="int" setter="" getter="">
 			The reasons your app use user defaults API. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files/describing_use_of_required_reason_api]Describing use of required reason API[/url].
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers.
+		</member>
 		<member name="storyboard/custom_bg_color" type="Color" setter="" getter="">
 			A custom background color of the storyboard launch screen.
 		</member>

+ 3 - 0
platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml

@@ -26,6 +26,9 @@
 		<member name="debug/export_console_wrapper" type="int" setter="" getter="">
 			If [code]true[/code], a console wrapper is exported alongside the main executable, which allows running the project with enabled console output.
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers.
+		</member>
 		<member name="ssh_remote_deploy/cleanup_script" type="String" setter="" getter="">
 			Script code to execute on the remote host when app is finished.
 			The following variables can be used in the script:

+ 3 - 0
platform/macos/doc_classes/EditorExportPlatformMacOS.xml

@@ -695,6 +695,9 @@
 		<member name="privacy/tracking_enabled" type="bool" setter="" getter="">
 			Indicates whether your app uses data for tracking. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files]Privacy manifest files[/url].
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers.
+		</member>
 		<member name="ssh_remote_deploy/cleanup_script" type="String" setter="" getter="">
 			Script code to execute on the remote host when app is finished.
 			The following variables can be used in the script:

+ 15 - 0
platform/macos/export/export_plugin.cpp

@@ -63,6 +63,10 @@ void EditorExportPlatformMacOS::get_preset_features(const Ref<EditorExportPreset
 		ERR_PRINT("Invalid architecture");
 	}
 
+	if (p_preset->get("shader_baker/enabled")) {
+		r_features->push_back("shader_baker");
+	}
+
 	if (architecture == "universal") {
 		r_features->push_back("x86_64");
 		r_features->push_back("arm64");
@@ -99,6 +103,15 @@ String EditorExportPlatformMacOS::get_export_option_warning(const EditorExportPr
 			}
 		}
 
+		if (p_name == "shader_baker/enabled") {
+			String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method");
+			if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") {
+				return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer.");
+			} else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) {
+				return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to the \"%s\" renderer temporarily to fix this."), export_renderer);
+			}
+		}
+
 		if (p_name == "codesign/certificate_file" || p_name == "codesign/certificate_password" || p_name == "codesign/identity") {
 			if (dist_type == 2) {
 				if (ad_hoc) {
@@ -468,6 +481,8 @@ void EditorExportPlatformMacOS::get_export_options(List<ExportOption> *r_options
 	r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "application/export_angle", PROPERTY_HINT_ENUM, "Auto,Yes,No"), 0, true));
 	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "display/high_res"), true));
 
+	r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false));
+
 	r_options->push_back(ExportOption(PropertyInfo(Variant::STRING, "application/additional_plist_content", PROPERTY_HINT_MULTILINE_TEXT), ""));
 
 	r_options->push_back(ExportOption(PropertyInfo(Variant::STRING, "xcode/platform_build"), "14C18"));

+ 3 - 0
platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml

@@ -578,6 +578,9 @@
 		<member name="privacy/user_defaults_access_reasons" type="int" setter="" getter="">
 			The reasons your app uses the user defaults API. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files/describing_use_of_required_reason_api]Describing use of required reason API[/url].
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ and Mobile renderers.
+		</member>
 		<member name="user_data/accessible_from_files_app" type="bool" setter="" getter="">
 			If [code]true[/code], the app "Documents" folder can be accessed via "Files" app. See [url=https://developer.apple.com/documentation/bundleresources/information_property_list/lssupportsopeningdocumentsinplace]LSSupportsOpeningDocumentsInPlace[/url].
 		</member>

+ 3 - 0
platform/windows/doc_classes/EditorExportPlatformWindows.xml

@@ -98,6 +98,9 @@
 		<member name="debug/export_console_wrapper" type="int" setter="" getter="">
 			If [code]true[/code], a console wrapper executable is exported alongside the main executable, which allows running the project with console output enabled.
 		</member>
+		<member name="shader_baker/enabled" type="bool" setter="" getter="">
+			If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ and Mobile renderers.
+		</member>
 		<member name="ssh_remote_deploy/cleanup_script" type="String" setter="" getter="">
 			Script code to execute on the remote host when app is finished.
 			The following variables can be used in the script:

+ 46 - 48
scene/resources/3d/sky_material.cpp

@@ -85,6 +85,8 @@ void ProceduralSkyMaterial::set_sky_cover(const Ref<Texture2D> &p_sky_cover) {
 		RS::get_singleton()->material_set_param(_get_material(), "sky_cover", Variant());
 	}
 
+	_update_shader(use_debanding, sky_cover.is_valid());
+
 	if (shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
 	}
@@ -164,7 +166,7 @@ float ProceduralSkyMaterial::get_sun_curve() const {
 
 void ProceduralSkyMaterial::set_use_debanding(bool p_use_debanding) {
 	use_debanding = p_use_debanding;
-	_update_shader();
+	_update_shader(use_debanding, sky_cover.is_valid());
 	// Only set if shader already compiled
 	if (shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
@@ -195,7 +197,7 @@ RID ProceduralSkyMaterial::get_shader_cache() const {
 }
 
 RID ProceduralSkyMaterial::get_rid() const {
-	_update_shader();
+	_update_shader(use_debanding, sky_cover.is_valid());
 	if (!shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
 		shader_set = true;
@@ -204,7 +206,7 @@ RID ProceduralSkyMaterial::get_rid() const {
 }
 
 RID ProceduralSkyMaterial::get_shader_rid() const {
-	_update_shader();
+	_update_shader(use_debanding, sky_cover.is_valid());
 	return get_shader_cache();
 }
 
@@ -281,22 +283,21 @@ void ProceduralSkyMaterial::_bind_methods() {
 }
 
 void ProceduralSkyMaterial::cleanup_shader() {
-	if (shader_cache[0].is_valid()) {
-		RS::get_singleton()->free(shader_cache[0]);
-		RS::get_singleton()->free(shader_cache[1]);
-		RS::get_singleton()->free(shader_cache[2]);
-		RS::get_singleton()->free(shader_cache[3]);
+	for (int i = 0; i < 4; i++) {
+		if (shader_cache[i].is_valid()) {
+			RS::get_singleton()->free(shader_cache[i]);
+		}
 	}
 }
 
-void ProceduralSkyMaterial::_update_shader() {
+void ProceduralSkyMaterial::_update_shader(bool p_use_debanding, bool p_use_sky_cover) {
 	MutexLock shader_lock(shader_mutex);
-	if (shader_cache[0].is_null()) {
-		for (int i = 0; i < 4; i++) {
-			shader_cache[i] = RS::get_singleton()->shader_create();
+	int index = int(p_use_debanding) + int(p_use_sky_cover) * 2;
+	if (shader_cache[index].is_null()) {
+		shader_cache[index] = RS::get_singleton()->shader_create();
 
-			// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
-			RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"(
+		// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
+		RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"(
 // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s ProceduralSkyMaterial.
 
 shader_type sky;
@@ -370,8 +371,7 @@ void sky() {
 	COLOR = mix(ground, sky, step(0.0, EYEDIR.y)) * exposure;
 }
 )",
-																		  (i % 2) ? "render_mode use_debanding;" : "", i > 1 ? "vec4 sky_cover_texture = texture(sky_cover, SKY_COORDS);" : "", i > 1 ? "sky += (sky_cover_texture.rgb * sky_cover_modulate.rgb) * sky_cover_texture.a * sky_cover_modulate.a;" : ""));
-		}
+																		  p_use_debanding ? "render_mode use_debanding;" : "", p_use_sky_cover ? "vec4 sky_cover_texture = texture(sky_cover, SKY_COORDS);" : "", p_use_sky_cover ? "sky += (sky_cover_texture.rgb * sky_cover_modulate.rgb) * sky_cover_texture.a * sky_cover_modulate.a;" : ""));
 	}
 }
 
@@ -416,7 +416,7 @@ Ref<Texture2D> PanoramaSkyMaterial::get_panorama() const {
 void PanoramaSkyMaterial::set_filtering_enabled(bool p_enabled) {
 	filter = p_enabled;
 	notify_property_list_changed();
-	_update_shader();
+	_update_shader(filter);
 	// Only set if shader already compiled
 	if (shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), shader_cache[int(filter)]);
@@ -441,10 +441,8 @@ Shader::Mode PanoramaSkyMaterial::get_shader_mode() const {
 }
 
 RID PanoramaSkyMaterial::get_rid() const {
-	_update_shader();
-	// Don't compile shaders until first use, then compile both
+	_update_shader(filter);
 	if (!shader_set) {
-		RS::get_singleton()->material_set_shader(_get_material(), shader_cache[1 - int(filter)]);
 		RS::get_singleton()->material_set_shader(_get_material(), shader_cache[int(filter)]);
 		shader_set = true;
 	}
@@ -452,7 +450,7 @@ RID PanoramaSkyMaterial::get_rid() const {
 }
 
 RID PanoramaSkyMaterial::get_shader_rid() const {
-	_update_shader();
+	_update_shader(filter);
 	return shader_cache[int(filter)];
 }
 
@@ -475,20 +473,21 @@ Mutex PanoramaSkyMaterial::shader_mutex;
 RID PanoramaSkyMaterial::shader_cache[2];
 
 void PanoramaSkyMaterial::cleanup_shader() {
-	if (shader_cache[0].is_valid()) {
-		RS::get_singleton()->free(shader_cache[0]);
-		RS::get_singleton()->free(shader_cache[1]);
+	for (int i = 0; i < 2; i++) {
+		if (shader_cache[i].is_valid()) {
+			RS::get_singleton()->free(shader_cache[i]);
+		}
 	}
 }
 
-void PanoramaSkyMaterial::_update_shader() {
+void PanoramaSkyMaterial::_update_shader(bool p_filter) {
 	MutexLock shader_lock(shader_mutex);
-	if (shader_cache[0].is_null()) {
-		for (int i = 0; i < 2; i++) {
-			shader_cache[i] = RS::get_singleton()->shader_create();
+	int index = int(p_filter);
+	if (shader_cache[index].is_null()) {
+		shader_cache[index] = RS::get_singleton()->shader_create();
 
-			// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
-			RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"(
+		// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
+		RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"(
 // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s PanoramaSkyMaterial.
 
 shader_type sky;
@@ -500,8 +499,7 @@ void sky() {
 	COLOR = texture(source_panorama, SKY_COORDS).rgb * exposure;
 }
 )",
-																		  i ? "filter_linear" : "filter_nearest"));
-		}
+																		  p_filter ? "filter_linear" : "filter_nearest"));
 	}
 }
 
@@ -599,7 +597,7 @@ float PhysicalSkyMaterial::get_energy_multiplier() const {
 
 void PhysicalSkyMaterial::set_use_debanding(bool p_use_debanding) {
 	use_debanding = p_use_debanding;
-	_update_shader();
+	_update_shader(use_debanding, night_sky.is_valid());
 	// Only set if shader already compiled
 	if (shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
@@ -618,6 +616,8 @@ void PhysicalSkyMaterial::set_night_sky(const Ref<Texture2D> &p_night_sky) {
 		RS::get_singleton()->material_set_param(_get_material(), "night_sky", Variant());
 	}
 
+	_update_shader(use_debanding, night_sky.is_valid());
+
 	if (shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
 	}
@@ -638,7 +638,7 @@ RID PhysicalSkyMaterial::get_shader_cache() const {
 }
 
 RID PhysicalSkyMaterial::get_rid() const {
-	_update_shader();
+	_update_shader(use_debanding, night_sky.is_valid());
 	if (!shader_set) {
 		RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache());
 		shader_set = true;
@@ -647,7 +647,7 @@ RID PhysicalSkyMaterial::get_rid() const {
 }
 
 RID PhysicalSkyMaterial::get_shader_rid() const {
-	_update_shader();
+	_update_shader(use_debanding, night_sky.is_valid());
 	return get_shader_cache();
 }
 
@@ -712,22 +712,21 @@ void PhysicalSkyMaterial::_bind_methods() {
 }
 
 void PhysicalSkyMaterial::cleanup_shader() {
-	if (shader_cache[0].is_valid()) {
-		RS::get_singleton()->free(shader_cache[0]);
-		RS::get_singleton()->free(shader_cache[1]);
-		RS::get_singleton()->free(shader_cache[2]);
-		RS::get_singleton()->free(shader_cache[3]);
+	for (int i = 0; i < 4; i++) {
+		if (shader_cache[i].is_valid()) {
+			RS::get_singleton()->free(shader_cache[i]);
+		}
 	}
 }
 
-void PhysicalSkyMaterial::_update_shader() {
+void PhysicalSkyMaterial::_update_shader(bool p_use_debanding, bool p_use_night_sky) {
 	MutexLock shader_lock(shader_mutex);
-	if (shader_cache[0].is_null()) {
-		for (int i = 0; i < 4; i++) {
-			shader_cache[i] = RS::get_singleton()->shader_create();
+	int index = int(p_use_debanding) + int(p_use_night_sky) * 2;
+	if (shader_cache[index].is_null()) {
+		shader_cache[index] = RS::get_singleton()->shader_create();
 
-			// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
-			RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"(
+		// Add a comment to describe the shader origin (useful when converting to ShaderMaterial).
+		RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"(
 // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s PhysicalSkyMaterial.
 
 shader_type sky;
@@ -811,8 +810,7 @@ void sky() {
 	}
 }
 )",
-																		  (i % 2) ? "render_mode use_debanding;" : "", i > 1 ? "L0 += texture(night_sky, SKY_COORDS).xyz * extinction;" : "", i > 1 ? "COLOR = texture(night_sky, SKY_COORDS).xyz;" : ""));
-		}
+																		  p_use_debanding ? "render_mode use_debanding;" : "", p_use_night_sky ? "L0 += texture(night_sky, SKY_COORDS).xyz * extinction;" : "", p_use_night_sky ? "COLOR = texture(night_sky, SKY_COORDS).xyz;" : ""));
 	}
 }
 

+ 3 - 3
scene/resources/3d/sky_material.h

@@ -56,7 +56,7 @@ private:
 
 	static Mutex shader_mutex;
 	static RID shader_cache[4];
-	static void _update_shader();
+	static void _update_shader(bool p_use_debanding, bool p_use_sky_cover);
 	mutable bool shader_set = false;
 
 	RID get_shader_cache() const;
@@ -130,7 +130,7 @@ private:
 
 	static Mutex shader_mutex;
 	static RID shader_cache[2];
-	static void _update_shader();
+	static void _update_shader(bool p_filter);
 	mutable bool shader_set = false;
 
 	bool filter = true;
@@ -181,7 +181,7 @@ private:
 	float energy_multiplier = 1.0f;
 	bool use_debanding = true;
 	Ref<Texture2D> night_sky;
-	static void _update_shader();
+	static void _update_shader(bool p_use_debanding, bool p_use_night_sky);
 	mutable bool shader_set = false;
 
 protected:

+ 4 - 2
scene/resources/material.cpp

@@ -682,6 +682,10 @@ void BaseMaterial3D::finish_shaders() {
 }
 
 void BaseMaterial3D::_update_shader() {
+	if (!_is_initialized()) {
+		_mark_ready();
+	}
+
 	MaterialKey mk = _compute_key();
 	if (mk == current_key) {
 		return; //no update required in the end
@@ -3644,8 +3648,6 @@ BaseMaterial3D::BaseMaterial3D(bool p_orm) :
 	flags[FLAG_USE_TEXTURE_REPEAT] = true;
 
 	current_key.invalid_key = 1;
-
-	_mark_initialized(callable_mp(this, &BaseMaterial3D::_queue_shader_change), Callable());
 }
 
 BaseMaterial3D::~BaseMaterial3D() {

+ 56 - 26
scene/resources/particle_process_material.cpp

@@ -32,8 +32,9 @@
 
 #include "core/version.h"
 
-Mutex ParticleProcessMaterial::material_mutex;
+Mutex ParticleProcessMaterial::dirty_materials_mutex;
 SelfList<ParticleProcessMaterial>::List ParticleProcessMaterial::dirty_materials;
+Mutex ParticleProcessMaterial::shader_map_mutex;
 HashMap<ParticleProcessMaterial::MaterialKey, ParticleProcessMaterial::ShaderData, ParticleProcessMaterial::MaterialKey> ParticleProcessMaterial::shader_map;
 RBSet<String> ParticleProcessMaterial::min_max_properties;
 ParticleProcessMaterial::ShaderNames *ParticleProcessMaterial::shader_names = nullptr;
@@ -147,26 +148,37 @@ void ParticleProcessMaterial::finish_shaders() {
 }
 
 void ParticleProcessMaterial::_update_shader() {
+	if (!_is_initialized()) {
+		_mark_ready();
+	}
+
 	MaterialKey mk = _compute_key();
 	if (mk == current_key) {
 		return; // No update required in the end.
 	}
 
-	if (shader_map.has(current_key)) {
-		shader_map[current_key].users--;
-		if (shader_map[current_key].users == 0) {
-			// Deallocate shader, as it's no longer in use.
-			RS::get_singleton()->free(shader_map[current_key].shader);
-			shader_map.erase(current_key);
+	{
+		MutexLock lock(shader_map_mutex);
+		ShaderData *v = shader_map.getptr(current_key);
+		if (v) {
+			v->users--;
+			if (v->users == 0) {
+				// Deallocate shader, as it's no longer in use.
+				RS::get_singleton()->free(v->shader);
+				shader_map.erase(current_key);
+				shader_rid = RID();
+			}
 		}
-	}
 
-	current_key = mk;
+		current_key = mk;
 
-	if (shader_map.has(mk)) {
-		RS::get_singleton()->material_set_shader(_get_material(), shader_map[mk].shader);
-		shader_map[mk].users++;
-		return;
+		v = shader_map.getptr(mk);
+		if (v) {
+			shader_rid = v->shader;
+			RS::get_singleton()->material_set_shader(_get_material(), shader_rid);
+			v->users++;
+			return;
+		}
 	}
 
 	// No pre-existing shader, create one.
@@ -1176,19 +1188,34 @@ void ParticleProcessMaterial::_update_shader() {
 	code += "	}\n";
 	code += "}\n";
 
-	ShaderData shader_data;
-	shader_data.shader = RS::get_singleton()->shader_create();
-	shader_data.users = 1;
+	// We must create the shader outside the shader_map_mutex to avoid potential deadlocks with
+	// other tasks in the WorkerThreadPool simultaneously creating materials, which
+	// may also hold the shared shader_map_mutex lock.
+	RID new_shader = RS::get_singleton()->shader_create_from_code(code);
 
-	RS::get_singleton()->shader_set_code(shader_data.shader, code);
+	MutexLock lock(shader_map_mutex);
 
-	shader_map[mk] = shader_data;
+	ShaderData *v = shader_map.getptr(mk);
+	if (unlikely(v)) {
+		// We raced and managed to create the same key concurrently, so we'll free the shader we just created,
+		// given we know it isn't used, and use the winner.
+		RS::get_singleton()->free(new_shader);
+	} else {
+		ShaderData shader_data;
+		shader_data.shader = new_shader;
+		// ShaderData will be inserted with a users count of 0, but we
+		// increment unconditionally outside this if block, whilst still under lock.
+		v = &shader_map.insert(mk, shader_data)->value;
+	}
 
-	RS::get_singleton()->material_set_shader(_get_material(), shader_data.shader);
+	shader_rid = v->shader;
+	v->users++;
+
+	RS::get_singleton()->material_set_shader(_get_material(), shader_rid);
 }
 
 void ParticleProcessMaterial::flush_changes() {
-	MutexLock lock(material_mutex);
+	MutexLock lock(dirty_materials_mutex);
 
 	while (dirty_materials.first()) {
 		dirty_materials.first()->self()->_update_shader();
@@ -1201,7 +1228,7 @@ void ParticleProcessMaterial::_queue_shader_change() {
 		return;
 	}
 
-	MutexLock lock(material_mutex);
+	MutexLock lock(dirty_materials_mutex);
 
 	if (!element.in_list()) {
 		dirty_materials.add(&element);
@@ -1836,9 +1863,14 @@ double ParticleProcessMaterial::get_lifetime_randomness() const {
 	return lifetime_randomness;
 }
 
+RID ParticleProcessMaterial::get_rid() const {
+	const_cast<ParticleProcessMaterial *>(this)->_update_shader();
+	return Material::get_rid();
+}
+
 RID ParticleProcessMaterial::get_shader_rid() const {
-	ERR_FAIL_COND_V(!shader_map.has(current_key), RID());
-	return shader_map[current_key].shader;
+	const_cast<ParticleProcessMaterial *>(this)->_update_shader();
+	return shader_rid;
 }
 
 void ParticleProcessMaterial::_validate_property(PropertyInfo &p_property) const {
@@ -2403,13 +2435,11 @@ ParticleProcessMaterial::ParticleProcessMaterial() :
 	set_color(Color(1, 1, 1, 1));
 
 	current_key.invalid_key = 1;
-
-	_mark_initialized(callable_mp(this, &ParticleProcessMaterial::_queue_shader_change), callable_mp(this, &ParticleProcessMaterial::_update_shader));
 }
 
 ParticleProcessMaterial::~ParticleProcessMaterial() {
 	ERR_FAIL_NULL(RenderingServer::get_singleton());
-	MutexLock lock(material_mutex);
+	MutexLock lock(shader_map_mutex);
 
 	if (shader_map.has(current_key)) {
 		shader_map[current_key].users--;

+ 4 - 1
scene/resources/particle_process_material.h

@@ -148,10 +148,12 @@ private:
 		int users = 0;
 	};
 
+	static Mutex shader_map_mutex;
 	static HashMap<MaterialKey, ShaderData, MaterialKey> shader_map;
 	static RBSet<String> min_max_properties;
 
 	MaterialKey current_key;
+	RID shader_rid;
 
 	_FORCE_INLINE_ MaterialKey _compute_key() const {
 		MaterialKey mk;
@@ -185,7 +187,7 @@ private:
 		return mk;
 	}
 
-	static Mutex material_mutex;
+	static Mutex dirty_materials_mutex;
 	static SelfList<ParticleProcessMaterial>::List dirty_materials;
 
 	struct ShaderNames {
@@ -501,6 +503,7 @@ public:
 	void set_emission_shape_scale(const Vector3 &p_emission_shape_scale);
 	Vector3 get_emission_shape_scale() const;
 
+	virtual RID get_rid() const override;
 	virtual RID get_shader_rid() const override;
 
 	virtual Shader::Mode get_shader_mode() const override;

+ 1 - 1
servers/rendering/dummy/storage/material_storage.cpp

@@ -147,7 +147,7 @@ RID MaterialStorage::shader_allocate() {
 	return shader_owner.allocate_rid();
 }
 
-void MaterialStorage::shader_initialize(RID p_rid) {
+void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) {
 	shader_owner.initialize_rid(p_rid, DummyShader());
 }
 

+ 5 - 1
servers/rendering/dummy/storage/material_storage.h

@@ -50,6 +50,7 @@ private:
 	mutable RID_Owner<DummyShader> shader_owner;
 
 	ShaderCompiler dummy_compiler;
+	HashSet<RID> dummy_embedded_set;
 
 	struct DummyMaterial {
 		RID shader;
@@ -87,7 +88,7 @@ public:
 	bool owns_shader(RID p_rid) { return shader_owner.owns(p_rid); }
 
 	virtual RID shader_allocate() override;
-	virtual void shader_initialize(RID p_rid) override;
+	virtual void shader_initialize(RID p_rid, bool p_embedded) override;
 	virtual void shader_free(RID p_rid) override;
 
 	virtual void shader_set_code(RID p_shader, const String &p_code) override;
@@ -101,6 +102,9 @@ public:
 	virtual Variant shader_get_parameter_default(RID p_material, const StringName &p_param) const override { return Variant(); }
 
 	virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override { return RS::ShaderNativeSourceCode(); }
+	virtual void shader_embedded_set_lock() override {}
+	virtual const HashSet<RID> &shader_embedded_set_get() const override { return dummy_embedded_set; }
+	virtual void shader_embedded_set_unlock() override {}
 
 	/* MATERIAL API */
 

+ 14 - 10
servers/rendering/renderer_rd/cluster_builder_rd.cpp

@@ -49,29 +49,33 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
 	{
 		RD::FramebufferFormatID fb_format;
 		RD::PipelineColorBlendState blend_state;
-		String defines;
+		RD::PipelineRasterizationState rasterization_state;
+		RD::PipelineMultisampleState ms;
+		rasterization_state.enable_depth_clamp = true;
+		ms.sample_count = RD::TEXTURE_SAMPLES_4;
+
+		Vector<String> variants;
+		variants.push_back("");
+		variants.push_back("\n#define USE_ATTACHMENT\n");
+
+		ClusterRender::ShaderVariant shader_variant;
 		if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) {
 			fb_format = RD::get_singleton()->framebuffer_format_create_empty();
 			blend_state = RD::PipelineColorBlendState::create_disabled();
+			shader_variant = ClusterRender::SHADER_NORMAL;
 		} else {
 			Vector<RD::AttachmentFormat> afs;
 			afs.push_back(RD::AttachmentFormat());
 			afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
 			fb_format = RD::get_singleton()->framebuffer_format_create(afs);
 			blend_state = RD::PipelineColorBlendState::create_blend();
-			defines = "\n#define USE_ATTACHMENT\n";
+			shader_variant = ClusterRender::SHADER_USE_ATTACHMENT;
 		}
 
-		RD::PipelineRasterizationState rasterization_state;
-		rasterization_state.enable_depth_clamp = true;
-		Vector<String> versions;
-		versions.push_back("");
-		cluster_render.cluster_render_shader.initialize(versions, defines);
+		cluster_render.cluster_render_shader.initialize(variants);
 		cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
-		cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
+		cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, shader_variant);
 		cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0);
-		RD::PipelineMultisampleState ms;
-		ms.sample_count = RD::TEXTURE_SAMPLES_4;
 		cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, ms, RD::PipelineDepthStencilState(), blend_state, 0);
 	}
 	{

+ 5 - 0
servers/rendering/renderer_rd/cluster_builder_rd.h

@@ -73,6 +73,11 @@ class ClusterBuilderSharedDataRD {
 		RID shader_version;
 		RID shader;
 
+		enum ShaderVariant {
+			SHADER_NORMAL,
+			SHADER_USE_ATTACHMENT,
+		};
+
 		enum PipelineVersion {
 			PIPELINE_NORMAL,
 			PIPELINE_MSAA,

+ 9 - 6
servers/rendering/renderer_rd/effects/fsr.cpp

@@ -35,17 +35,20 @@
 using namespace RendererRD;
 
 FSR::FSR() {
-	Vector<String> FSR_upscale_modes;
+	Vector<String> fsr_upscale_modes;
+	fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n");
+	fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n");
+	fsr_shader.initialize(fsr_upscale_modes);
+
+	FSRShaderVariant variant;
 	if (RD::get_singleton()->has_feature(RD::SUPPORTS_FSR_HALF_FLOAT)) {
-		FSR_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n");
+		variant = FSR_SHADER_VARIANT_NORMAL;
 	} else {
-		FSR_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n");
+		variant = FSR_SHADER_VARIANT_FALLBACK;
 	}
 
-	fsr_shader.initialize(FSR_upscale_modes);
-
 	shader_version = fsr_shader.version_create();
-	pipeline = RD::get_singleton()->compute_pipeline_create(fsr_shader.version_get_shader(shader_version, 0));
+	pipeline = RD::get_singleton()->compute_pipeline_create(fsr_shader.version_get_shader(shader_version, variant));
 }
 
 FSR::~FSR() {

+ 5 - 0
servers/rendering/renderer_rd/effects/fsr.h

@@ -47,6 +47,11 @@ public:
 	virtual void process(Ref<RenderSceneBuffersRD> p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final;
 
 private:
+	enum FSRShaderVariant {
+		FSR_SHADER_VARIANT_NORMAL,
+		FSR_SHADER_VARIANT_FALLBACK,
+	};
+
 	enum FSRUpscalePass {
 		FSR_UPSCALE_PASS_EASU = 0,
 		FSR_UPSCALE_PASS_RCAS = 1

+ 32 - 39
servers/rendering/renderer_rd/effects/fsr2.cpp

@@ -515,17 +515,13 @@ FSR2Context::~FSR2Context() {
 
 FSR2Effect::FSR2Effect() {
 	FfxDeviceCapabilities &capabilities = device.capabilities;
-	uint64_t default_subgroup_size = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_SIZE);
 	capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1;
-	capabilities.waveLaneCountMin = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MIN_SIZE);
-	capabilities.waveLaneCountMax = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MAX_SIZE);
+	capabilities.waveLaneCountMin = 32;
+	capabilities.waveLaneCountMax = 32;
 	capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT);
 	capabilities.raytracingSupported = false;
 
-	bool force_wave_64 = default_subgroup_size == 32 && capabilities.waveLaneCountMax == 64;
-	bool use_lut = force_wave_64 || default_subgroup_size == 64;
-
-	String general_defines_base =
+	String general_defines =
 			"\n#define FFX_GPU\n"
 			"\n#define FFX_GLSL 1\n"
 			"\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
@@ -534,17 +530,12 @@ FSR2Effect::FSR2Effect() {
 			"\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
 			"\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
 
-	if (use_lut) {
-		general_defines_base += "\n#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1\n";
-	}
-
-	String general_defines = general_defines_base;
-	if (capabilities.fp16Supported) {
-		general_defines += "\n#define FFX_HALF 1\n";
-	}
+	Vector<String> modes_single;
+	modes_single.push_back("");
 
-	Vector<String> modes;
-	modes.push_back("");
+	Vector<String> modes_with_fp16;
+	modes_with_fp16.push_back("");
+	modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
 
 	// Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
 	// there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
@@ -557,8 +548,9 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];
 		pass.shader = &shaders.depth_clip;
-		pass.shader->initialize(modes, general_defines);
+		pass.shader->initialize(modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" },
@@ -587,8 +579,9 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];
 		pass.shader = &shaders.reconstruct_previous_depth;
-		pass.shader->initialize(modes, general_defines);
+		pass.shader->initialize(modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" },
@@ -616,8 +609,9 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];
 		pass.shader = &shaders.lock;
-		pass.shader->initialize(modes, general_defines);
+		pass.shader->initialize(modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_lock_input_luma" }
@@ -634,22 +628,19 @@ FSR2Effect::FSR2Effect() {
 	}
 
 	{
-		Vector<String> accumulate_modes;
-		accumulate_modes.push_back("\n");
-		accumulate_modes.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
-
-		String general_defines_accumulate;
-		if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") {
-			// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
-			general_defines_accumulate = general_defines_base;
-		} else {
-			general_defines_accumulate = general_defines;
-		}
-
+		Vector<String> accumulate_modes_with_fp16;
+		accumulate_modes_with_fp16.push_back("\n");
+		accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+		accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+		accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+
+		// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
+		const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";
 		Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];
 		pass.shader = &shaders.accumulate;
-		pass.shader->initialize(accumulate_modes, general_defines_accumulate);
+		pass.shader->initialize(accumulate_modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_input_exposure" },
@@ -679,16 +670,16 @@ FSR2Effect::FSR2Effect() {
 			FfxResourceBinding{ 18, 0, L"cbFSR2" }
 		};
 
-		// Sharpen pass is a clone of the accumulate pass.
+		// Sharpen pass is a clone of the accumulate pass with the sharpening variant.
 		Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];
 		sharpen_pass = pass;
-		sharpen_pass.shader_variant = 1;
+		sharpen_pass.shader_variant = pass.shader_variant + 1;
 	}
 
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];
 		pass.shader = &shaders.rcas;
-		pass.shader->initialize(modes, general_defines_base);
+		pass.shader->initialize(modes_single, general_defines);
 		pass.shader_version = pass.shader->version_create();
 
 		pass.sampled_bindings = {
@@ -709,7 +700,7 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];
 		pass.shader = &shaders.compute_luminance_pyramid;
-		pass.shader->initialize(modes, general_defines_base);
+		pass.shader->initialize(modes_single, general_defines);
 		pass.shader_version = pass.shader->version_create();
 
 		pass.sampled_bindings = {
@@ -732,8 +723,9 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];
 		pass.shader = &shaders.autogen_reactive;
-		pass.shader->initialize(modes, general_defines);
+		pass.shader->initialize(modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
@@ -753,8 +745,9 @@ FSR2Effect::FSR2Effect() {
 	{
 		Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];
 		pass.shader = &shaders.tcr_autogen;
-		pass.shader->initialize(modes, general_defines);
+		pass.shader->initialize(modes_with_fp16, general_defines);
 		pass.shader_version = pass.shader->version_create();
+		pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
 
 		pass.sampled_bindings = {
 			FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },

+ 5 - 0
servers/rendering/renderer_rd/environment/fog.cpp

@@ -368,6 +368,11 @@ RS::ShaderNativeSourceCode Fog::FogShaderData::get_native_source_code() const {
 	return fog_singleton->volumetric_fog.shader.version_get_native_source_code(version);
 }
 
+Pair<ShaderRD *, RID> Fog::FogShaderData::get_native_shader_and_version() const {
+	Fog *fog_singleton = Fog::get_singleton();
+	return { &fog_singleton->volumetric_fog.shader, version };
+}
+
 Fog::FogShaderData::~FogShaderData() {
 	Fog *fog_singleton = Fog::get_singleton();
 	ERR_FAIL_NULL(fog_singleton);

+ 1 - 0
servers/rendering/renderer_rd/environment/fog.h

@@ -202,6 +202,7 @@ private:
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 
 		FogShaderData() {}
 		virtual ~FogShaderData();

+ 60 - 38
servers/rendering/renderer_rd/environment/gi.cpp

@@ -3366,6 +3366,31 @@ GI::GI() {
 }
 
 GI::~GI() {
+	if (voxel_gi_debug_shader_version.is_valid()) {
+		voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version);
+	}
+	if (voxel_gi_lighting_shader_version.is_valid()) {
+		voxel_gi_shader.version_free(voxel_gi_lighting_shader_version);
+	}
+	if (shader_version.is_valid()) {
+		shader.version_free(shader_version);
+	}
+	if (sdfgi_shader.debug_probes_shader.is_valid()) {
+		sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader);
+	}
+	if (sdfgi_shader.debug_shader.is_valid()) {
+		sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader);
+	}
+	if (sdfgi_shader.direct_light_shader.is_valid()) {
+		sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader);
+	}
+	if (sdfgi_shader.integrate_shader.is_valid()) {
+		sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader);
+	}
+	if (sdfgi_shader.preprocess_shader.is_valid()) {
+		sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader);
+	}
+
 	singleton = nullptr;
 }
 
@@ -3511,20 +3536,26 @@ void GI::init(SkyRD *p_sky) {
 	{
 		//calculate tables
 		String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n";
-		if (RendererSceneRenderRD::get_singleton()->is_vrs_supported()) {
-			defines += "\n#define USE_VRS\n";
-		}
-		if (!RD::get_singleton()->sampler_is_format_supported_for_filter(RD::DATA_FORMAT_R8G8_UINT, RD::SAMPLER_FILTER_LINEAR)) {
-			defines += "\n#define SAMPLE_VOXEL_GI_NEAREST\n";
+
+		Vector<ShaderRD::VariantDefine> variants;
+		for (uint32_t vrs = 0; vrs < 2; vrs++) {
+			String vrs_base = vrs ? "\n#define USE_VRS\n" : "";
+			Group group = vrs ? GROUP_VRS : GROUP_NORMAL;
+			bool default_enabled = vrs == 0;
+			variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_VOXEL_GI_INSTANCES\n", default_enabled)); // MODE_VOXEL_GI
+			variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_VOXEL_GI_INSTANCES\n#define SAMPLE_VOXEL_GI_NEAREST\n", default_enabled)); // MODE_VOXEL_GI_WITHOUT_SAMPLER
+			variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n", default_enabled)); // MODE_SDFGI
+			variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n", default_enabled)); // MODE_COMBINED
+			variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n#define SAMPLE_VOXEL_GI_NEAREST\n", default_enabled)); // MODE_COMBINED_WITHOUT_SAMPLER
 		}
 
-		Vector<String> gi_modes;
+		shader.initialize(variants, defines);
 
-		gi_modes.push_back("\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_VOXEL_GI
-		gi_modes.push_back("\n#define USE_SDFGI\n"); // MODE_SDFGI
-		gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_COMBINED
+		bool vrs_supported = RendererSceneRenderRD::get_singleton()->is_vrs_supported();
+		if (vrs_supported) {
+			shader.enable_group(GROUP_VRS);
+		}
 
-		shader.initialize(gi_modes, defines);
 		shader_version = shader.version_create();
 
 		Vector<RD::PipelineSpecializationConstant> specialization_constants;
@@ -3551,8 +3582,10 @@ void GI::init(SkyRD *p_sky) {
 			specialization_constants.ptrw()[0].bool_value = (v & SHADER_SPECIALIZATION_HALF_RES) ? true : false;
 			specialization_constants.ptrw()[1].bool_value = (v & SHADER_SPECIALIZATION_USE_FULL_PROJECTION_MATRIX) ? true : false;
 			specialization_constants.ptrw()[2].bool_value = (v & SHADER_SPECIALIZATION_USE_VRS) ? true : false;
+
+			int variant_base = vrs_supported ? MODE_MAX : 0;
 			for (int i = 0; i < MODE_MAX; i++) {
-				pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, i), specialization_constants);
+				pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, variant_base + i), specialization_constants);
 			}
 		}
 
@@ -3612,31 +3645,6 @@ void GI::free() {
 		RD::get_singleton()->free(sdfgi_ubo);
 	}
 
-	if (voxel_gi_debug_shader_version.is_valid()) {
-		voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version);
-	}
-	if (voxel_gi_lighting_shader_version.is_valid()) {
-		voxel_gi_shader.version_free(voxel_gi_lighting_shader_version);
-	}
-	if (shader_version.is_valid()) {
-		shader.version_free(shader_version);
-	}
-	if (sdfgi_shader.debug_probes_shader.is_valid()) {
-		sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader);
-	}
-	if (sdfgi_shader.debug_shader.is_valid()) {
-		sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader);
-	}
-	if (sdfgi_shader.direct_light_shader.is_valid()) {
-		sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader);
-	}
-	if (sdfgi_shader.integrate_shader.is_valid()) {
-		sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader);
-	}
-	if (sdfgi_shader.preprocess_shader.is_valid()) {
-		sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader);
-	}
-
 	if (voxel_gi_lights) {
 		memdelete_arr(voxel_gi_lights);
 	}
@@ -3889,7 +3897,15 @@ void GI::process_gi(Ref<RenderSceneBuffersRD> p_render_buffers, const RID *p_nor
 		pipeline_specialization |= SHADER_SPECIALIZATION_USE_VRS;
 	}
 
-	Mode mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_COMBINED : (use_sdfgi ? MODE_SDFGI : MODE_VOXEL_GI);
+	bool without_sampler = RD::get_singleton()->sampler_is_format_supported_for_filter(RD::DATA_FORMAT_R8G8_UINT, RD::SAMPLER_FILTER_LINEAR);
+	Mode mode;
+	if (use_sdfgi && use_voxel_gi_instances) {
+		mode = without_sampler ? MODE_COMBINED_WITHOUT_SAMPLER : MODE_COMBINED;
+	} else if (use_sdfgi) {
+		mode = MODE_SDFGI;
+	} else {
+		mode = without_sampler ? MODE_VOXEL_GI_WITHOUT_SAMPLER : MODE_VOXEL_GI;
+	}
 
 	for (uint32_t v = 0; v < p_view_count; v++) {
 		push_constant.view_index = v;
@@ -4062,7 +4078,9 @@ void GI::process_gi(Ref<RenderSceneBuffersRD> p_render_buffers, const RID *p_nor
 				uniforms.push_back(u);
 			}
 
-			rbgi->uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, 0), 0);
+			bool vrs_supported = RendererSceneRenderRD::get_singleton()->is_vrs_supported();
+			int variant_base = vrs_supported ? MODE_MAX : 0;
+			rbgi->uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, variant_base), 0);
 		}
 
 		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipelines[pipeline_specialization][mode]);
@@ -4121,3 +4139,7 @@ void GI::debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_frameb
 
 	voxel_gi->debug(p_draw_list, p_framebuffer, p_camera_with_transform, p_lighting, p_emission, p_alpha);
 }
+
+void GI::enable_vrs_shader_group() {
+	shader.enable_group(GROUP_VRS);
+}

+ 9 - 0
servers/rendering/renderer_rd/environment/gi.h

@@ -788,10 +788,17 @@ public:
 
 	RID sdfgi_ubo;
 
+	enum Group {
+		GROUP_NORMAL,
+		GROUP_VRS,
+	};
+
 	enum Mode {
 		MODE_VOXEL_GI,
+		MODE_VOXEL_GI_WITHOUT_SAMPLER,
 		MODE_SDFGI,
 		MODE_COMBINED,
+		MODE_COMBINED_WITHOUT_SAMPLER,
 		MODE_MAX
 	};
 
@@ -825,6 +832,8 @@ public:
 	bool voxel_gi_needs_update(RID p_probe) const;
 	void voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<RenderGeometryInstance *> &p_dynamic_objects);
 	void debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_framebuffer, const Projection &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha);
+
+	void enable_vrs_shader_group();
 };
 
 } // namespace RendererRD

+ 5 - 0
servers/rendering/renderer_rd/environment/sky.cpp

@@ -168,6 +168,11 @@ RS::ShaderNativeSourceCode SkyRD::SkyShaderData::get_native_source_code() const
 	return scene_singleton->sky.sky_shader.shader.version_get_native_source_code(version);
 }
 
+Pair<ShaderRD *, RID> SkyRD::SkyShaderData::get_native_shader_and_version() const {
+	RendererSceneRenderRD *scene_singleton = static_cast<RendererSceneRenderRD *>(RendererSceneRenderRD::singleton);
+	return { &scene_singleton->sky.sky_shader.shader, version };
+}
+
 SkyRD::SkyShaderData::~SkyShaderData() {
 	RendererSceneRenderRD *scene_singleton = static_cast<RendererSceneRenderRD *>(RendererSceneRenderRD::singleton);
 	ERR_FAIL_NULL(scene_singleton);

+ 1 - 0
servers/rendering/renderer_rd/environment/sky.h

@@ -128,6 +128,7 @@ private:
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 
 		SkyShaderData() {}
 		virtual ~SkyShaderData();

+ 24 - 4
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

@@ -4821,6 +4821,24 @@ uint32_t RenderForwardClustered::get_pipeline_compilations(RS::PipelineSource p_
 	return scene_shader.get_pipeline_compilations(p_source);
 }
 
+void RenderForwardClustered::enable_features(BitField<FeatureBits> p_feature_bits) {
+	if (p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)) {
+		scene_shader.enable_multiview_shader_group();
+	}
+
+	if (p_feature_bits.has_flag(FEATURE_ADVANCED_BIT)) {
+		scene_shader.enable_advanced_shader_group(p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT));
+	}
+
+	if (p_feature_bits.has_flag(FEATURE_VRS_BIT)) {
+		gi.enable_vrs_shader_group();
+	}
+}
+
+String RenderForwardClustered::get_name() const {
+	return "forward_clustered";
+}
+
 void RenderForwardClustered::GeometryInstanceForwardClustered::pair_voxel_gi_instances(const RID *p_voxel_gi_instances, uint32_t p_voxel_gi_instance_count) {
 	if (p_voxel_gi_instance_count > 0) {
 		voxel_gi_instances[0] = p_voxel_gi_instances[0];
@@ -4957,8 +4975,6 @@ RenderForwardClustered::RenderForwardClustered() {
 		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
 		RD::get_singleton()->compute_list_dispatch_threads(compute_list, tformat.width, tformat.height, 1);
 		RD::get_singleton()->compute_list_end();
-
-		best_fit_normal.shader.version_free(best_fit_normal.shader_version);
 	}
 
 	/* DFG LUT */
@@ -4996,8 +5012,6 @@ RenderForwardClustered::RenderForwardClustered() {
 		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
 		RD::get_singleton()->compute_list_dispatch_threads(compute_list, tformat.width, tformat.height, 1);
 		RD::get_singleton()->compute_list_end();
-
-		dfg_lut.shader.version_free(dfg_lut.shader_version);
 	}
 
 	_update_shader_quality_settings();
@@ -5048,8 +5062,14 @@ RenderForwardClustered::~RenderForwardClustered() {
 
 	RD::get_singleton()->free(shadow_sampler);
 	RSG::light_storage->directional_shadow_atlas_set_size(0);
+
+	RD::get_singleton()->free(best_fit_normal.pipeline);
 	RD::get_singleton()->free(best_fit_normal.texture);
+	best_fit_normal.shader.version_free(best_fit_normal.shader_version);
+
+	RD::get_singleton()->free(dfg_lut.pipeline);
 	RD::get_singleton()->free(dfg_lut.texture);
+	dfg_lut.shader.version_free(dfg_lut.shader_version);
 
 	{
 		for (const RID &rid : scene_state.uniform_buffers) {

+ 5 - 0
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

@@ -812,6 +812,11 @@ public:
 	virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override;
 	virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override;
 
+	/* SHADER LIBRARY */
+
+	virtual void enable_features(BitField<FeatureBits> p_feature_bits) override;
+	virtual String get_name() const override;
+
 	virtual bool free(RID p_rid) override;
 
 	virtual void update() override;

+ 14 - 2
servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp

@@ -160,7 +160,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) {
 	}
 
 	if (version.is_null()) {
-		version = SceneShaderForwardClustered::singleton->shader.version_create();
+		version = SceneShaderForwardClustered::singleton->shader.version_create(false);
 	}
 
 	depth_draw = DepthDraw(depth_drawi);
@@ -230,6 +230,14 @@ RS::ShaderNativeSourceCode SceneShaderForwardClustered::ShaderData::get_native_s
 	}
 }
 
+Pair<ShaderRD *, RID> SceneShaderForwardClustered::ShaderData::get_native_shader_and_version() const {
+	if (version.is_valid()) {
+		return { &SceneShaderForwardClustered::singleton->shader, version };
+	} else {
+		return {};
+	}
+}
+
 uint16_t SceneShaderForwardClustered::ShaderData::_get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const {
 	uint32_t ubershader_base = p_ubershader ? ShaderVersion::SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL : 0;
 	switch (p_pipeline_version) {
@@ -789,7 +797,7 @@ void SceneShaderForwardClustered::init(const String p_defines) {
 		actions.global_buffer_array_variable = "global_shader_uniforms.data";
 		actions.instance_uniform_index_variable = "instances.data[instance_index_interp].instance_uniforms_ofs";
 
-		actions.check_multiview_samplers = RendererCompositorRD::get_singleton()->is_xr_enabled(); // Make sure we check sampling multiview textures.
+		actions.check_multiview_samplers = true;
 
 		compiler.initialize(actions);
 	}
@@ -901,6 +909,10 @@ void SceneShaderForwardClustered::set_default_specialization(const ShaderSpecial
 	}
 }
 
+void SceneShaderForwardClustered::enable_multiview_shader_group() {
+	shader.enable_group(SHADER_GROUP_MULTIVIEW);
+}
+
 void SceneShaderForwardClustered::enable_advanced_shader_group(bool p_needs_multiview) {
 	if (p_needs_multiview || RendererCompositorRD::get_singleton()->is_xr_enabled()) {
 		shader.enable_group(SHADER_GROUP_ADVANCED_MULTIVIEW);

+ 2 - 0
servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h

@@ -277,6 +277,7 @@ public:
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 		uint16_t _get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const;
 		RID _get_shader_variant(uint16_t p_shader_version) const;
 		void _clear_vertex_input_mask_cache();
@@ -350,6 +351,7 @@ public:
 
 	void init(const String p_defines);
 	void set_default_specialization(const ShaderSpecialization &p_specialization);
+	void enable_multiview_shader_group();
 	void enable_advanced_shader_group(bool p_needs_multiview = false);
 	bool is_multiview_shader_group_enabled() const;
 	bool is_advanced_shader_group_enabled(bool p_multiview) const;

+ 15 - 1
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp

@@ -351,6 +351,20 @@ uint32_t RenderForwardMobile::get_pipeline_compilations(RS::PipelineSource p_sou
 	return scene_shader.get_pipeline_compilations(p_source);
 }
 
+void RenderForwardMobile::enable_features(BitField<FeatureBits> p_feature_bits) {
+	if (p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)) {
+		scene_shader.enable_multiview_shader_group();
+	}
+
+	if (p_feature_bits.has_flag(FEATURE_VRS_BIT)) {
+		gi.enable_vrs_shader_group();
+	}
+}
+
+String RenderForwardMobile::get_name() const {
+	return "forward_mobile";
+}
+
 bool RenderForwardMobile::free(RID p_rid) {
 	if (RendererSceneRenderRD::free(p_rid)) {
 		return true;
@@ -3013,7 +3027,7 @@ void RenderForwardMobile::_mesh_compile_pipelines_for_surface(const SurfacePipel
 	pipeline_key.primitive_type = mesh_storage->mesh_surface_get_primitive(p_surface.mesh_surface);
 	pipeline_key.wireframe = false;
 
-	const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_enabled();
+	const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_shader_group_enabled();
 	const RD::DataFormat buffers_color_format = _render_buffers_get_color_format();
 	const bool buffers_can_be_storage = _render_buffers_can_be_storage();
 	const uint32_t vrs_iterations = p_global.use_vrs ? 2 : 1;

+ 5 - 0
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h

@@ -673,6 +673,11 @@ public:
 	virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override;
 	virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override;
 
+	/* SHADER LIBRARY */
+
+	virtual void enable_features(BitField<FeatureBits> p_feature_bits) override;
+	virtual String get_name() const override;
+
 	virtual bool free(RID p_rid) override;
 
 	virtual void update() override;

+ 29 - 20
servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp

@@ -155,7 +155,7 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) {
 	}
 
 	if (version.is_null()) {
-		version = SceneShaderForwardMobile::singleton->shader.version_create();
+		version = SceneShaderForwardMobile::singleton->shader.version_create(false);
 	}
 
 	depth_draw = DepthDraw(depth_drawi);
@@ -236,6 +236,15 @@ RS::ShaderNativeSourceCode SceneShaderForwardMobile::ShaderData::get_native_sour
 	}
 }
 
+Pair<ShaderRD *, RID> SceneShaderForwardMobile::ShaderData::get_native_shader_and_version() const {
+	if (version.is_valid()) {
+		MutexLock lock(SceneShaderForwardMobile::singleton_mutex);
+		return { &SceneShaderForwardMobile::singleton->shader, version };
+	} else {
+		return {};
+	}
+}
+
 void SceneShaderForwardMobile::ShaderData::_create_pipeline(PipelineKey p_pipeline_key) {
 #if PRINT_PIPELINE_COMPILATION_KEYS
 	print_line(
@@ -480,19 +489,19 @@ void SceneShaderForwardMobile::init(const String p_defines) {
 	/* SCENE SHADER */
 
 	{
-		Vector<String> shader_versions;
+		Vector<ShaderRD::VariantDefine> shader_versions;
 		for (uint32_t ubershader = 0; ubershader < 2; ubershader++) {
 			const String base_define = ubershader ? "\n#define UBERSHADER\n" : "";
-			shader_versions.push_back(base_define + ""); // SHADER_VERSION_COLOR_PASS
-			shader_versions.push_back(base_define + "\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS
-			shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here...
-			shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS_DP
-			shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "", true)); // SHADER_VERSION_COLOR_PASS
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define USE_LIGHTMAP\n", true)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here...
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS_DP
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n", true)); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL
 
 			// Multiview versions of our shaders.
-			shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n"); // SHADER_VERSION_COLOR_PASS_MULTIVIEW
-			shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW
-			shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n", false)); // SHADER_VERSION_COLOR_PASS_MULTIVIEW
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n", false)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW
+			shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", false)); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW
 		}
 
 		Vector<RD::PipelineImmutableSampler> immutable_samplers;
@@ -502,13 +511,9 @@ void SceneShaderForwardMobile::init(const String p_defines) {
 		immutable_shadow_sampler.uniform_type = RenderingDeviceCommons::UNIFORM_TYPE_SAMPLER;
 		immutable_samplers.push_back(immutable_shadow_sampler);
 		shader.initialize(shader_versions, p_defines, immutable_samplers);
-		if (!RendererCompositorRD::get_singleton()->is_xr_enabled()) {
-			for (uint32_t ubershader = 0; ubershader < 2; ubershader++) {
-				uint32_t base_variant = ubershader ? SHADER_VERSION_MAX : 0;
-				shader.set_variant_enabled(base_variant + SHADER_VERSION_COLOR_PASS_MULTIVIEW, false);
-				shader.set_variant_enabled(base_variant + SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW, false);
-				shader.set_variant_enabled(base_variant + SHADER_VERSION_SHADOW_PASS_MULTIVIEW, false);
-			}
+
+		if (RendererCompositorRD::get_singleton()->is_xr_enabled()) {
+			shader.enable_group(SHADER_GROUP_MULTIVIEW);
 		}
 	}
 
@@ -714,7 +719,7 @@ void SceneShaderForwardMobile::init(const String p_defines) {
 		actions.instance_uniform_index_variable = "instances.data[draw_call.instance_index].instance_uniforms_ofs";
 
 		actions.apply_luminance_multiplier = true; // apply luminance multiplier to screen texture
-		actions.check_multiview_samplers = RendererCompositorRD::get_singleton()->is_xr_enabled(); // Make sure we check sampling multiview textures.
+		actions.check_multiview_samplers = true;
 
 		compiler.initialize(actions);
 	}
@@ -824,8 +829,12 @@ uint32_t SceneShaderForwardMobile::get_pipeline_compilations(RS::PipelineSource
 	return pipeline_compilations[p_source];
 }
 
-bool SceneShaderForwardMobile::is_multiview_enabled() const {
-	return shader.is_variant_enabled(SHADER_VERSION_COLOR_PASS_MULTIVIEW);
+void SceneShaderForwardMobile::enable_multiview_shader_group() {
+	shader.enable_group(SHADER_GROUP_MULTIVIEW);
+}
+
+bool SceneShaderForwardMobile::is_multiview_shader_group_enabled() const {
+	return shader.is_group_enabled(SHADER_GROUP_MULTIVIEW);
 }
 
 SceneShaderForwardMobile::~SceneShaderForwardMobile() {

+ 8 - 1
servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h

@@ -72,6 +72,11 @@ public:
 		}
 	}
 
+	enum ShaderGroup {
+		SHADER_GROUP_BASE, // Always compiled at the beginning.
+		SHADER_GROUP_MULTIVIEW,
+	};
+
 	struct ShaderSpecialization {
 		union {
 			uint32_t packed_0;
@@ -265,6 +270,7 @@ public:
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 		RD::PolygonCullMode get_cull_mode_from_cull_variant(CullVariant p_cull_variant);
 		void _clear_vertex_input_mask_cache();
 		RID get_shader_variant(ShaderVersion p_shader_version, bool p_ubershader) const;
@@ -337,7 +343,8 @@ public:
 	void init(const String p_defines);
 	void set_default_specialization(const ShaderSpecialization &p_specialization);
 	uint32_t get_pipeline_compilations(RS::PipelineSource p_source);
-	bool is_multiview_enabled() const;
+	void enable_multiview_shader_group();
+	bool is_multiview_shader_group_enabled() const;
 };
 
 } // namespace RendererSceneRenderImplementation

+ 19 - 24
servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp

@@ -392,9 +392,9 @@ RID RendererCanvasRenderRD::_create_base_uniform_set(RID p_to_render_target, boo
 
 	{
 		RD::Uniform u;
-		u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
 		u.binding = 2;
-		u.append_id(state.lights_uniform_buffer);
+		u.append_id(state.lights_storage_buffer);
 		uniforms.push_back(u);
 	}
 
@@ -525,7 +525,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 		uint32_t index = 0;
 
 		while (l) {
-			if (index == state.max_lights_per_render) {
+			if (index == MAX_LIGHTS_PER_RENDER) {
 				l->render_index_cache = -1;
 				l = l->next_ptr;
 				continue;
@@ -588,7 +588,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 		uint32_t index = light_count;
 
 		while (l) {
-			if (index == state.max_lights_per_render) {
+			if (index == MAX_LIGHTS_PER_RENDER) {
 				l->render_index_cache = -1;
 				l = l->next_ptr;
 				continue;
@@ -664,7 +664,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 	}
 
 	if (light_count > 0) {
-		RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]);
+		RD::get_singleton()->buffer_update(state.lights_storage_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]);
 	}
 
 	bool use_linear_colors = texture_storage->render_target_is_using_hdr(p_to_render_target);
@@ -964,7 +964,7 @@ void RendererCanvasRenderRD::_update_shadow_atlas() {
 			RD::TextureFormat tf;
 			tf.texture_type = RD::TEXTURE_TYPE_2D;
 			tf.width = state.shadow_texture_size;
-			tf.height = state.max_lights_per_render * 2;
+			tf.height = MAX_LIGHTS_PER_RENDER * 2;
 			tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT;
 			tf.format = RD::DATA_FORMAT_R32_SFLOAT;
 
@@ -975,7 +975,7 @@ void RendererCanvasRenderRD::_update_shadow_atlas() {
 			RD::TextureFormat tf;
 			tf.texture_type = RD::TEXTURE_TYPE_2D;
 			tf.width = state.shadow_texture_size;
-			tf.height = state.max_lights_per_render * 2;
+			tf.height = MAX_LIGHTS_PER_RENDER * 2;
 			tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
 			tf.format = RD::DATA_FORMAT_D32_SFLOAT;
 			tf.is_discardable = true;
@@ -1022,7 +1022,7 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index,
 	_update_shadow_atlas();
 
 	cl->shadow.z_far = p_far;
-	cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2);
+	cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(MAX_LIGHTS_PER_RENDER * 2);
 	Color cc = Color(p_far, p_far, p_far, 1.0);
 
 	// First, do a culling pass and record what occluders need to be drawn for this light.
@@ -1137,7 +1137,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 	float half_size = p_clip_rect.size.length() * 0.5; //shadow length, must keep this no matter the angle
 
 	cl->shadow.z_far = distance;
-	cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2);
+	cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(MAX_LIGHTS_PER_RENDER * 2);
 
 	Transform2D to_light_xform;
 
@@ -1584,7 +1584,7 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) {
 	pipeline_hash_map.clear_pipelines();
 
 	if (version.is_null()) {
-		version = canvas_singleton->shader.canvas_shader.version_create();
+		version = canvas_singleton->shader.canvas_shader.version_create(false);
 	}
 
 #if 0
@@ -1625,6 +1625,11 @@ RS::ShaderNativeSourceCode RendererCanvasRenderRD::CanvasShaderData::get_native_
 	return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version);
 }
 
+Pair<ShaderRD *, RID> RendererCanvasRenderRD::CanvasShaderData::get_native_shader_and_version() const {
+	RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton);
+	return { &canvas_singleton->shader.canvas_shader, version };
+}
+
 RID RendererCanvasRenderRD::CanvasShaderData::get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const {
 	if (version.is_valid()) {
 		uint32_t variant_index = p_shader_variant + (p_ubershader ? SHADER_VARIANT_MAX : 0);
@@ -1726,20 +1731,10 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 	{ //shader variants
 
 		String global_defines;
-
-		uint64_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE);
-		if (uniform_max_size < 65536) {
-			//Yes, you guessed right, ARM again
-			state.max_lights_per_render = 64;
-			global_defines += "#define MAX_LIGHTS 64\n";
-		} else {
-			state.max_lights_per_render = DEFAULT_MAX_LIGHTS_PER_RENDER;
-			global_defines += "#define MAX_LIGHTS " + itos(DEFAULT_MAX_LIGHTS_PER_RENDER) + "\n";
-		}
-
+		global_defines += "#define MAX_LIGHTS " + itos(MAX_LIGHTS_PER_RENDER) + "\n";
 		global_defines += "\n#define SAMPLERS_BINDING_FIRST_INDEX " + itos(SAMPLERS_BINDING_FIRST_INDEX) + "\n";
 
-		state.light_uniforms = memnew_arr(LightUniform, state.max_lights_per_render);
+		state.light_uniforms = memnew_arr(LightUniform, MAX_LIGHTS_PER_RENDER);
 		Vector<String> variants;
 		const uint32_t ubershader_iterations = 1;
 		for (uint32_t ubershader = 0; ubershader < ubershader_iterations; ubershader++) {
@@ -1921,7 +1916,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 	{ //bindings
 
 		state.canvas_state_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(State::Buffer));
-		state.lights_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(LightUniform) * state.max_lights_per_render);
+		state.lights_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(LightUniform) * MAX_LIGHTS_PER_RENDER);
 
 		RD::SamplerState shadow_sampler_state;
 		shadow_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
@@ -3316,7 +3311,7 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
 		}
 
 		memdelete_arr(state.light_uniforms);
-		RD::get_singleton()->free(state.lights_uniform_buffer);
+		RD::get_singleton()->free(state.lights_storage_buffer);
 	}
 
 	//shadow rendering

+ 3 - 4
servers/rendering/renderer_rd/renderer_canvas_render_rd.h

@@ -106,7 +106,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		MAX_RENDER_ITEMS = 256 * 1024,
 		MAX_LIGHT_TEXTURES = 1024,
 		MAX_LIGHTS_PER_ITEM = 16,
-		DEFAULT_MAX_LIGHTS_PER_RENDER = 256
+		MAX_LIGHTS_PER_RENDER = 256,
 	};
 
 	/****************/
@@ -169,6 +169,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 		RID get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const;
 		uint64_t get_vertex_input_mask(ShaderVariant p_shader_variant, bool p_ubershader);
 		bool is_valid() const;
@@ -570,7 +571,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 
 		LightUniform *light_uniforms = nullptr;
 
-		RID lights_uniform_buffer;
+		RID lights_storage_buffer;
 		RID canvas_state_buffer;
 		RID shadow_sampler;
 		RID shadow_texture;
@@ -584,8 +585,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 
 		RID default_transforms_uniform_set;
 
-		uint32_t max_lights_per_render;
-
 		double time;
 
 	} state;

+ 33 - 28
servers/rendering/renderer_rd/renderer_compositor_rd.cpp

@@ -275,41 +275,45 @@ RendererCompositorRD::RendererCompositorRD() {
 	uniform_set_cache = memnew(UniformSetCacheRD);
 	framebuffer_cache = memnew(FramebufferCacheRD);
 
-	{
-		String shader_cache_dir = Engine::get_singleton()->get_shader_cache_path();
-		if (shader_cache_dir.is_empty()) {
-			shader_cache_dir = "user://";
+	bool shader_cache_enabled = GLOBAL_GET("rendering/shader_compiler/shader_cache/enabled");
+	bool compress = GLOBAL_GET("rendering/shader_compiler/shader_cache/compress");
+	bool use_zstd = GLOBAL_GET("rendering/shader_compiler/shader_cache/use_zstd_compression");
+	bool strip_debug = GLOBAL_GET("rendering/shader_compiler/shader_cache/strip_debug");
+	ShaderRD::set_shader_cache_save_compressed(compress);
+	ShaderRD::set_shader_cache_save_compressed_zstd(use_zstd);
+	ShaderRD::set_shader_cache_save_debug(!strip_debug);
+
+	// Shader cache is forcefully enabled when running the editor.
+	if (shader_cache_enabled || Engine::get_singleton()->is_editor_hint()) {
+		// Attempt to create a folder for the shader cache that the user can write to. Shaders will only be attempted to be saved if this path exists.
+		String shader_cache_user_dir = Engine::get_singleton()->get_shader_cache_path();
+		if (shader_cache_user_dir.is_empty()) {
+			shader_cache_user_dir = "user://";
 		}
-		Ref<DirAccess> da = DirAccess::open(shader_cache_dir);
-		if (da.is_null()) {
-			ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir);
+
+		Ref<DirAccess> user_da = DirAccess::open(shader_cache_user_dir);
+		if (user_da.is_null()) {
+			ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_user_dir);
 		} else {
-			Error err = da->change_dir("shader_cache");
+			Error err = user_da->change_dir("shader_cache");
 			if (err != OK) {
-				err = da->make_dir("shader_cache");
+				err = user_da->make_dir("shader_cache");
 			}
+
 			if (err != OK) {
-				ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir);
+				ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_user_dir);
 			} else {
-				shader_cache_dir = shader_cache_dir.path_join("shader_cache");
-
-				bool shader_cache_enabled = GLOBAL_GET("rendering/shader_compiler/shader_cache/enabled");
-				if (!Engine::get_singleton()->is_editor_hint() && !shader_cache_enabled) {
-					shader_cache_dir = String(); //disable only if not editor
-				}
-
-				if (!shader_cache_dir.is_empty()) {
-					bool compress = GLOBAL_GET("rendering/shader_compiler/shader_cache/compress");
-					bool use_zstd = GLOBAL_GET("rendering/shader_compiler/shader_cache/use_zstd_compression");
-					bool strip_debug = GLOBAL_GET("rendering/shader_compiler/shader_cache/strip_debug");
-
-					ShaderRD::set_shader_cache_dir(shader_cache_dir);
-					ShaderRD::set_shader_cache_save_compressed(compress);
-					ShaderRD::set_shader_cache_save_compressed_zstd(use_zstd);
-					ShaderRD::set_shader_cache_save_debug(!strip_debug);
-				}
+				shader_cache_user_dir = shader_cache_user_dir.path_join("shader_cache");
+				ShaderRD::set_shader_cache_user_dir(shader_cache_user_dir);
 			}
 		}
+
+		// Check if a directory exists for the shader cache to pull shaders from as read-only. This is used on exported projects with baked shaders.
+		String shader_cache_res_dir = "res://.godot/shader_cache";
+		Ref<DirAccess> res_da = DirAccess::open(shader_cache_res_dir);
+		if (res_da.is_valid()) {
+			ShaderRD::set_shader_cache_res_dir(shader_cache_res_dir);
+		}
 	}
 
 	ERR_FAIL_COND_MSG(singleton != nullptr, "A RendererCompositorRD singleton already exists.");
@@ -347,5 +351,6 @@ RendererCompositorRD::~RendererCompositorRD() {
 	singleton = nullptr;
 	memdelete(uniform_set_cache);
 	memdelete(framebuffer_cache);
-	ShaderRD::set_shader_cache_dir(String());
+	ShaderRD::set_shader_cache_user_dir(String());
+	ShaderRD::set_shader_cache_res_dir(String());
 }

+ 2 - 0
servers/rendering/renderer_rd/renderer_compositor_rd.h

@@ -134,6 +134,8 @@ public:
 	_ALWAYS_INLINE_ double get_total_time() const { return time; }
 	_ALWAYS_INLINE_ bool can_create_resources_async() const { return true; }
 
+	virtual bool is_xr_enabled() const { return RendererCompositor::is_xr_enabled(); }
+
 	static Error is_viable() {
 		return OK;
 	}

+ 2 - 1
servers/rendering/renderer_rd/renderer_scene_render_rd.h

@@ -50,8 +50,9 @@
 #include "servers/rendering/renderer_scene_render.h"
 #include "servers/rendering/rendering_device.h"
 #include "servers/rendering/rendering_method.h"
+#include "servers/rendering/rendering_shader_library.h"
 
-class RendererSceneRenderRD : public RendererSceneRender {
+class RendererSceneRenderRD : public RendererSceneRender, public RenderingShaderLibrary {
 	friend RendererRD::SkyRD;
 	friend RendererRD::GI;
 

+ 252 - 120
servers/rendering/renderer_rd/shader_rd.cpp

@@ -152,10 +152,6 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
 	tohash.append(GODOT_VERSION_NUMBER);
 	tohash.append("[GodotVersionHash]");
 	tohash.append(GODOT_VERSION_HASH);
-	tohash.append("[SpirvCacheKey]");
-	tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key());
-	tohash.append("[BinaryCacheKey]");
-	tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key());
 	tohash.append("[Vertex]");
 	tohash.append(p_vertex_code ? p_vertex_code : "");
 	tohash.append("[Fragment]");
@@ -166,7 +162,7 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
 	base_sha256 = tohash.as_string().sha256_text();
 }
 
-RID ShaderRD::version_create() {
+RID ShaderRD::version_create(bool p_embedded) {
 	//initialize() was never called
 	ERR_FAIL_COND_V(group_to_variant_map.is_empty(), RID());
 
@@ -174,12 +170,22 @@ RID ShaderRD::version_create() {
 	version.dirty = true;
 	version.valid = false;
 	version.initialize_needed = true;
+	version.embedded = p_embedded;
 	version.variants.clear();
 	version.variant_data.clear();
+
 	version.mutex = memnew(Mutex);
 	RID rid = version_owner.make_rid(version);
-	MutexLock lock(versions_mutex);
-	version_mutexes.insert(rid, version.mutex);
+	{
+		MutexLock lock(versions_mutex);
+		version_mutexes.insert(rid, version.mutex);
+	}
+
+	if (p_embedded) {
+		MutexLock lock(shader_versions_embedded_set_mutex);
+		shader_versions_embedded_set.insert({ this, rid });
+	}
+
 	return rid;
 }
 
@@ -263,86 +269,49 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
 	}
 }
 
-void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) {
-	uint32_t variant = group_to_variant_map[p_data.group][p_variant];
-
-	if (!variants_enabled[variant]) {
-		return; // Variant is disabled, return.
-	}
-
-	Vector<RD::ShaderStageSPIRVData> stages;
-
-	String error;
-	String current_source;
-	RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
-	bool build_ok = true;
-
-	if (!is_compute) {
-		//vertex stage
-
-		StringBuilder builder;
-		_build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_VERTEX]);
-
-		current_source = builder.as_string();
-		RD::ShaderStageSPIRVData stage;
-		stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
-		if (stage.spirv.is_empty()) {
-			build_ok = false;
-		} else {
-			stage.shader_stage = RD::SHADER_STAGE_VERTEX;
-			stages.push_back(stage);
-		}
+Vector<String> ShaderRD::_build_variant_stage_sources(uint32_t p_variant, CompileData p_data) {
+	if (!variants_enabled[p_variant]) {
+		return Vector<String>(); // Variant is disabled, return.
 	}
 
-	if (!is_compute && build_ok) {
-		//fragment stage
-		current_stage = RD::SHADER_STAGE_FRAGMENT;
-
-		StringBuilder builder;
-		_build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_FRAGMENT]);
-
-		current_source = builder.as_string();
-		RD::ShaderStageSPIRVData stage;
-		stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
-		if (stage.spirv.is_empty()) {
-			build_ok = false;
-		} else {
-			stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
-			stages.push_back(stage);
-		}
-	}
+	Vector<String> stage_sources;
+	stage_sources.resize(RD::SHADER_STAGE_MAX);
 
 	if (is_compute) {
-		//compute stage
-		current_stage = RD::SHADER_STAGE_COMPUTE;
-
+		// Compute stage.
 		StringBuilder builder;
-		_build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]);
-
-		current_source = builder.as_string();
+		_build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]);
+		stage_sources.write[RD::SHADER_STAGE_COMPUTE] = builder.as_string();
+	} else {
+		{
+			// Vertex stage.
+			StringBuilder builder;
+			_build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_VERTEX]);
+			stage_sources.write[RD::SHADER_STAGE_VERTEX] = builder.as_string();
+		}
 
-		RD::ShaderStageSPIRVData stage;
-		stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
-		if (stage.spirv.is_empty()) {
-			build_ok = false;
-		} else {
-			stage.shader_stage = RD::SHADER_STAGE_COMPUTE;
-			stages.push_back(stage);
+		{
+			// Fragment stage.
+			StringBuilder builder;
+			_build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_FRAGMENT]);
+			stage_sources.write[RD::SHADER_STAGE_FRAGMENT] = builder.as_string();
 		}
 	}
 
-	if (!build_ok) {
-		ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ").");
-		ERR_PRINT(error);
+	return stage_sources;
+}
 
-#ifdef DEBUG_ENABLED
-		ERR_PRINT("code:\n" + current_source.get_with_code_lines());
-#endif
-		return;
+void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) {
+	uint32_t variant = group_to_variant_map[p_data.group][p_variant];
+	if (!variants_enabled[variant]) {
+		return; // Variant is disabled, return.
 	}
 
-	Vector<uint8_t> shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(stages, name + ":" + itos(variant));
+	Vector<String> variant_stage_sources = _build_variant_stage_sources(variant, p_data);
+	Vector<RD::ShaderStageSPIRVData> variant_stages = compile_stages(variant_stage_sources);
+	ERR_FAIL_COND(variant_stages.is_empty());
 
+	Vector<uint8_t> shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(variant_stages, name + ":" + itos(variant));
 	ERR_FAIL_COND(shader_data.is_empty());
 
 	{
@@ -351,6 +320,20 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) {
 	}
 }
 
+Vector<String> ShaderRD::version_build_variant_stage_sources(RID p_version, int p_variant) {
+	Version *version = version_owner.get_or_null(p_version);
+	ERR_FAIL_NULL_V(version, Vector<String>());
+
+	if (version->dirty) {
+		_initialize_version(version);
+	}
+
+	CompileData compile_data;
+	compile_data.version = version;
+	compile_data.group = variant_to_group[p_variant];
+	return _build_variant_stage_sources(p_variant, compile_data);
+}
+
 RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_version) {
 	Version *version = version_owner.get_or_null(p_version);
 	RS::ShaderNativeSourceCode source_code;
@@ -404,6 +387,13 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
 	return source_code;
 }
 
+String ShaderRD::version_get_cache_file_relative_path(RID p_version, int p_group, const String &p_api_name) {
+	Version *version = version_owner.get_or_null(p_version);
+	ERR_FAIL_NULL_V(version, String());
+
+	return _get_cache_file_relative_path(version, p_group, p_api_name);
+}
+
 String ShaderRD::_version_get_sha1(Version *p_version) const {
 	StringBuilder hash_build;
 
@@ -437,17 +427,31 @@ String ShaderRD::_version_get_sha1(Version *p_version) const {
 static const char *shader_file_header = "GDSC";
 static const uint32_t cache_file_version = 4;
 
-String ShaderRD::_get_cache_file_path(Version *p_version, int p_group) {
-	const String &sha1 = _version_get_sha1(p_version);
-	const String &api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower();
-	const String &path = shader_cache_dir.path_join(name).path_join(group_sha256[p_group]).path_join(sha1) + "." + api_safe_name + ".cache";
-	return path;
+String ShaderRD::_get_cache_file_relative_path(Version *p_version, int p_group, const String &p_api_name) {
+	String sha1 = _version_get_sha1(p_version);
+	return name.path_join(group_sha256[p_group]).path_join(sha1) + "." + p_api_name + ".cache";
+}
+
+String ShaderRD::_get_cache_file_path(Version *p_version, int p_group, const String &p_api_name, bool p_user_dir) {
+	const String &shader_cache_dir = p_user_dir ? shader_cache_user_dir : shader_cache_res_dir;
+	String relative_path = _get_cache_file_relative_path(p_version, p_group, p_api_name);
+	return shader_cache_dir.path_join(relative_path);
 }
 
 bool ShaderRD::_load_from_cache(Version *p_version, int p_group) {
-	const String &path = _get_cache_file_path(p_version, p_group);
-	Ref<FileAccess> f = FileAccess::open(path, FileAccess::READ);
+	String api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower();
+	Ref<FileAccess> f;
+	if (shader_cache_user_dir_valid) {
+		f = FileAccess::open(_get_cache_file_path(p_version, p_group, api_safe_name, true), FileAccess::READ);
+	}
+
 	if (f.is_null()) {
+		f = FileAccess::open(_get_cache_file_path(p_version, p_group, api_safe_name, false), FileAccess::READ);
+	}
+
+	if (f.is_null()) {
+		const String &sha1 = _version_get_sha1(p_version);
+		print_verbose(vformat("Shader cache miss for %s", name.path_join(group_sha256[p_group]).path_join(sha1)));
 		return false;
 	}
 
@@ -506,19 +510,14 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) {
 }
 
 void ShaderRD::_save_to_cache(Version *p_version, int p_group) {
-	ERR_FAIL_COND(!shader_cache_dir_valid);
-	const String &path = _get_cache_file_path(p_version, p_group);
+	ERR_FAIL_COND(!shader_cache_user_dir_valid);
+	String api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower();
+	const String &path = _get_cache_file_path(p_version, p_group, api_safe_name, true);
 	Ref<FileAccess> f = FileAccess::open(path, FileAccess::WRITE);
 	ERR_FAIL_COND(f.is_null());
-	f->store_buffer((const uint8_t *)shader_file_header, 4);
-	f->store_32(cache_file_version); // File version.
-	uint32_t variant_count = group_to_variant_map[p_group].size();
-	f->store_32(variant_count); // Variant count.
-	for (uint32_t i = 0; i < variant_count; i++) {
-		int variant_id = group_to_variant_map[p_group][i];
-		f->store_32(p_version->variant_data[variant_id].size()); // Stage count.
-		f->store_buffer(p_version->variant_data[variant_id].ptr(), p_version->variant_data[variant_id].size());
-	}
+
+	PackedByteArray shader_cache_bytes = ShaderRD::save_shader_cache_bytes(group_to_variant_map[p_group], p_version->variant_data);
+	f->store_buffer(shader_cache_bytes);
 }
 
 void ShaderRD::_allocate_placeholders(Version *p_version, int p_group) {
@@ -543,10 +542,8 @@ void ShaderRD::_compile_version_start(Version *p_version, int p_group) {
 	p_version->dirty = false;
 
 #if ENABLE_SHADER_CACHE
-	if (shader_cache_dir_valid) {
-		if (_load_from_cache(p_version, p_group)) {
-			return;
-		}
+	if (_load_from_cache(p_version, p_group)) {
+		return;
 	}
 #endif
 
@@ -595,7 +592,7 @@ void ShaderRD::_compile_version_end(Version *p_version, int p_group) {
 		return;
 	}
 #if ENABLE_SHADER_CACHE
-	else if (shader_cache_dir_valid) {
+	else if (shader_cache_user_dir_valid) {
 		_save_to_cache(p_version, p_group);
 	}
 #endif
@@ -714,6 +711,11 @@ bool ShaderRD::version_free(RID p_version) {
 		}
 
 		Version *version = version_owner.get_or_null(p_version);
+		if (version->embedded) {
+			MutexLock lock(shader_versions_embedded_set_mutex);
+			shader_versions_embedded_set.erase({ this, p_version });
+		}
+
 		version->mutex->lock();
 		_clear_version(version);
 		version_owner.free(p_version);
@@ -737,6 +739,14 @@ bool ShaderRD::is_variant_enabled(int p_variant) const {
 	return variants_enabled[p_variant];
 }
 
+int64_t ShaderRD::get_variant_count() const {
+	return variants_enabled.size();
+}
+
+int ShaderRD::get_variant_to_group(int p_variant) const {
+	return variant_to_group[p_variant];
+}
+
 void ShaderRD::enable_group(int p_group) {
 	ERR_FAIL_INDEX(p_group, group_enabled.size());
 
@@ -760,6 +770,18 @@ bool ShaderRD::is_group_enabled(int p_group) const {
 	return group_enabled[p_group];
 }
 
+int64_t ShaderRD::get_group_count() const {
+	return group_enabled.size();
+}
+
+const LocalVector<int> &ShaderRD::get_group_to_variants(int p_group) const {
+	return group_to_variant_map[p_group];
+}
+
+const String &ShaderRD::get_name() const {
+	return name;
+}
+
 bool ShaderRD::shader_cache_cleanup_on_start = false;
 
 ShaderRD::ShaderRD() {
@@ -778,12 +800,12 @@ ShaderRD::ShaderRD() {
 	base_compute_defines = base_compute_define_text.ascii();
 }
 
-void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &r_immutable_samplers) {
-	immutable_samplers = r_immutable_samplers;
+void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
 	ERR_FAIL_COND(variant_defines.size());
 	ERR_FAIL_COND(p_variant_defines.is_empty());
 
 	general_defines = p_general_defines.utf8();
+	immutable_samplers = p_immutable_samplers;
 
 	// When initialized this way, there is just one group and its always enabled.
 	group_to_variant_map.insert(0, LocalVector<int>{});
@@ -796,13 +818,18 @@ void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String
 		group_to_variant_map[0].push_back(i);
 	}
 
-	if (!shader_cache_dir.is_empty()) {
+	if (!shader_cache_user_dir.is_empty() || !shader_cache_res_dir.is_empty()) {
 		group_sha256.resize(1);
 		_initialize_cache();
 	}
 }
 
 void ShaderRD::_initialize_cache() {
+	shader_cache_user_dir_valid = !shader_cache_user_dir.is_empty();
+	if (!shader_cache_user_dir_valid) {
+		return;
+	}
+
 	for (const KeyValue<int, LocalVector<int>> &E : group_to_variant_map) {
 		StringBuilder hash_build;
 
@@ -819,34 +846,44 @@ void ShaderRD::_initialize_cache() {
 
 		group_sha256[E.key] = hash_build.as_string().sha256_text();
 
-		Ref<DirAccess> d = DirAccess::open(shader_cache_dir);
-		ERR_FAIL_COND(d.is_null());
-		if (d->change_dir(name) != OK) {
-			Error err = d->make_dir(name);
-			ERR_FAIL_COND(err != OK);
-			d->change_dir(name);
-		}
+		if (!shader_cache_user_dir.is_empty()) {
+			// Validate if it's possible to write to all the directories required by in the user directory.
+			Ref<DirAccess> d = DirAccess::open(shader_cache_user_dir);
+			if (d.is_null()) {
+				shader_cache_user_dir_valid = false;
+				ERR_FAIL_MSG(vformat("Unable to open shader cache directory at %s.", shader_cache_user_dir));
+			}
 
-		// Erase other versions?
-		if (shader_cache_cleanup_on_start) {
-		}
-		//
-		if (d->change_dir(group_sha256[E.key]) != OK) {
-			Error err = d->make_dir(group_sha256[E.key]);
-			ERR_FAIL_COND(err != OK);
+			if (d->change_dir(name) != OK) {
+				Error err = d->make_dir(name);
+				if (err != OK) {
+					shader_cache_user_dir_valid = false;
+					ERR_FAIL_MSG(vformat("Unable to create shader cache directory %s at %s.", name, shader_cache_user_dir));
+				}
+
+				d->change_dir(name);
+			}
+
+			if (d->change_dir(group_sha256[E.key]) != OK) {
+				Error err = d->make_dir(group_sha256[E.key]);
+				if (err != OK) {
+					shader_cache_user_dir_valid = false;
+					ERR_FAIL_MSG(vformat("Unable to create shader cache directory %s/%s at %s.", name, group_sha256[E.key], shader_cache_user_dir));
+				}
+			}
 		}
-		shader_cache_dir_valid = true;
 
 		print_verbose("Shader '" + name + "' (group " + itos(E.key) + ") SHA256: " + group_sha256[E.key]);
 	}
 }
 
 // Same as above, but allows specifying shader compilation groups.
-void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines) {
+void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
 	ERR_FAIL_COND(variant_defines.size());
 	ERR_FAIL_COND(p_variant_defines.is_empty());
 
 	general_defines = p_general_defines.utf8();
+	immutable_samplers = p_immutable_samplers;
 
 	int max_group_id = 0;
 
@@ -877,14 +914,38 @@ void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const
 		}
 	}
 
-	if (!shader_cache_dir.is_empty()) {
+	if (!shader_cache_user_dir.is_empty()) {
 		group_sha256.resize(max_group_id + 1);
 		_initialize_cache();
 	}
 }
 
-void ShaderRD::set_shader_cache_dir(const String &p_dir) {
-	shader_cache_dir = p_dir;
+void ShaderRD::shaders_embedded_set_lock() {
+	shader_versions_embedded_set_mutex.lock();
+}
+
+const ShaderRD::ShaderVersionPairSet &ShaderRD::shaders_embedded_set_get() {
+	return shader_versions_embedded_set;
+}
+
+void ShaderRD::shaders_embedded_set_unlock() {
+	shader_versions_embedded_set_mutex.unlock();
+}
+
+void ShaderRD::set_shader_cache_user_dir(const String &p_dir) {
+	shader_cache_user_dir = p_dir;
+}
+
+const String &ShaderRD::get_shader_cache_user_dir() {
+	return shader_cache_user_dir;
+}
+
+void ShaderRD::set_shader_cache_res_dir(const String &p_dir) {
+	shader_cache_res_dir = p_dir;
+}
+
+const String &ShaderRD::get_shader_cache_res_dir() {
+	return shader_cache_res_dir;
 }
 
 void ShaderRD::set_shader_cache_save_compressed(bool p_enable) {
@@ -899,7 +960,78 @@ void ShaderRD::set_shader_cache_save_debug(bool p_enable) {
 	shader_cache_save_debug = p_enable;
 }
 
-String ShaderRD::shader_cache_dir;
+Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &p_stage_sources) {
+	RD::ShaderStageSPIRVData stage;
+	Vector<RD::ShaderStageSPIRVData> stages;
+	String error;
+	RD::ShaderStage compilation_failed_stage = RD::SHADER_STAGE_MAX;
+	bool compilation_failed = false;
+	for (int64_t i = 0; i < p_stage_sources.size() && !compilation_failed; i++) {
+		if (p_stage_sources[i].is_empty()) {
+			continue;
+		}
+
+		stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::ShaderStage(i), p_stage_sources[i], RD::SHADER_LANGUAGE_GLSL, &error);
+		stage.shader_stage = RD::ShaderStage(i);
+		if (!stage.spirv.is_empty()) {
+			stages.push_back(stage);
+
+		} else {
+			compilation_failed_stage = RD::ShaderStage(i);
+			compilation_failed = true;
+		}
+	}
+
+	if (compilation_failed) {
+		ERR_PRINT("Error compiling " + String(compilation_failed_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (compilation_failed_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader.");
+		ERR_PRINT(error);
+
+#ifdef DEBUG_ENABLED
+		ERR_PRINT("code:\n" + p_stage_sources[compilation_failed_stage].get_with_code_lines());
+#endif
+
+		return Vector<RD::ShaderStageSPIRVData>();
+	} else {
+		return stages;
+	}
+}
+
+PackedByteArray ShaderRD::save_shader_cache_bytes(const LocalVector<int> &p_variants, const Vector<Vector<uint8_t>> &p_variant_data) {
+	uint32_t variant_count = p_variants.size();
+	PackedByteArray bytes;
+	int64_t total_size = 0;
+	total_size += 4 + sizeof(uint32_t) * 2;
+	for (uint32_t i = 0; i < variant_count; i++) {
+		total_size += sizeof(uint32_t) + p_variant_data[p_variants[i]].size();
+	}
+
+	bytes.resize(total_size);
+
+	uint8_t *bytes_ptr = bytes.ptrw();
+	memcpy(bytes_ptr, shader_file_header, 4);
+	bytes_ptr += 4;
+
+	*(uint32_t *)(bytes_ptr) = cache_file_version;
+	bytes_ptr += sizeof(uint32_t);
+
+	*(uint32_t *)(bytes_ptr) = variant_count;
+	bytes_ptr += sizeof(uint32_t);
+
+	for (uint32_t i = 0; i < variant_count; i++) {
+		int variant_id = p_variants[i];
+		*(uint32_t *)(bytes_ptr) = uint32_t(p_variant_data[variant_id].size());
+		bytes_ptr += sizeof(uint32_t);
+
+		memcpy(bytes_ptr, p_variant_data[variant_id].ptr(), p_variant_data[variant_id].size());
+		bytes_ptr += p_variant_data[variant_id].size();
+	}
+
+	DEV_ASSERT((bytes.ptrw() + bytes.size()) == bytes_ptr);
+	return bytes;
+}
+
+String ShaderRD::shader_cache_user_dir;
+String ShaderRD::shader_cache_res_dir;
 bool ShaderRD::shader_cache_save_compressed = true;
 bool ShaderRD::shader_cache_save_compressed_zstd = true;
 bool ShaderRD::shader_cache_save_debug = true;

+ 37 - 7
servers/rendering/renderer_rd/shader_rd.h

@@ -35,6 +35,7 @@
 #include "core/templates/hash_map.h"
 #include "core/templates/local_vector.h"
 #include "core/templates/rid_owner.h"
+#include "core/templates/self_list.h"
 #include "servers/rendering_server.h"
 
 class ShaderRD {
@@ -51,6 +52,9 @@ public:
 		}
 	};
 
+	typedef Pair<ShaderRD *, RID> ShaderVersionPair;
+	typedef HashSet<ShaderVersionPair> ShaderVersionPairSet;
+
 private:
 	//versions
 	CharString general_defines;
@@ -78,6 +82,7 @@ private:
 		bool valid;
 		bool dirty;
 		bool initialize_needed;
+		bool embedded;
 	};
 
 	struct CompileData {
@@ -85,6 +90,7 @@ private:
 		int group = 0;
 	};
 
+	// Vector will have the size of SHADER_STAGE_MAX and unused stages will have empty strings.
 	void _compile_variant(uint32_t p_variant, CompileData p_data);
 
 	void _initialize_version(Version *p_version);
@@ -126,12 +132,16 @@ private:
 	String base_sha256;
 	LocalVector<String> group_sha256;
 
-	static String shader_cache_dir;
+	static inline ShaderVersionPairSet shader_versions_embedded_set;
+	static inline Mutex shader_versions_embedded_set_mutex;
+
+	static String shader_cache_user_dir;
+	static String shader_cache_res_dir;
 	static bool shader_cache_cleanup_on_start;
 	static bool shader_cache_save_compressed;
 	static bool shader_cache_save_compressed_zstd;
 	static bool shader_cache_save_debug;
-	bool shader_cache_dir_valid = false;
+	bool shader_cache_user_dir_valid = false;
 
 	enum StageType {
 		STAGE_TYPE_VERTEX,
@@ -143,11 +153,13 @@ private:
 	StageTemplate stage_templates[STAGE_TYPE_MAX];
 
 	void _build_variant_code(StringBuilder &p_builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template);
+	Vector<String> _build_variant_stage_sources(uint32_t p_variant, CompileData p_data);
 
 	void _add_stage(const char *p_code, StageType p_stage_type);
 
 	String _version_get_sha1(Version *p_version) const;
-	String _get_cache_file_path(Version *p_version, int p_group);
+	String _get_cache_file_relative_path(Version *p_version, int p_group, const String &p_api_name);
+	String _get_cache_file_path(Version *p_version, int p_group, const String &p_api_name, bool p_user_dir);
 	bool _load_from_cache(Version *p_version, int p_group);
 	void _save_to_cache(Version *p_version, int p_group);
 	void _initialize_cache();
@@ -157,7 +169,7 @@ protected:
 	void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name);
 
 public:
-	RID version_create();
+	RID version_create(bool p_embedded = true);
 
 	void version_set_code(RID p_version, const HashMap<String, String> &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector<String> &p_custom_defines);
 	void version_set_compute_code(RID p_version, const HashMap<String, String> &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector<String> &p_custom_defines);
@@ -201,20 +213,38 @@ public:
 	// Enable/disable variants for things that you know won't be used at engine initialization time .
 	void set_variant_enabled(int p_variant, bool p_enabled);
 	bool is_variant_enabled(int p_variant) const;
+	int64_t get_variant_count() const;
+	int get_variant_to_group(int p_variant) const;
 
 	// Enable/disable groups for things that might be enabled at run time.
 	void enable_group(int p_group);
 	bool is_group_enabled(int p_group) const;
+	int64_t get_group_count() const;
+	const LocalVector<int> &get_group_to_variants(int p_group) const;
+
+	const String &get_name() const;
 
-	static void set_shader_cache_dir(const String &p_dir);
+	static void shaders_embedded_set_lock();
+	static const ShaderVersionPairSet &shaders_embedded_set_get();
+	static void shaders_embedded_set_unlock();
+
+	static void set_shader_cache_user_dir(const String &p_dir);
+	static const String &get_shader_cache_user_dir();
+	static void set_shader_cache_res_dir(const String &p_dir);
+	static const String &get_shader_cache_res_dir();
 	static void set_shader_cache_save_compressed(bool p_enable);
 	static void set_shader_cache_save_compressed_zstd(bool p_enable);
 	static void set_shader_cache_save_debug(bool p_enable);
 
+	static Vector<RD::ShaderStageSPIRVData> compile_stages(const Vector<String> &p_stage_sources);
+	static PackedByteArray save_shader_cache_bytes(const LocalVector<int> &p_variants, const Vector<Vector<uint8_t>> &p_variant_data);
+
+	Vector<String> version_build_variant_stage_sources(RID p_version, int p_variant);
 	RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version);
+	String version_get_cache_file_relative_path(RID p_version, int p_group, const String &p_api_name);
 
-	void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &r_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
-	void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "");
+	void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
+	void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
 
 	virtual ~ShaderRD();
 };

+ 2 - 2
servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl

@@ -143,8 +143,8 @@ struct Light {
 	vec4 atlas_rect;
 };
 
-layout(set = 0, binding = 2, std140) uniform LightData {
-	Light data[MAX_LIGHTS];
+layout(set = 0, binding = 2, std430) restrict readonly buffer LightData {
+	Light data[];
 }
 light_array;
 

+ 1 - 28
servers/rendering/renderer_rd/shaders/cluster_render.glsl

@@ -64,17 +64,11 @@ void main() {
 #version 450
 
 #VERSION_DEFINES
-#ifndef MOLTENVK_USED // Metal will corrupt GPU state otherwise
-#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) && defined(has_GL_KHR_shader_subgroup_vote)
 
 #extension GL_KHR_shader_subgroup_ballot : enable
 #extension GL_KHR_shader_subgroup_arithmetic : enable
 #extension GL_KHR_shader_subgroup_vote : enable
 
-#define USE_SUBGROUPS
-#endif
-#endif
-
 layout(location = 0) in float depth_interp;
 layout(location = 1) in flat uint element_index;
 
@@ -119,10 +113,7 @@ void main() {
 
 	uint aux = 0;
 
-#ifdef USE_SUBGROUPS
-
 	uint cluster_thread_group_index;
-
 	if (!gl_HelperInvocation) {
 		//https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
 
@@ -147,15 +138,7 @@ void main() {
 			aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
 		}
 	}
-#else
-// MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs
-#ifndef MOLTENVK_USED
-	if (!gl_HelperInvocation)
-#endif
-	{
-		aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
-	}
-#endif
+
 	//find the current element in the depth usage list and mark the current depth as used
 	float unit_depth = depth_interp * state.inv_z_far;
 
@@ -164,22 +147,12 @@ void main() {
 	uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
 	uint z_write_bit = 1 << z_bit;
 
-#ifdef USE_SUBGROUPS
 	if (!gl_HelperInvocation) {
 		z_write_bit = subgroupOr(z_write_bit); //merge all Zs
 		if (cluster_thread_group_index == 0) {
 			aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
 		}
 	}
-#else
-// MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs
-#ifndef MOLTENVK_USED
-	if (!gl_HelperInvocation)
-#endif
-	{
-		aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
-	}
-#endif
 
 #ifdef USE_ATTACHMENT
 	frag_color = vec4(float(aux));

+ 1 - 14
servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl

@@ -5,13 +5,9 @@
 #VERSION_DEFINES
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
 #extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#endif //USE_MULTIVIEW
+#endif // USE_MULTIVIEW
 
 #define FLAG_FLIP_Y (1 << 0)
 #define FLAG_USE_SECTION (1 << 1)
@@ -67,15 +63,6 @@ void main() {
 
 #VERSION_DEFINES
 
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
-#extension GL_EXT_multiview : enable
-#define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#endif //USE_MULTIVIEW
-
 #define FLAG_FLIP_Y (1 << 0)
 #define FLAG_USE_SECTION (1 << 1)
 #define FLAG_FORCE_LUMINANCE (1 << 2)

+ 2 - 29
servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl

@@ -4,21 +4,10 @@
 
 #VERSION_DEFINES
 
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
+#if defined(USE_MULTIVIEW)
 #extension GL_EXT_multiview : enable
-#endif
-
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
-#endif //USE_MULTIVIEW
+#endif // USE_MULTIVIEW
 
 #ifdef USE_MULTIVIEW
 layout(location = 0) out vec3 uv_interp;
@@ -41,22 +30,6 @@ void main() {
 
 #VERSION_DEFINES
 
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
-#define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
-#endif //USE_MULTIVIEW
-
 #ifdef USE_MULTIVIEW
 layout(location = 0) in vec3 uv_interp;
 #else // USE_MULTIVIEW

+ 0 - 23
servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl

@@ -32,19 +32,13 @@
 // Based on Spartan Engine's TAA implementation (without TAA upscale).
 // <https://github.com/PanosK92/SpartanEngine/blob/a8338d0609b85dc32f3732a5c27fb4463816a3b9/Data/shaders/temporal_antialiasing.hlsl>
 
-#ifndef MOLTENVK_USED
-#define USE_SUBGROUPS
-#endif // MOLTENVK_USED
-
 #define GROUP_SIZE 8
 #define FLT_MIN 0.00000001
 #define FLT_MAX 32767.0
 #define RPC_9 0.11111111111
 #define RPC_16 0.0625
 
-#ifdef USE_SUBGROUPS
 layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in;
-#endif
 
 layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer;
 layout(set = 0, binding = 1) uniform sampler2D depth_buffer;
@@ -92,7 +86,6 @@ float get_depth(ivec2 thread_id) {
 	return texelFetch(depth_buffer, thread_id, 0).r;
 }
 
-#ifdef USE_SUBGROUPS
 shared vec3 tile_color[kTileDimension][kTileDimension];
 shared float tile_depth[kTileDimension][kTileDimension];
 
@@ -141,15 +134,6 @@ void populate_group_shared_memory(uvec2 group_id, uint group_index) {
 	groupMemoryBarrier();
 	barrier();
 }
-#else
-vec3 load_color(uvec2 screen_pos) {
-	return imageLoad(color_buffer, ivec2(screen_pos)).rgb;
-}
-
-float load_depth(uvec2 screen_pos) {
-	return get_depth(ivec2(screen_pos));
-}
-#endif
 
 /*------------------------------------------------------------------------------
 								VELOCITY
@@ -380,22 +364,15 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_
 }
 
 void main() {
-#ifdef USE_SUBGROUPS
 	populate_group_shared_memory(gl_WorkGroupID.xy, gl_LocalInvocationIndex);
-#endif
 
 	// Out of bounds check
 	if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) {
 		return;
 	}
 
-#ifdef USE_SUBGROUPS
 	const uvec2 pos_group = gl_LocalInvocationID.xy;
 	const uvec2 pos_group_top_left = gl_WorkGroupID.xy * kGroupSize - kBorderSize;
-#else
-	const uvec2 pos_group = gl_GlobalInvocationID.xy;
-	const uvec2 pos_group_top_left = uvec2(0, 0);
-#endif
 	const uvec2 pos_screen = gl_GlobalInvocationID.xy;
 	const vec2 uv = (gl_GlobalInvocationID.xy + 0.5f) / params.resolution;
 

+ 0 - 10
servers/rendering/renderer_rd/shaders/effects/tonemap.glsl

@@ -4,12 +4,6 @@
 
 #VERSION_DEFINES
 
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
-#extension GL_EXT_multiview : enable
-#endif
-#endif
-
 layout(location = 0) out vec2 uv_interp;
 
 void main() {
@@ -38,12 +32,8 @@ void main() {
 #VERSION_DEFINES
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
 #extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
 #endif //USE_MULTIVIEW
 
 layout(location = 0) in vec2 uv_interp;

+ 0 - 13
servers/rendering/renderer_rd/shaders/effects/vrs.glsl

@@ -5,12 +5,8 @@
 #VERSION_DEFINES
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
 #extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
 #endif //USE_MULTIVIEW
 
 #ifdef USE_MULTIVIEW
@@ -42,15 +38,6 @@ void main() {
 
 #VERSION_DEFINES
 
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
-#extension GL_EXT_multiview : enable
-#define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#endif //USE_MULTIVIEW
-
 #ifdef USE_MULTIVIEW
 layout(location = 0) in vec3 uv_interp;
 layout(set = 0, binding = 0) uniform sampler2DArray source_color;

+ 2 - 27
servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl

@@ -2,21 +2,12 @@
 
 #version 450
 
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
+#extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
 #else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
 #define ViewIndex 0
-#endif //USE_MULTIVIEW
+#endif // !USE_MULTIVIEW
 
 #VERSION_DEFINES
 
@@ -174,22 +165,6 @@ void main() {
 
 #version 450
 
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
-#ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
-#define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
-#endif //USE_MULTIVIEW
-
 #VERSION_DEFINES
 
 #define MAX_VIEWS 2

+ 1 - 16
servers/rendering/renderer_rd/shaders/environment/sky.glsl

@@ -4,12 +4,6 @@
 
 #VERSION_DEFINES
 
-#define MAX_VIEWS 2
-
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
 layout(location = 0) out vec2 uv_interp;
 
 layout(push_constant, std430) uniform Params {
@@ -36,20 +30,11 @@ void main() {
 #VERSION_DEFINES
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
 #extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
-#else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
-#endif //USE_MULTIVIEW
+#endif
 
 #define M_PI 3.14159265359
-#define MAX_VIEWS 2
 
 layout(location = 0) in vec2 uv_interp;
 

+ 1 - 37
servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl

@@ -4,15 +4,6 @@
 
 #VERSION_DEFINES
 
-/* Do not use subgroups here, seems there is not much advantage and causes glitches
-#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic)
-#extension GL_KHR_shader_subgroup_ballot: enable
-#extension GL_KHR_shader_subgroup_arithmetic: enable
-
-#define USE_SUBGROUPS
-#endif
-*/
-
 #ifdef MODE_DENSITY
 layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
 #else
@@ -459,28 +450,15 @@ void main() {
 
 			cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
-			item_from = subgroupBroadcastFirst(subgroupMin(item_from));
-			item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
-
 			for (uint i = item_from; i < item_to; i++) {
 				uint mask = cluster_buffer.data[cluster_omni_offset + i];
 				mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-				uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
 				uint merged_mask = mask;
-#endif
 
 				while (merged_mask != 0) {
 					uint bit = findMSB(merged_mask);
 					merged_mask &= ~(1 << bit);
-#ifdef USE_SUBGROUPS
-					if (((1 << bit) & mask) == 0) { //do not process if not originally here
-						continue;
-					}
-#endif
+
 					uint light_index = 32 * i + bit;
 
 					//if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) {
@@ -539,28 +517,14 @@ void main() {
 
 			cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
-			item_from = subgroupBroadcastFirst(subgroupMin(item_from));
-			item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
-
 			for (uint i = item_from; i < item_to; i++) {
 				uint mask = cluster_buffer.data[cluster_spot_offset + i];
 				mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-				uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
 				uint merged_mask = mask;
-#endif
 
 				while (merged_mask != 0) {
 					uint bit = findMSB(merged_mask);
 					merged_mask &= ~(1 << bit);
-#ifdef USE_SUBGROUPS
-					if (((1 << bit) & mask) == 0) { //do not process if not originally here
-						continue;
-					}
-#endif
 
 					//if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
 					//	continue; //not masked

+ 15 - 52
servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl

@@ -134,12 +134,9 @@ layout(location = 9) out float dp_clip;
 layout(location = 10) out flat uint instance_index_interp;
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
+#extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
+
 vec3 multiview_uv(vec2 uv) {
 	return vec3(uv, ViewIndex);
 }
@@ -148,15 +145,12 @@ ivec3 multiview_uv(ivec2 uv) {
 }
 layout(location = 11) out vec4 combined_projected;
 #else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
 vec2 multiview_uv(vec2 uv) {
 	return uv;
 }
 ivec2 multiview_uv(ivec2 uv) {
 	return uv;
 }
-
 #endif //USE_MULTIVIEW
 
 #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING)
@@ -915,12 +909,8 @@ vec4 textureArray_bicubic(texture2DArray tex, vec3 uv, vec2 texture_size) {
 #endif //USE_LIGHTMAP
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
+#extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else // has_VK_KHR_multiview
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif // has_VK_KHR_multiview
 vec3 multiview_uv(vec2 uv) {
 	return vec3(uv, ViewIndex);
 }
@@ -929,15 +919,13 @@ ivec3 multiview_uv(ivec2 uv) {
 }
 layout(location = 11) in vec4 combined_projected;
 #else // USE_MULTIVIEW
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
 vec2 multiview_uv(vec2 uv) {
 	return uv;
 }
 ivec2 multiview_uv(ivec2 uv) {
 	return uv;
 }
-#endif //USE_MULTIVIEW
+#endif // !USE_MULTIVIEW
 #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING)
 layout(location = 12) highp in vec4 diffuse_light_interp;
 layout(location = 13) highp in vec4 specular_light_interp;
@@ -1452,28 +1440,22 @@ void fragment_shader(in SceneData scene_data) {
 
 		cluster_get_item_range(cluster_decal_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
 		item_from = subgroupBroadcastFirst(subgroupMin(item_from));
 		item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
 
 		for (uint i = item_from; i < item_to; i++) {
 			uint mask = cluster_buffer.data[cluster_decal_offset + i];
 			mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
-			uint merged_mask = mask;
-#endif
 
+			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
 			while (merged_mask != 0) {
 				uint bit = findMSB(merged_mask);
 				merged_mask &= ~(1u << bit);
-#ifdef USE_SUBGROUPS
+
 				if (((1u << bit) & mask) == 0) { //do not process if not originally here
 					continue;
 				}
-#endif
+
 				uint decal_index = 32 * i + bit;
 
 				if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) {
@@ -1931,10 +1913,8 @@ void fragment_shader(in SceneData scene_data) {
 
 		cluster_get_item_range(cluster_reflection_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
 		item_from = subgroupBroadcastFirst(subgroupMin(item_from));
 		item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
 
 #ifdef LIGHT_ANISOTROPY_USED
 		// https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy
@@ -1952,20 +1932,16 @@ void fragment_shader(in SceneData scene_data) {
 		for (uint i = item_from; i < item_to; i++) {
 			uint mask = cluster_buffer.data[cluster_reflection_offset + i];
 			mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
-			uint merged_mask = mask;
-#endif
 
+			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
 			while (merged_mask != 0) {
 				uint bit = findMSB(merged_mask);
 				merged_mask &= ~(1u << bit);
-#ifdef USE_SUBGROUPS
+
 				if (((1u << bit) & mask) == 0) { //do not process if not originally here
 					continue;
 				}
-#endif
+
 				uint reflection_index = 32 * i + bit;
 
 				if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) {
@@ -2515,28 +2491,22 @@ void fragment_shader(in SceneData scene_data) {
 
 		cluster_get_item_range(cluster_omni_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
 		item_from = subgroupBroadcastFirst(subgroupMin(item_from));
 		item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
 
 		for (uint i = item_from; i < item_to; i++) {
 			uint mask = cluster_buffer.data[cluster_omni_offset + i];
 			mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
-			uint merged_mask = mask;
-#endif
 
+			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
 			while (merged_mask != 0) {
 				uint bit = findMSB(merged_mask);
 				merged_mask &= ~(1u << bit);
-#ifdef USE_SUBGROUPS
+
 				if (((1u << bit) & mask) == 0) { //do not process if not originally here
 					continue;
 				}
-#endif
+
 				uint light_index = 32 * i + bit;
 
 				if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) {
@@ -2582,28 +2552,21 @@ void fragment_shader(in SceneData scene_data) {
 
 		cluster_get_item_range(cluster_spot_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
 
-#ifdef USE_SUBGROUPS
 		item_from = subgroupBroadcastFirst(subgroupMin(item_from));
 		item_to = subgroupBroadcastFirst(subgroupMax(item_to));
-#endif
 
 		for (uint i = item_from; i < item_to; i++) {
 			uint mask = cluster_buffer.data[cluster_spot_offset + i];
 			mask &= cluster_get_range_clip_mask(i, item_min, item_max);
-#ifdef USE_SUBGROUPS
-			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
-#else
-			uint merged_mask = mask;
-#endif
 
+			uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
 			while (merged_mask != 0) {
 				uint bit = findMSB(merged_mask);
 				merged_mask &= ~(1u << bit);
-#ifdef USE_SUBGROUPS
+
 				if (((1u << bit) & mask) == 0) { //do not process if not originally here
 					continue;
 				}
-#endif
 
 				uint light_index = 32 * i + bit;
 

+ 0 - 11
servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl

@@ -5,20 +5,9 @@
 #define MAX_VOXEL_GI_INSTANCES 8
 #define MAX_VIEWS 2
 
-#ifndef MOLTENVK_USED
-#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic)
-
 #extension GL_KHR_shader_subgroup_ballot : enable
 #extension GL_KHR_shader_subgroup_arithmetic : enable
 
-#define USE_SUBGROUPS
-#endif
-#endif // MOLTENVK_USED
-
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
 #include "../cluster_data_inc.glsl"
 #include "../decal_data_inc.glsl"
 #include "../scene_data_inc.glsl"

+ 6 - 18
servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl

@@ -128,28 +128,22 @@ layout(location = 9) out highp float dp_clip;
 #endif
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
+#extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif
 vec3 multiview_uv(vec2 uv) {
 	return vec3(uv, ViewIndex);
 }
 ivec3 multiview_uv(ivec2 uv) {
 	return ivec3(uv, int(ViewIndex));
 }
-#else
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
+#else // USE_MULTIVIEW
 vec2 multiview_uv(vec2 uv) {
 	return uv;
 }
 ivec2 multiview_uv(ivec2 uv) {
 	return uv;
 }
-#endif //USE_MULTIVIEW
+#endif // !USE_MULTIVIEW
 
 invariant gl_Position;
 
@@ -712,28 +706,22 @@ vec4 textureArray_bicubic(texture2DArray tex, vec3 uv, vec2 texture_size) {
 #endif //USE_LIGHTMAP
 
 #ifdef USE_MULTIVIEW
-#ifdef has_VK_KHR_multiview
+#extension GL_EXT_multiview : enable
 #define ViewIndex gl_ViewIndex
-#else
-// !BAS! This needs to become an input once we implement our fallback!
-#define ViewIndex 0
-#endif
 vec3 multiview_uv(vec2 uv) {
 	return vec3(uv, ViewIndex);
 }
 ivec3 multiview_uv(ivec2 uv) {
 	return ivec3(uv, int(ViewIndex));
 }
-#else
-// Set to zero, not supported in non stereo
-#define ViewIndex 0
+#else // USE_MULTIVIEW
 vec2 multiview_uv(vec2 uv) {
 	return uv;
 }
 ivec2 multiview_uv(ivec2 uv) {
 	return uv;
 }
-#endif //USE_MULTIVIEW
+#endif // !USE_MULTIVIEW
 
 //defines to keep compatibility with vertex
 

+ 0 - 4
servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl

@@ -1,10 +1,6 @@
 #define M_PI 3.14159265359
 #define MAX_VIEWS 2
 
-#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview)
-#extension GL_EXT_multiview : enable
-#endif
-
 #include "../decal_data_inc.glsl"
 #include "../scene_data_inc.glsl"
 

+ 33 - 1
servers/rendering/renderer_rd/storage_rd/material_storage.cpp

@@ -1936,12 +1936,19 @@ RID MaterialStorage::shader_allocate() {
 	return shader_owner.allocate_rid();
 }
 
-void MaterialStorage::shader_initialize(RID p_rid) {
+void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) {
 	Shader shader;
 	shader.data = nullptr;
 	shader.type = SHADER_TYPE_MAX;
+	shader.embedded = p_embedded;
 
 	shader_owner.initialize_rid(p_rid, shader);
+
+	if (p_embedded) {
+		// Add to the global embedded set.
+		MutexLock lock(embedded_set_mutex);
+		embedded_set.insert(p_rid);
+	}
 }
 
 void MaterialStorage::shader_free(RID p_rid) {
@@ -1957,6 +1964,13 @@ void MaterialStorage::shader_free(RID p_rid) {
 	if (shader->data) {
 		memdelete(shader->data);
 	}
+
+	if (shader->embedded) {
+		// Remove from the global embedded set.
+		MutexLock lock(embedded_set_mutex);
+		embedded_set.erase(p_rid);
+	}
+
 	shader_owner.free(p_rid);
 }
 
@@ -2112,6 +2126,12 @@ void MaterialStorage::shader_set_data_request_function(ShaderType p_shader_type,
 	shader_data_request_func[p_shader_type] = p_function;
 }
 
+MaterialStorage::ShaderData *MaterialStorage::shader_get_data(RID p_shader) const {
+	Shader *shader = shader_owner.get_or_null(p_shader);
+	ERR_FAIL_NULL_V(shader, nullptr);
+	return shader->data;
+}
+
 RS::ShaderNativeSourceCode MaterialStorage::shader_get_native_source_code(RID p_shader) const {
 	Shader *shader = shader_owner.get_or_null(p_shader);
 	ERR_FAIL_NULL_V(shader, RS::ShaderNativeSourceCode());
@@ -2121,6 +2141,18 @@ RS::ShaderNativeSourceCode MaterialStorage::shader_get_native_source_code(RID p_
 	return RS::ShaderNativeSourceCode();
 }
 
+void MaterialStorage::shader_embedded_set_lock() {
+	embedded_set_mutex.lock();
+}
+
+const HashSet<RID> &MaterialStorage::shader_embedded_set_get() const {
+	return embedded_set;
+}
+
+void MaterialStorage::shader_embedded_set_unlock() {
+	embedded_set_mutex.unlock();
+}
+
 /* MATERIAL API */
 
 void MaterialStorage::_material_uniform_set_erased(void *p_material) {

+ 10 - 2
servers/rendering/renderer_rd/storage_rd/material_storage.h

@@ -79,7 +79,8 @@ public:
 		virtual void set_code(const String &p_Code) = 0;
 		virtual bool is_animated() const = 0;
 		virtual bool casts_shadows() const = 0;
-		virtual RS::ShaderNativeSourceCode get_native_source_code() const { return RS::ShaderNativeSourceCode(); }
+		virtual RS::ShaderNativeSourceCode get_native_source_code() const = 0;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const = 0;
 
 		virtual ~ShaderData() {}
 
@@ -220,12 +221,15 @@ private:
 		ShaderType type;
 		HashMap<StringName, HashMap<int, RID>> default_texture_parameter;
 		HashSet<Material *> owners;
+		bool embedded = false;
 	};
 
 	typedef ShaderData *(*ShaderDataRequestFunction)();
 	ShaderDataRequestFunction shader_data_request_func[SHADER_TYPE_MAX];
 
 	mutable RID_Owner<Shader, true> shader_owner;
+	HashSet<RID> embedded_set;
+	Mutex embedded_set_mutex;
 	Shader *get_shader(RID p_rid) { return shader_owner.get_or_null(p_rid); }
 
 	/* MATERIAL API */
@@ -406,7 +410,7 @@ public:
 	bool owns_shader(RID p_rid) { return shader_owner.owns(p_rid); }
 
 	virtual RID shader_allocate() override;
-	virtual void shader_initialize(RID p_shader) override;
+	virtual void shader_initialize(RID p_shader, bool p_embedded = true) override;
 	virtual void shader_free(RID p_rid) override;
 
 	virtual void shader_set_code(RID p_shader, const String &p_code) override;
@@ -418,8 +422,12 @@ public:
 	virtual RID shader_get_default_texture_parameter(RID p_shader, const StringName &p_name, int p_index) const override;
 	virtual Variant shader_get_parameter_default(RID p_shader, const StringName &p_param) const override;
 	void shader_set_data_request_function(ShaderType p_shader_type, ShaderDataRequestFunction p_function);
+	ShaderData *shader_get_data(RID p_shader) const;
 
 	virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override;
+	virtual void shader_embedded_set_lock() override;
+	virtual const HashSet<RID> &shader_embedded_set_get() const override;
+	virtual void shader_embedded_set_unlock() override;
 
 	/* MATERIAL API */
 

+ 4 - 0
servers/rendering/renderer_rd/storage_rd/particles_storage.cpp

@@ -1748,6 +1748,10 @@ RS::ShaderNativeSourceCode ParticlesStorage::ParticlesShaderData::get_native_sou
 	return ParticlesStorage::get_singleton()->particles_shader.shader.version_get_native_source_code(version);
 }
 
+Pair<ShaderRD *, RID> ParticlesStorage::ParticlesShaderData::get_native_shader_and_version() const {
+	return { &ParticlesStorage::get_singleton()->particles_shader.shader, version };
+}
+
 ParticlesStorage::ParticlesShaderData::~ParticlesShaderData() {
 	//pipeline variants will clear themselves if shader is gone
 	if (version.is_valid()) {

+ 1 - 0
servers/rendering/renderer_rd/storage_rd/particles_storage.h

@@ -364,6 +364,7 @@ private:
 		virtual bool is_animated() const;
 		virtual bool casts_shadows() const;
 		virtual RS::ShaderNativeSourceCode get_native_source_code() const;
+		virtual Pair<ShaderRD *, RID> get_native_shader_and_version() const;
 
 		ParticlesShaderData() {}
 		virtual ~ParticlesShaderData();

+ 41 - 42
servers/rendering/rendering_device.cpp

@@ -36,6 +36,11 @@
 
 #include "core/config/project_settings.h"
 #include "core/io/dir_access.h"
+#include "modules/modules_enabled.gen.h"
+
+#ifdef MODULE_GLSLANG_ENABLED
+#include "modules/glslang/shader_compile.h"
+#endif
 
 #define FORCE_SEPARATE_PRESENT_QUEUE 0
 #define PRINT_FRAMEBUFFER_FORMAT 0
@@ -135,10 +140,6 @@ RenderingDevice *RenderingDevice::get_singleton() {
 	return singleton;
 }
 
-RenderingDevice::ShaderCompileToSPIRVFunction RenderingDevice::compile_to_spirv_function = nullptr;
-RenderingDevice::ShaderCacheFunction RenderingDevice::cache_function = nullptr;
-RenderingDevice::ShaderSPIRVGetCacheKeyFunction RenderingDevice::get_spirv_cache_key_function = nullptr;
-
 /***************************/
 /**** ID INFRASTRUCTURE ****/
 /***************************/
@@ -191,36 +192,18 @@ void RenderingDevice::_free_dependencies(RID p_id) {
 /**** SHADER INFRASTRUCTURE ****/
 /*******************************/
 
-void RenderingDevice::shader_set_compile_to_spirv_function(ShaderCompileToSPIRVFunction p_function) {
-	compile_to_spirv_function = p_function;
-}
-
-void RenderingDevice::shader_set_spirv_cache_function(ShaderCacheFunction p_function) {
-	cache_function = p_function;
-}
-
-void RenderingDevice::shader_set_get_cache_key_function(ShaderSPIRVGetCacheKeyFunction p_function) {
-	get_spirv_cache_key_function = p_function;
-}
-
 Vector<uint8_t> RenderingDevice::shader_compile_spirv_from_source(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, bool p_allow_cache) {
-	if (p_allow_cache && cache_function) {
-		Vector<uint8_t> cache = cache_function(p_stage, p_source_code, p_language);
-		if (cache.size()) {
-			return cache;
+	switch (p_language) {
+#ifdef MODULE_GLSLANG_ENABLED
+		case ShaderLanguage::SHADER_LANGUAGE_GLSL: {
+			ShaderLanguageVersion language_version = driver->get_shader_container_format().get_shader_language_version();
+			ShaderSpirvVersion spirv_version = driver->get_shader_container_format().get_shader_spirv_version();
+			return compile_glslang_shader(p_stage, ShaderIncludeDB::parse_include_files(p_source_code), language_version, spirv_version, r_error);
 		}
+#endif
+		default:
+			ERR_FAIL_V_MSG(Vector<uint8_t>(), "Shader language is not supported.");
 	}
-
-	ERR_FAIL_NULL_V(compile_to_spirv_function, Vector<uint8_t>());
-
-	return compile_to_spirv_function(p_stage, ShaderIncludeDB::parse_include_files(p_source_code), p_language, r_error, this);
-}
-
-String RenderingDevice::shader_get_spirv_cache_key() const {
-	if (get_spirv_cache_key_function) {
-		return get_spirv_cache_key_function(this);
-	}
-	return String();
 }
 
 RID RenderingDevice::shader_create_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name) {
@@ -3360,12 +3343,23 @@ String RenderingDevice::_shader_uniform_debug(RID p_shader, int p_set) {
 	return ret;
 }
 
-String RenderingDevice::shader_get_binary_cache_key() const {
-	return driver->shader_get_binary_cache_key();
-}
-
 Vector<uint8_t> RenderingDevice::shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name) {
-	return driver->shader_compile_binary_from_spirv(p_spirv, p_shader_name);
+	ShaderReflection shader_refl;
+	if (reflect_spirv(p_spirv, shader_refl) != OK) {
+		return Vector<uint8_t>();
+	}
+
+	const RenderingShaderContainerFormat &container_format = driver->get_shader_container_format();
+	Ref<RenderingShaderContainer> shader_container = container_format.create_container();
+	ERR_FAIL_COND_V(shader_container.is_null(), Vector<uint8_t>());
+
+	shader_container->set_from_shader_reflection(p_shader_name, shader_refl);
+
+	// Compile shader binary from SPIR-V.
+	bool code_compiled = shader_container->set_code_from_spirv(p_spirv);
+	ERR_FAIL_COND_V_MSG(!code_compiled, Vector<uint8_t>(), vformat("Failed to compile code to native for SPIR-V."));
+
+	return shader_container->to_bytes();
 }
 
 RID RenderingDevice::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, RID p_placeholder) {
@@ -3379,8 +3373,11 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector<uint8_t> &p_shader
 RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vector<uint8_t> &p_shader_binary, RID p_placeholder, const Vector<PipelineImmutableSampler> &p_immutable_samplers) {
 	_THREAD_SAFE_METHOD_
 
-	ShaderDescription shader_desc;
-	String name;
+	Ref<RenderingShaderContainer> shader_container = driver->get_shader_container_format().create_container();
+	ERR_FAIL_COND_V(shader_container.is_null(), RID());
+
+	bool parsed_container = shader_container->from_bytes(p_shader_binary);
+	ERR_FAIL_COND_V_MSG(!parsed_container, RID(), "Failed to parse shader container from binary.");
 
 	Vector<RDD::ImmutableSampler> driver_immutable_samplers;
 	for (const PipelineImmutableSampler &source_sampler : p_immutable_samplers) {
@@ -3395,7 +3392,8 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vector<uint
 
 		driver_immutable_samplers.append(driver_sampler);
 	}
-	RDD::ShaderID shader_id = driver->shader_create_from_bytecode(p_shader_binary, shader_desc, name, driver_immutable_samplers);
+
+	RDD::ShaderID shader_id = driver->shader_create_from_container(shader_container, driver_immutable_samplers);
 	ERR_FAIL_COND_V(!shader_id, RID());
 
 	// All good, let's create modules.
@@ -3410,8 +3408,9 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vector<uint
 	Shader *shader = shader_owner.get_or_null(id);
 	ERR_FAIL_NULL_V(shader, RID());
 
-	*((ShaderDescription *)shader) = shader_desc; // ShaderDescription bundle.
-	shader->name = name;
+	*((ShaderReflection *)shader) = shader_container->get_shader_reflection();
+	shader->name.clear();
+	shader->name.append_utf8(shader_container->shader_name);
 	shader->driver_id = shader_id;
 	shader->layout_hash = driver->shader_get_layout_hash(shader_id);
 
@@ -3437,7 +3436,7 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vector<uint
 		shader->set_formats.push_back(format);
 	}
 
-	for (ShaderStage stage : shader_desc.stages) {
+	for (ShaderStage stage : shader->stages_vector) {
 		switch (stage) {
 			case SHADER_STAGE_VERTEX:
 				shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT);

Bu fark içinde çok fazla dosya değişikliği olduğu için bazı dosyalar gösterilmiyor