@@ -72,6 +72,97 @@ GODOT_GCC_WARNING_POP
 GODOT_CLANG_WARNING_POP
 GODOT_MSVC_WARNING_POP
 
+// SPIR-V to DXIL conversion performs far too many small allocations, causing
+// worker threads to bottleneck each other on the shared global process heap.
+// Making each thread allocate from its own heap avoids that contention.
+#define SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+#ifdef SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+namespace {
+struct Win32Heap {
+	HANDLE handle;
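+	// Counts the owning thread plus every live allocation made from this heap.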
+	SafeRefCount ref_count;
+
+	Win32Heap() {
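+		// HeapCreate(0, 0, 0): serialized access, default initial size, growable with no maximum.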
+		handle = HeapCreate(0, 0, 0);
+		ref_count.init();
+	}
+
+	~Win32Heap() {
+		HeapDestroy(handle);
+	}
+};
+
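+// Two pointer-sized slots: the first stores the owning heap; the second is
+// padding, which also keeps the returned block 16-byte aligned on 64-bit.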
+constexpr size_t ALLOC_HEADER_SIZE = sizeof(Win32Heap *) * 2;
+} //namespace
+
+extern "C" {
+void *godot_nir_malloc(size_t p_size) {
+	// RAII holder so the per-thread heap can be destroyed when its thread exits.
+	struct Win32HeapHolder {
+		Win32Heap *win32_heap = nullptr;
+
+		Win32HeapHolder() {
+			win32_heap = memnew(Win32Heap);
+		}
+
+		~Win32HeapHolder() {
+			if (win32_heap->ref_count.unref()) {
+				memdelete(win32_heap);
+			}
+		}
+	};
+
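+	// One heap per thread, created lazily on the thread's first allocation.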
+	thread_local Win32HeapHolder holder;
+
+	void *block = HeapAlloc(holder.win32_heap->handle, 0, p_size + ALLOC_HEADER_SIZE);
+
+	// Store the owning heap in the allocation's header so realloc/free can find it.
+	*(Win32Heap **)block = holder.win32_heap;
+	holder.win32_heap->ref_count.ref();
+
+	return (uint8_t *)block + ALLOC_HEADER_SIZE;
+}
+
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+	Win32Heap *win32_heap = *(Win32Heap **)actual_block;
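+	// The block stays on its original heap, so the reference count does not change.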
+	return (uint8_t *)HeapReAlloc(win32_heap->handle, 0, actual_block, p_size + ALLOC_HEADER_SIZE) + ALLOC_HEADER_SIZE;
+}
+
+void godot_nir_free(void *p_block) {
+	if (p_block != nullptr) {
+		uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+		Win32Heap *win32_heap = *(Win32Heap **)actual_block;
+		HeapFree(win32_heap->handle, 0, actual_block);
+
+		// Allocations can outlive the threads they were created in if they were stored globally.
+		if (win32_heap->ref_count.unref()) {
+			memdelete(win32_heap);
+		}
+	}
+}
+}
+
+#else
+
+extern "C" {
+void *godot_nir_malloc(size_t p_size) {
+	return malloc(p_size);
+}
+
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	return realloc(p_block, p_size);
+}
+
+void godot_nir_free(void *p_block) {
+	return free(p_block);
+}
+}
+
+#endif
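+
+// Usage sketch (illustrative): both variants share the same contract, and a
+// block may be freed from a different thread than the one that allocated it:
+//   void *p = godot_nir_malloc(128);
+//   p = godot_nir_realloc(p, 256); // The owning heap travels in the block's header.
+//   godot_nir_free(p);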
+
 static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) {
 	switch (p_stages_mask) {
 		case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT:
@@ -274,8 +365,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 	dxil_spirv_runtime_conf dxil_runtime_conf = {};
 	dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER;
 	dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
-	dxil_runtime_conf.zero_based_vertex_instance_id = true;
-	dxil_runtime_conf.zero_based_compute_workgroup_id = true;
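+	// DXIL_SPIRV_SYSVAL_TYPE_ZERO preserves the zero-based behavior of the old booleans.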
+	dxil_runtime_conf.first_vertex_and_base_instance_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
+	dxil_runtime_conf.workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
 
 	// Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
 	// It also makes it easier to implement descriptor heaps and enhanced barriers.
@@ -290,7 +381,7 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 		r_stages_processed.set_flag(stage_flag);
 
 		const char *entry_point = "main";
-		static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
+		static const mesa_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
 			MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX
 			MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT
 			MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL
@@ -320,8 +411,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 		}
 
 		dxil_spirv_nir_prep(shader);
-		bool requires_runtime_data = false;
-		dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data);
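+		// dxil_spirv_metadata replaces the old bare requires_runtime_data output flag.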
+		dxil_spirv_metadata dxil_metadata = {};
+		dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &dxil_metadata);
 
 		r_stages_nir_shaders[stage] = shader;
 	}
@@ -360,8 +451,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 			}
 		}
 		if (prev_shader) {
-			bool requires_runtime_data = {};
-			dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data);
+			dxil_spirv_metadata dxil_metadata = {};
+			dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &dxil_metadata);
 		}
 	}
 
@@ -434,7 +525,9 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span<ReflectShaderSta
 	};
 
 	// This structure must live as long as the shaders are alive.
-	nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options();
+	nir_shader_compiler_options compiler_options = {};
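+	// The updated API fills the options for a target shader model; the two
+	// bit-size masks presumably advertise supported int and float widths.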
+	const unsigned supported_bit_sizes = 16 | 32 | 64;
+	dxil_get_nir_compiler_options(&compiler_options, shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL)), supported_bit_sizes, supported_bit_sizes);
 	compiler_options.lower_base_vertex = false;
 
 	// This is based on spirv2dxil.c. May need updates when it changes.