Browse Source

Merge pull request #113618 from blueskythlikesclouds/spirv-to-dxil-memory-alloc-opt

Update Mesa NIR to 25.3.1 + Make each SPIR-V -> DXIL conversion thread allocate from its own heap
Thaddeus Crews 2 days ago
parent
commit
a6f313c755

+ 4 - 0
drivers/d3d12/d3d12_godot_nir_bridge.h

@@ -52,6 +52,10 @@ typedef struct GodotNirCallbacks {
 	void (*report_bitcode_bit_offset_fn)(uint64_t p_bit_offset, void *p_data);
 } GodotNirCallbacks;
 
+extern void *godot_nir_malloc(size_t p_size);
+extern void *godot_nir_realloc(void *p_block, size_t p_size);
+extern void godot_nir_free(void *p_block);
+
 #ifdef __cplusplus
 }
 #endif

+ 101 - 8
drivers/d3d12/rendering_shader_container_d3d12.cpp

@@ -72,6 +72,97 @@ GODOT_GCC_WARNING_POP
 GODOT_CLANG_WARNING_POP
 GODOT_MSVC_WARNING_POP
 
+// SPIR-V to DXIL does way too many allocations, which causes worker threads
+// to bottleneck each other due to sharing the same global process heap.
+// This can be solved by making each thread allocate from its own heap.
+#define SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+#ifdef SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+namespace {
+struct Win32Heap {
+	HANDLE handle;
+	SafeRefCount ref_count;
+
+	Win32Heap() {
+		handle = HeapCreate(0, 0, 0);
+		ref_count.init();
+	}
+
+	~Win32Heap() {
+		HeapDestroy(handle);
+	}
+};
+
+constexpr size_t ALLOC_HEADER_SIZE = sizeof(Win32Heap *) * 2;
+} //namespace
+
+extern "C" {
+void *godot_nir_malloc(size_t p_size) {
+	// This RAII helper is for allowing the heap to be destroyed when the thread quits.
+	struct Win32HeapHolder {
+		Win32Heap *win32_heap = nullptr;
+
+		Win32HeapHolder() {
+			win32_heap = memnew(Win32Heap);
+		}
+
+		~Win32HeapHolder() {
+			if (win32_heap->ref_count.unref()) {
+				memdelete(win32_heap);
+			}
+		}
+	};
+
+	thread_local Win32HeapHolder holder;
+
+	void *block = HeapAlloc(holder.win32_heap->handle, 0, p_size + ALLOC_HEADER_SIZE);
+
+	// Store the heap in the allocation for the realloc/free operations.
+	*(Win32Heap **)block = holder.win32_heap;
+	holder.win32_heap->ref_count.ref();
+
+	return (uint8_t *)block + ALLOC_HEADER_SIZE;
+}
+
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+	Win32Heap *win32_heap = *(Win32Heap **)actual_block;
+	return (uint8_t *)HeapReAlloc(win32_heap->handle, 0, actual_block, p_size + ALLOC_HEADER_SIZE) + ALLOC_HEADER_SIZE;
+}
+
+void godot_nir_free(void *p_block) {
+	if (p_block != nullptr) {
+		uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+		Win32Heap *win32_heap = *(Win32Heap **)actual_block;
+		HeapFree(win32_heap->handle, 0, actual_block);
+
+		// Allocations can outlive the threads they were created in if they were stored globally.
+		if (win32_heap->ref_count.unref()) {
+			memdelete(win32_heap);
+		}
+	}
+}
+}
+
+#else
+
+extern "C" {
+void *godot_nir_malloc(size_t p_size) {
+	return malloc(p_size);
+}
+
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	return realloc(p_block, p_size);
+}
+
+void godot_nir_free(void *p_block) {
+	return free(p_block);
+}
+}
+
+#endif
+
 static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) {
 	switch (p_stages_mask) {
 		case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT:
@@ -274,8 +365,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 	dxil_spirv_runtime_conf dxil_runtime_conf = {};
 	dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER;
 	dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
-	dxil_runtime_conf.zero_based_vertex_instance_id = true;
-	dxil_runtime_conf.zero_based_compute_workgroup_id = true;
+	dxil_runtime_conf.first_vertex_and_base_instance_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
+	dxil_runtime_conf.workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
 
 	// Explicitly keeping these false because converting UAV descriptors to SRVs do not seem to have real performance benefits on desktop GPUs.
 	// It also makes it easier to implement descriptor heaps and enhanced barriers.
@@ -290,7 +381,7 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 		r_stages_processed.set_flag(stage_flag);
 
 		const char *entry_point = "main";
-		static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
+		static const mesa_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
 			MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX
 			MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT
 			MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL
@@ -320,8 +411,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 		}
 
 		dxil_spirv_nir_prep(shader);
-		bool requires_runtime_data = false;
-		dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data);
+		dxil_spirv_metadata dxil_metadata = {};
+		dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &dxil_metadata);
 
 		r_stages_nir_shaders[stage] = shader;
 	}
@@ -360,8 +451,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
 			}
 		}
 		if (prev_shader) {
-			bool requires_runtime_data = {};
-			dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data);
+			dxil_spirv_metadata dxil_metadata = {};
+			dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &dxil_metadata);
 		}
 	}
 
@@ -434,7 +525,9 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span<ReflectShaderSta
 	};
 
 	// This structure must live as long as the shaders are alive.
-	nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options();
+	nir_shader_compiler_options compiler_options = {};
+	const unsigned supported_bit_sizes = 16 | 32 | 64;
+	dxil_get_nir_compiler_options(&compiler_options, shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL)), supported_bit_sizes, supported_bit_sizes);
 	compiler_options.lower_base_vertex = false;
 
 	// This is based on spirv2dxil.c. May need updates when it changes.

+ 1 - 1
misc/scripts/install_d3d12_sdk_windows.py

@@ -32,7 +32,7 @@ else:
 
 # Mesa NIR
 # Check for latest version: https://github.com/godotengine/godot-nir-static/releases/latest
-mesa_version = "23.1.9-2"
+mesa_version = "25.3.1"
 # WinPixEventRuntime
 # Check for latest version: https://www.nuget.org/api/v2/package/WinPixEventRuntime (check downloaded filename)
 pix_version = "1.0.240308001"