瀏覽代碼

Merge pull request #87388 from clayjohn/Vulkan-limit-add

Implement `LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE` to `limit_get` in the Vulkan backend
Thaddeus Crews 6 月之前
父節點
當前提交
aef8ed2901

+ 2 - 0
drivers/d3d12/rendering_device_driver_d3d12.cpp

@@ -6218,6 +6218,8 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
 			return D3D12_CS_THREAD_GROUP_MAX_Y;
 		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
 			return D3D12_CS_THREAD_GROUP_MAX_Z;
+		case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
+			return D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float);
 		case LIMIT_SUBGROUP_SIZE:
 		// Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),
 		// but at this time I don't know the implications on the transpilation to DXIL, etc.

+ 1 - 0
drivers/metal/metal_device_properties.h

@@ -125,6 +125,7 @@ struct MetalLimits {
 	uint32_t maxVertexInputBindingStride;
 	uint32_t maxDrawIndexedIndexValue;
 	uint32_t maxShaderVaryings;
+	uint32_t maxThreadGroupMemoryAllocation;
 
 	double temporalScalerInputContentMinScale;
 	double temporalScalerInputContentMaxScale;

+ 8 - 0
drivers/metal/metal_device_properties.mm

@@ -305,6 +305,14 @@ void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
 	limits.maxVertexInputBindingStride = (2 * KIBI);
 	limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
 
+	if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxThreadGroupMemoryAllocation = 32768;
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
+		limits.maxThreadGroupMemoryAllocation = 16384;
+	} else {
+		limits.maxThreadGroupMemoryAllocation = 16352;
+	}
+
 #if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
 	limits.minUniformBufferOffsetAlignment = 64;
 #endif

+ 11 - 5
drivers/metal/rendering_device_driver_metal.mm

@@ -3901,16 +3901,16 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
 uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
 	MetalDeviceProperties const &props = (*device_properties);
 	MetalLimits const &limits = props.limits;
-
+	uint64_t safe_unbounded = ((uint64_t)1 << 30);
 #if defined(DEV_ENABLED)
 #define UNKNOWN(NAME)                                                            \
 	case NAME:                                                                   \
 		WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
-		return (uint64_t)1 << 30;
+		return safe_unbounded;
 #else
 #define UNKNOWN(NAME) \
 	case NAME:        \
-		return (uint64_t)1 << 30
+		return safe_unbounded
 #endif
 
 	// clang-format off
@@ -3983,6 +3983,8 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
 			return limits.maxThreadsPerThreadGroup.height;
 		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
 			return limits.maxThreadsPerThreadGroup.depth;
+		case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
+			return limits.maxThreadGroupMemoryAllocation;
 		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
 			return limits.maxViewportDimensionX;
 		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@@ -4008,8 +4010,12 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
 		UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
 		UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
 		UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
-		default:
-			ERR_FAIL_V(0);
+		default: {
+#ifdef DEV_ENABLED
+			WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
+#endif
+			return safe_unbounded;
+		}
 	}
 	// clang-format on
 	return 0;

+ 9 - 2
drivers/vulkan/rendering_device_driver_vulkan.cpp

@@ -5828,6 +5828,7 @@ uint64_t RenderingDeviceDriverVulkan::get_lazily_memory_used() {
 
 uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
 	const VkPhysicalDeviceLimits &limits = physical_device_properties.limits;
+	uint64_t safe_unbounded = ((uint64_t)1 << 30);
 	switch (p_limit) {
 		case LIMIT_MAX_BOUND_UNIFORM_SETS:
 			return limits.maxBoundDescriptorSets;
@@ -5897,6 +5898,8 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
 			return limits.maxComputeWorkGroupSize[1];
 		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
 			return limits.maxComputeWorkGroupSize[2];
+		case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
+			return limits.maxComputeSharedMemorySize;
 		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
 			return limits.maxViewportDimensions[0];
 		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@@ -5923,8 +5926,12 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
 			// The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in
 			// practice, that doesn't seem to be the case. The validation layers don't even complain.
 			return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4);
-		default:
-			ERR_FAIL_V(0);
+		default: {
+#ifdef DEV_ENABLED
+			WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
+#endif
+			return safe_unbounded;
+		}
 	}
 }