metal_device_properties.mm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /**************************************************************************/
  2. /* metal_device_properties.mm */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. /**************************************************************************/
  31. /* */
  32. /* Portions of this code were derived from MoltenVK. */
  33. /* */
  34. /* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
  35. /* (http://www.brenwill.com) */
  36. /* */
  37. /* Licensed under the Apache License, Version 2.0 (the "License"); */
  38. /* you may not use this file except in compliance with the License. */
  39. /* You may obtain a copy of the License at */
  40. /* */
  41. /* http://www.apache.org/licenses/LICENSE-2.0 */
  42. /* */
  43. /* Unless required by applicable law or agreed to in writing, software */
  44. /* distributed under the License is distributed on an "AS IS" BASIS, */
  45. /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
  46. /* implied. See the License for the specific language governing */
  47. /* permissions and limitations under the License. */
  48. /**************************************************************************/
  49. #import "metal_device_properties.h"
  50. #include "servers/rendering/renderer_rd/effects/metal_fx.h"
  51. #import <Metal/Metal.h>
  52. #import <MetalFX/MetalFX.h>
  53. #import <spirv_cross.hpp>
  54. #import <spirv_msl.hpp>
  // Binary scaling multipliers (powers of 1024) for expressing sizes.
  #define KIBI (1024)
  #define MEBI (KIBI * KIBI)

  // When building against an SDK older than macOS 14 / iOS 17, provide
  // MTLGPUFamilyApple9 by its raw value (1009).
  // NOTE(review): presumably those SDK headers do not declare the enumerator yet — confirm.
  #if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
  #define MTLGPUFamilyApple9 (MTLGPUFamily)1009
  #endif
  // Pre-decrement for MTLGPUFamily.
  //
  // Moves `p_family` to the value one below its current raw value. If that result is
  // lower than MTLGPUFamilyApple1, it wraps around to MTLGPUFamilyApple9 instead.
  // Returns a reference to the updated `p_family`.
  //
  // Because of the wrap, a value that starts inside Apple1..Apple9 never becomes
  // smaller than MTLGPUFamilyApple1, so `family >= MTLGPUFamilyApple1` cannot act as
  // the only terminating condition of a loop driven by this operator.
  API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(1.0))
  MTLGPUFamily &operator--(MTLGPUFamily &p_family) {
      const int previous = static_cast<int>(p_family) - 1;
      p_family = (previous < static_cast<int>(MTLGPUFamilyApple1))
              ? MTLGPUFamilyApple9
              : static_cast<MTLGPUFamily>(previous);
      return p_family;
  }
  // Resets `features` and repopulates it from `p_device`:
  //  - the highest supported Apple GPU family,
  //  - texture / pixel-format capabilities,
  //  - supported MSAA sample counts,
  //  - family-gated shader capabilities,
  //  - argument buffer tier and whether argument encoders are required,
  //  - MetalFX scaler availability,
  //  - the default Metal Shading Language version, both as the Metal enum and as the
  //    SPIRV-Cross packed version number.
  //
  // Fields that are only assigned inside an `@available` / `#if` guard keep the value
  // produced by `features = {}` when the guard does not apply.
  void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
      features = {};

      // Probe from the newest to the oldest Apple GPU family and keep the first match.
      //
      // The search walks a plain integer rather than using `--family`: the MTLGPUFamily
      // decrement operator defined in this file wraps Apple1 back to Apple9, so a
      // `family >= MTLGPUFamilyApple1` condition never turns false and the loop would
      // never terminate on a device that reports none of the Apple1..Apple9 families.
      // With the bounded loop, such a device simply keeps the Apple1 fallback below.
      features.highestFamily = MTLGPUFamilyApple1;
      for (int raw_family = static_cast<int>(MTLGPUFamilyApple9); raw_family >= static_cast<int>(MTLGPUFamilyApple1); raw_family--) {
          const MTLGPUFamily family = static_cast<MTLGPUFamily>(raw_family);
          if ([p_device supportsFamily:family]) {
              features.highestFamily = family;
              break;
          }
      }

      if (@available(macOS 11, iOS 16.4, tvOS 16.4, *)) {
          features.supportsBCTextureCompression = p_device.supportsBCTextureCompression;
      } else {
          features.supportsBCTextureCompression = false;
      }

  #if TARGET_OS_OSX
      features.supportsDepth24Stencil8 = p_device.isDepth24Stencil8PixelFormatSupported;
  #endif

      if (@available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) {
          features.supports32BitFloatFiltering = p_device.supports32BitFloatFiltering;
          features.supports32BitMSAA = p_device.supports32BitMSAA;
      }

      if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
          features.supports_gpu_address = true;
      }

      features.hostMemoryPageSize = sysconf(_SC_PAGESIZE);

      // Build a bitmask of every sample count the device accepts, testing each
      // single-bit value from SampleCount1 up to SampleCount64.
      for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) {
          if ([p_device supportsTextureSampleCount:sc]) {
              features.supportedSampleCounts |= sc;
          }
      }

      // Capabilities gated purely on the Apple GPU family.
      features.layeredRendering = [p_device supportsFamily:MTLGPUFamilyApple5];
      features.multisampleLayeredRendering = [p_device supportsFamily:MTLGPUFamilyApple7];
      features.tessellationShader = [p_device supportsFamily:MTLGPUFamilyApple3];
      features.imageCubeArray = [p_device supportsFamily:MTLGPUFamilyApple3];
      features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
      features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
      features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
      features.argument_buffers_tier = p_device.argumentBuffersSupport;

      // Argument encoders are needed unless the device is both Metal 3 and argument
      // buffers tier 2.
      if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
          features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
      }

      if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
          features.metal_fx_spatial = [MTLFXSpatialScalerDescriptor supportsDevice:p_device];
  #ifdef METAL_MFXTEMPORAL_ENABLED
          features.metal_fx_temporal = [MTLFXTemporalScalerDescriptor supportsDevice:p_device];
  #else
          features.metal_fx_temporal = false;
  #endif
      }

      MTLCompileOptions *opts = [MTLCompileOptions new];
      features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.

      // Translate the Metal enum into the packed version number SPIRV-Cross expects.
      // The switch has no default: for an enum value not listed here, `mslVersion`
      // keeps the value left by `features = {}`.
  #define setMSLVersion(m_maj, m_min) \
      features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min)

      switch (features.mslVersionEnum) {
  #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || __TV_OS_VERSION_MAX_ALLOWED >= 180000 || __VISION_OS_VERSION_MAX_ALLOWED >= 20000
          case MTLLanguageVersion3_2:
              setMSLVersion(3, 2);
              break;
  #endif
  #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 || __TV_OS_VERSION_MAX_ALLOWED >= 170000
          case MTLLanguageVersion3_1:
              setMSLVersion(3, 1);
              break;
  #endif
          case MTLLanguageVersion3_0:
              setMSLVersion(3, 0);
              break;
          case MTLLanguageVersion2_4:
              setMSLVersion(2, 4);
              break;
          case MTLLanguageVersion2_3:
              setMSLVersion(2, 3);
              break;
          case MTLLanguageVersion2_2:
              setMSLVersion(2, 2);
              break;
          case MTLLanguageVersion2_1:
              setMSLVersion(2, 1);
              break;
          case MTLLanguageVersion2_0:
              setMSLVersion(2, 0);
              break;
          case MTLLanguageVersion1_2:
              setMSLVersion(1, 2);
              break;
          case MTLLanguageVersion1_1:
              setMSLVersion(1, 1);
              break;
  #if TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST && !TARGET_OS_VISION
          case MTLLanguageVersion1_0:
              setMSLVersion(1, 0);
              break;
  #endif
      }
  }
  // Populates `limits` for `p_device`.
  //
  // Most values are constants selected by the Apple GPU family the device supports
  // (comments marked "FST" refer to Apple's Metal Feature Set Tables); a few are read
  // directly from the device. The subgroup limits are derived from `features`, so
  // init_features() must have run before this function.
  void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
      // FST: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf

      // Query each relevant GPU family once; the answers gate several limits below.
      const bool has_apple3 = [p_device supportsFamily:MTLGPUFamilyApple3];
      const bool has_apple4 = [p_device supportsFamily:MTLGPUFamilyApple4];
      const bool has_apple5 = [p_device supportsFamily:MTLGPUFamilyApple5];
      const bool has_apple6 = [p_device supportsFamily:MTLGPUFamilyApple6];

      // FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D texture.
      limits.maxImageArrayLayers = 2048;

      if (has_apple3) {
          // FST: Maximum 2D texture width and height.
          limits.maxFramebufferWidth = 16384;
          limits.maxFramebufferHeight = 16384;
          limits.maxViewportDimensionX = 16384;
          limits.maxViewportDimensionY = 16384;
          // FST: Maximum 1D texture width.
          limits.maxImageDimension1D = 16384;
          // FST: Maximum 2D texture width and height.
          limits.maxImageDimension2D = 16384;
          // FST: Maximum cube map texture width and height.
          limits.maxImageDimensionCube = 16384;
      } else {
          // FST: Maximum 2D texture width and height.
          limits.maxFramebufferWidth = 8192;
          limits.maxFramebufferHeight = 8192;
          limits.maxViewportDimensionX = 8192;
          limits.maxViewportDimensionY = 8192;
          // FST: Maximum 1D texture width.
          limits.maxImageDimension1D = 8192;
          // FST: Maximum 2D texture width and height.
          limits.maxImageDimension2D = 8192;
          // FST: Maximum cube map texture width and height.
          limits.maxImageDimensionCube = 8192;
      }

      // FST: Maximum 3D texture width, height, and depth.
      limits.maxImageDimension3D = 2048;

      limits.maxThreadsPerThreadGroup = p_device.maxThreadsPerThreadgroup;
      // No effective limits.
      limits.maxComputeWorkGroupCount = { std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max() };
      // https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85
      limits.maxBoundDescriptorSets = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
      // FST: Maximum number of color render targets per render pass descriptor.
      limits.maxColorAttachments = 8;

      // Maximum number of textures, samplers and buffers the device can access, per
      // stage, from an argument buffer.
      if (has_apple6) {
          limits.maxTexturesPerArgumentBuffer = 1'000'000;
          limits.maxSamplersPerArgumentBuffer = 1024;
          limits.maxBuffersPerArgumentBuffer = std::numeric_limits<uint64_t>::max();
      } else if (has_apple4) {
          limits.maxTexturesPerArgumentBuffer = 96;
          limits.maxSamplersPerArgumentBuffer = 16;
          limits.maxBuffersPerArgumentBuffer = 96;
      } else {
          limits.maxTexturesPerArgumentBuffer = 31;
          limits.maxSamplersPerArgumentBuffer = 16;
          limits.maxBuffersPerArgumentBuffer = 31;
      }

      // Subgroup sizes. These values were taken from MoltenVK.
      if (features.simdPermute) {
          limits.minSubgroupSize = 4;
          limits.maxSubgroupSize = 32;
      } else if (features.quadPermute) {
          limits.minSubgroupSize = limits.maxSubgroupSize = 4;
      } else {
          limits.minSubgroupSize = limits.maxSubgroupSize = 1;
      }

      // Shader stages in which subgroup operations are reported as available.
      limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_COMPUTE_BIT);
      if (features.tessellationShader) {
          limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT);
      }
      limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_FRAGMENT_BIT);

      // Subgroup operation classes, enabled according to the permute / reduction features.
      limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BASIC_BIT);
      if (features.simdPermute || features.quadPermute) {
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_VOTE_BIT);
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BALLOT_BIT);
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_BIT);
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT);
      }
      if (features.simdReduction) {
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT);
      }
      if (features.quadPermute) {
          limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT);
      }

      limits.maxBufferLength = p_device.maxBufferLength;

      // FST: Maximum size of vertex descriptor layout stride.
      limits.maxVertexDescriptorLayoutStride = std::numeric_limits<uint64_t>::max();

      // Maximum number of viewports.
      limits.maxViewports = has_apple5 ? 16 : 1;

      limits.maxPerStageBufferCount = 31;
      limits.maxPerStageSamplerCount = 16;
      if (has_apple6) {
          limits.maxPerStageTextureCount = 128;
      } else if (has_apple4) {
          limits.maxPerStageTextureCount = 96;
      } else {
          limits.maxPerStageTextureCount = 31;
      }

      limits.maxVertexInputAttributes = 31;
      limits.maxVertexInputBindings = 31;
      limits.maxVertexInputBindingStride = (2 * KIBI);
      limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf

      if (has_apple4) {
          limits.maxThreadGroupMemoryAllocation = 32768;
      } else if (has_apple3) {
          limits.maxThreadGroupMemoryAllocation = 16384;
      } else {
          limits.maxThreadGroupMemoryAllocation = 16352;
      }

      // NOTE(review): on targets where neither of the two conditions below holds,
      // minUniformBufferOffsetAlignment is not assigned by this function — confirm
      // that the struct's default is acceptable there.
  #if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
      limits.minUniformBufferOffsetAlignment = 64;
  #endif

  #if TARGET_OS_OSX
      // This is Apple Silicon specific.
      limits.minUniformBufferOffsetAlignment = 16;
  #endif

      limits.maxDrawIndexedIndexValue = std::numeric_limits<uint32_t>::max() - 1;

      // Temporal scaler input scale range. Defaults taken from macOS 14+; they are
      // replaced by the device-reported range when the MetalFX temporal scaler is
      // compiled in and the OS is new enough to be queried.
      limits.temporalScalerInputContentMinScale = 1.0;
      limits.temporalScalerInputContentMaxScale = 3.0;
  #ifdef METAL_MFXTEMPORAL_ENABLED
      if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
          limits.temporalScalerInputContentMinScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMinScaleForDevice:p_device];
          limits.temporalScalerInputContentMaxScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMaxScaleForDevice:p_device];
      }
  #endif
  }
  // Builds the property set for `p_device`.
  //
  // The order of the two calls matters: init_limits() reads the subgroup-related
  // flags in `features` (simdPermute, quadPermute, simdReduction, tessellationShader),
  // which init_features() assigns.
  MetalDeviceProperties::MetalDeviceProperties(id<MTLDevice> p_device) {
      init_features(p_device);
      init_limits(p_device);
  }
  // The destructor performs no explicit cleanup.
  MetalDeviceProperties::~MetalDeviceProperties() = default;
  // Maps a requested RenderingDevice sample setting to a sample count present in
  // `features.supportedSampleCounts`.
  //
  // Starts at the count stored in `sample_count[p_samples]` and halves it until a
  // supported count is found. Returns SampleCount1 when the candidate reaches
  // SampleCount1 without a match. `p_samples` is used as an unchecked index into
  // `sample_count`.
  SampleCount MetalDeviceProperties::find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const {
      const SampleCount available = features.supportedSampleCounts;

      for (SampleCount candidate = sample_count[p_samples]; candidate > SampleCount1; candidate = (SampleCount)(candidate >> 1)) {
          if (available & candidate) {
              return candidate;
          }
      }

      return SampleCount1;
  }
  // region static members

  // Lookup table indexed by a RenderingDevice::TextureSamples value, giving the
  // matching SampleCount. Entries run from 1 to 64 samples, doubling at each step.
  const SampleCount MetalDeviceProperties::sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX] = {
      SampleCount1,
      SampleCount2,
      SampleCount4,
      SampleCount8,
      SampleCount16,
      SampleCount32,
      SampleCount64,
  };

  // endregion