metal_objects.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. /**************************************************************************/
  2. /* metal_objects.h */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #pragma once
  31. /**************************************************************************/
  32. /* */
  33. /* Portions of this code were derived from MoltenVK. */
  34. /* */
  35. /* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
  36. /* (http://www.brenwill.com) */
  37. /* */
  38. /* Licensed under the Apache License, Version 2.0 (the "License"); */
  39. /* you may not use this file except in compliance with the License. */
  40. /* You may obtain a copy of the License at */
  41. /* */
  42. /* http://www.apache.org/licenses/LICENSE-2.0 */
  43. /* */
  44. /* Unless required by applicable law or agreed to in writing, software */
  45. /* distributed under the License is distributed on an "AS IS" BASIS, */
  46. /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
  47. /* implied. See the License for the specific language governing */
  48. /* permissions and limitations under the License. */
  49. /**************************************************************************/
  50. #import "metal_device_properties.h"
  51. #import "metal_utils.h"
  52. #import "pixel_formats.h"
  53. #import "sha256_digest.h"
  54. #include "servers/rendering/rendering_device_driver.h"
  55. #import <CommonCrypto/CommonDigest.h>
  56. #import <Foundation/Foundation.h>
  57. #import <Metal/Metal.h>
  58. #import <QuartzCore/CAMetalLayer.h>
  59. #import <simd/simd.h>
  60. #import <zlib.h>
  61. #import <initializer_list>
  62. #import <optional>
  63. // These types can be used in Vector and other containers that use
  64. // pointer operations not supported by ARC.
  65. namespace MTL {
  66. #define MTL_CLASS(name) \
  67. class name { \
  68. public: \
  69. name(id<MTL##name> obj = nil) : m_obj(obj) {} \
  70. operator id<MTL##name>() const { \
  71. return m_obj; \
  72. } \
  73. id<MTL##name> m_obj; \
  74. };
  75. MTL_CLASS(Texture)
  76. } //namespace MTL
  77. enum ShaderStageUsage : uint32_t {
  78. None = 0,
  79. Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
  80. Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
  81. TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
  82. TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
  83. Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
  84. };
  85. _FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
  86. p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));
  87. return p_a;
  88. }
  89. enum StageResourceUsage : uint32_t {
  90. VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2),
  91. VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2),
  92. FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2),
  93. FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2),
  94. TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
  95. TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
  96. TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
  97. TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
  98. ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2),
  99. ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2),
  100. };
  101. typedef LocalVector<__unsafe_unretained id<MTLResource>> ResourceVector;
  102. typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap;
  103. enum class MDCommandBufferStateType {
  104. None,
  105. Render,
  106. Compute,
  107. Blit,
  108. };
  109. enum class MDPipelineType {
  110. None,
  111. Render,
  112. Compute,
  113. };
  114. class MDRenderPass;
  115. class MDPipeline;
  116. class MDRenderPipeline;
  117. class MDComputePipeline;
  118. class MDFrameBuffer;
  119. class RenderingDeviceDriverMetal;
  120. class MDUniformSet;
  121. class MDShader;
  122. #pragma mark - Resource Factory
  123. struct ClearAttKey {
  124. const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
  125. const static uint32_t DEPTH_INDEX = COLOR_COUNT;
  126. const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
  127. const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
  128. enum Flags : uint16_t {
  129. CLEAR_FLAGS_NONE = 0,
  130. CLEAR_FLAGS_LAYERED = 1 << 0,
  131. };
  132. Flags flags = CLEAR_FLAGS_NONE;
  133. uint16_t sample_count = 0;
  134. uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
  135. _FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
  136. _FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }
  137. _FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
  138. _FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
  139. _FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
  140. _FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }
  141. _FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
  142. _FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
  143. _FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
  144. _FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }
  145. _FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
  146. return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
  147. }
  148. uint32_t hash() const {
  149. uint32_t h = hash_murmur3_one_32(flags);
  150. h = hash_murmur3_one_32(sample_count, h);
  151. h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
  152. return hash_fmix32(h);
  153. }
  154. };
  155. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceFactory {
  156. private:
  157. RenderingDeviceDriverMetal *device_driver;
  158. id<MTLFunction> new_func(NSString *p_source, NSString *p_name, NSError **p_error);
  159. id<MTLFunction> new_clear_vert_func(ClearAttKey &p_key);
  160. id<MTLFunction> new_clear_frag_func(ClearAttKey &p_key);
  161. NSString *get_format_type_string(MTLPixelFormat p_fmt);
  162. public:
  163. id<MTLRenderPipelineState> new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error);
  164. id<MTLDepthStencilState> new_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
  165. MDResourceFactory(RenderingDeviceDriverMetal *p_device_driver) :
  166. device_driver(p_device_driver) {}
  167. ~MDResourceFactory() = default;
  168. };
  169. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceCache {
  170. private:
  171. typedef HashMap<ClearAttKey, id<MTLRenderPipelineState>, HashableHasher<ClearAttKey>> HashMap;
  172. std::unique_ptr<MDResourceFactory> resource_factory;
  173. HashMap clear_states;
  174. struct {
  175. id<MTLDepthStencilState> all;
  176. id<MTLDepthStencilState> depth_only;
  177. id<MTLDepthStencilState> stencil_only;
  178. id<MTLDepthStencilState> none;
  179. } clear_depth_stencil_state;
  180. public:
  181. id<MTLRenderPipelineState> get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error);
  182. id<MTLDepthStencilState> get_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
  183. explicit MDResourceCache(RenderingDeviceDriverMetal *p_device_driver) :
  184. resource_factory(new MDResourceFactory(p_device_driver)) {}
  185. ~MDResourceCache() = default;
  186. };
  187. enum class MDAttachmentType : uint8_t {
  188. None = 0,
  189. Color = 1 << 0,
  190. Depth = 1 << 1,
  191. Stencil = 1 << 2,
  192. };
  193. _FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {
  194. flags::set(p_a, p_b);
  195. return p_a;
  196. }
  197. _FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {
  198. return uint8_t(p_a) & uint8_t(p_b);
  199. }
  200. struct MDSubpass {
  201. uint32_t subpass_index = 0;
  202. uint32_t view_count = 0;
  203. LocalVector<RDD::AttachmentReference> input_references;
  204. LocalVector<RDD::AttachmentReference> color_references;
  205. RDD::AttachmentReference depth_stencil_reference;
  206. LocalVector<RDD::AttachmentReference> resolve_references;
  207. MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;
  208. };
  209. struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDAttachment {
  210. private:
  211. uint32_t index = 0;
  212. uint32_t firstUseSubpassIndex = 0;
  213. uint32_t lastUseSubpassIndex = 0;
  214. public:
  215. MTLPixelFormat format = MTLPixelFormatInvalid;
  216. MDAttachmentType type = MDAttachmentType::None;
  217. MTLLoadAction loadAction = MTLLoadActionDontCare;
  218. MTLStoreAction storeAction = MTLStoreActionDontCare;
  219. MTLLoadAction stencilLoadAction = MTLLoadActionDontCare;
  220. MTLStoreAction stencilStoreAction = MTLStoreActionDontCare;
  221. uint32_t samples = 1;
  222. /*!
  223. * @brief Returns true if this attachment is first used in the given subpass.
  224. * @param p_subpass
  225. * @return
  226. */
  227. _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {
  228. return p_subpass.subpass_index == firstUseSubpassIndex;
  229. }
  230. /*!
  231. * @brief Returns true if this attachment is last used in the given subpass.
  232. * @param p_subpass
  233. * @return
  234. */
  235. _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {
  236. return p_subpass.subpass_index == lastUseSubpassIndex;
  237. }
  238. void linkToSubpass(MDRenderPass const &p_pass);
  239. MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass,
  240. bool p_is_rendering_entire_area,
  241. bool p_has_resolve,
  242. bool p_can_resolve,
  243. bool p_is_stencil) const;
  244. bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
  245. PixelFormats &p_pf,
  246. MDSubpass const &p_subpass,
  247. id<MTLTexture> p_attachment,
  248. bool p_is_rendering_entire_area,
  249. bool p_has_resolve,
  250. bool p_can_resolve,
  251. bool p_is_stencil) const;
  252. /** Returns whether this attachment should be cleared in the subpass. */
  253. bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;
  254. };
  255. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPass {
  256. public:
  257. Vector<MDAttachment> attachments;
  258. Vector<MDSubpass> subpasses;
  259. uint32_t get_sample_count() const {
  260. return attachments.is_empty() ? 1 : attachments[0].samples;
  261. }
  262. MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);
  263. };
  264. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
  265. private:
  266. #pragma mark - Common State
  267. // From RenderingDevice
  268. static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
  269. RenderingDeviceDriverMetal *device_driver = nullptr;
  270. id<MTLCommandQueue> queue = nil;
  271. id<MTLCommandBuffer> commandBuffer = nil;
  272. bool state_begin = false;
  273. _FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
  274. DEV_ASSERT(state_begin);
  275. if (commandBuffer == nil) {
  276. commandBuffer = queue.commandBuffer;
  277. }
  278. return commandBuffer;
  279. }
  280. void _end_compute_dispatch();
  281. void _end_blit();
  282. #pragma mark - Render
  283. void _render_set_dirty_state();
  284. void _render_bind_uniform_sets();
  285. void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
  286. uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
  287. void _end_render_pass();
  288. void _render_clear_render_area();
  289. #pragma mark - Compute
  290. void _compute_set_dirty_state();
  291. void _compute_bind_uniform_sets();
  292. public:
  293. MDCommandBufferStateType type = MDCommandBufferStateType::None;
  294. struct RenderState {
  295. MDRenderPass *pass = nullptr;
  296. MDFrameBuffer *frameBuffer = nullptr;
  297. MDRenderPipeline *pipeline = nullptr;
  298. LocalVector<RDD::RenderPassClearValue> clear_values;
  299. LocalVector<MTLViewport> viewports;
  300. LocalVector<MTLScissorRect> scissors;
  301. std::optional<Color> blend_constants;
  302. uint32_t current_subpass = UINT32_MAX;
  303. Rect2i render_area = {};
  304. bool is_rendering_entire_area = false;
  305. MTLRenderPassDescriptor *desc = nil;
  306. id<MTLRenderCommandEncoder> encoder = nil;
  307. id<MTLBuffer> __unsafe_unretained index_buffer = nil; // Buffer is owned by RDD.
  308. MTLIndexType index_type = MTLIndexTypeUInt16;
  309. uint32_t index_offset = 0;
  310. LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers;
  311. LocalVector<NSUInteger> vertex_offsets;
  312. ResourceUsageMap resource_usage;
  313. // clang-format off
  314. enum DirtyFlag: uint16_t {
  315. DIRTY_NONE = 0,
  316. DIRTY_PIPELINE = 1 << 0, //! pipeline state
  317. DIRTY_UNIFORMS = 1 << 1, //! uniform sets
  318. DIRTY_PUSH = 1 << 2, //! push constants
  319. DIRTY_DEPTH = 1 << 3, //! depth / stencil state
  320. DIRTY_VERTEX = 1 << 4, //! vertex buffers
  321. DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
  322. DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
  323. DIRTY_BLEND = 1 << 7, //! blend state
  324. DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
  325. DIRTY_ALL = (1 << 9) - 1,
  326. };
  327. // clang-format on
  328. BitField<DirtyFlag> dirty = DIRTY_NONE;
  329. LocalVector<MDUniformSet *> uniform_sets;
  330. // Bit mask of the uniform sets that are dirty, to prevent redundant binding.
  331. uint64_t uniform_set_mask = 0;
  332. uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
  333. uint32_t push_constant_data_len = 0;
  334. uint32_t push_constant_bindings[2] = { 0 };
  335. _FORCE_INLINE_ void reset();
  336. void end_encoding();
  337. _ALWAYS_INLINE_ const MDSubpass &get_subpass() const {
  338. DEV_ASSERT(pass != nullptr);
  339. return pass->subpasses[current_subpass];
  340. }
  341. _FORCE_INLINE_ void mark_viewport_dirty() {
  342. if (viewports.is_empty()) {
  343. return;
  344. }
  345. dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT);
  346. }
  347. _FORCE_INLINE_ void mark_scissors_dirty() {
  348. if (scissors.is_empty()) {
  349. return;
  350. }
  351. dirty.set_flag(DirtyFlag::DIRTY_SCISSOR);
  352. }
  353. _FORCE_INLINE_ void mark_vertex_dirty() {
  354. if (vertex_buffers.is_empty()) {
  355. return;
  356. }
  357. dirty.set_flag(DirtyFlag::DIRTY_VERTEX);
  358. }
  359. _FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list<uint32_t> l) {
  360. if (uniform_sets.is_empty()) {
  361. return;
  362. }
  363. for (uint32_t i : l) {
  364. if (i < uniform_sets.size() && uniform_sets[i] != nullptr) {
  365. uniform_set_mask |= 1 << i;
  366. }
  367. }
  368. dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
  369. }
  370. _FORCE_INLINE_ void mark_uniforms_dirty(void) {
  371. if (uniform_sets.is_empty()) {
  372. return;
  373. }
  374. for (uint32_t i = 0; i < uniform_sets.size(); i++) {
  375. if (uniform_sets[i] != nullptr) {
  376. uniform_set_mask |= 1 << i;
  377. }
  378. }
  379. dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
  380. }
  381. _FORCE_INLINE_ void mark_push_constants_dirty() {
  382. if (push_constant_data_len == 0) {
  383. return;
  384. }
  385. dirty.set_flag(DirtyFlag::DIRTY_PUSH);
  386. }
  387. _FORCE_INLINE_ void mark_blend_dirty() {
  388. if (!blend_constants.has_value()) {
  389. return;
  390. }
  391. dirty.set_flag(DirtyFlag::DIRTY_BLEND);
  392. }
  393. MTLScissorRect clip_to_render_area(MTLScissorRect p_rect) const {
  394. uint32_t raLeft = render_area.position.x;
  395. uint32_t raRight = raLeft + render_area.size.width;
  396. uint32_t raBottom = render_area.position.y;
  397. uint32_t raTop = raBottom + render_area.size.height;
  398. p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft));
  399. p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom));
  400. p_rect.width = MIN(p_rect.width, raRight - p_rect.x);
  401. p_rect.height = MIN(p_rect.height, raTop - p_rect.y);
  402. return p_rect;
  403. }
  404. Rect2i clip_to_render_area(Rect2i p_rect) const {
  405. int32_t raLeft = render_area.position.x;
  406. int32_t raRight = raLeft + render_area.size.width;
  407. int32_t raBottom = render_area.position.y;
  408. int32_t raTop = raBottom + render_area.size.height;
  409. p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft));
  410. p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom));
  411. p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x);
  412. p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y);
  413. return p_rect;
  414. }
  415. } render;
  416. // State specific for a compute pass.
  417. struct ComputeState {
  418. MDComputePipeline *pipeline = nullptr;
  419. id<MTLComputeCommandEncoder> encoder = nil;
  420. ResourceUsageMap resource_usage;
  421. // clang-format off
  422. enum DirtyFlag: uint16_t {
  423. DIRTY_NONE = 0,
  424. DIRTY_PIPELINE = 1 << 0, //! pipeline state
  425. DIRTY_UNIFORMS = 1 << 1, //! uniform sets
  426. DIRTY_PUSH = 1 << 2, //! push constants
  427. DIRTY_ALL = (1 << 3) - 1,
  428. };
  429. // clang-format on
  430. BitField<DirtyFlag> dirty = DIRTY_NONE;
  431. LocalVector<MDUniformSet *> uniform_sets;
  432. // Bit mask of the uniform sets that are dirty, to prevent redundant binding.
  433. uint64_t uniform_set_mask = 0;
  434. uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
  435. uint32_t push_constant_data_len = 0;
  436. uint32_t push_constant_bindings[1] = { 0 };
  437. _FORCE_INLINE_ void reset();
  438. void end_encoding();
  439. _FORCE_INLINE_ void mark_uniforms_dirty(void) {
  440. if (uniform_sets.is_empty()) {
  441. return;
  442. }
  443. for (uint32_t i = 0; i < uniform_sets.size(); i++) {
  444. if (uniform_sets[i] != nullptr) {
  445. uniform_set_mask |= 1 << i;
  446. }
  447. }
  448. dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
  449. }
  450. _FORCE_INLINE_ void mark_push_constants_dirty() {
  451. if (push_constant_data_len == 0) {
  452. return;
  453. }
  454. dirty.set_flag(DirtyFlag::DIRTY_PUSH);
  455. }
  456. } compute;
  457. // State specific to a blit pass.
  458. struct {
  459. id<MTLBlitCommandEncoder> encoder = nil;
  460. _FORCE_INLINE_ void reset() {
  461. encoder = nil;
  462. }
  463. } blit;
  464. _FORCE_INLINE_ id<MTLCommandBuffer> get_command_buffer() const {
  465. return commandBuffer;
  466. }
  467. void begin();
  468. void commit();
  469. void end();
  470. id<MTLBlitCommandEncoder> blit_command_encoder();
  471. void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
  472. void bind_pipeline(RDD::PipelineID p_pipeline);
  473. void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
  474. #pragma mark - Render Commands
  475. void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
  476. void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
  477. void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
  478. void render_set_viewport(VectorView<Rect2i> p_viewports);
  479. void render_set_scissor(VectorView<Rect2i> p_scissors);
  480. void render_set_blend_constants(const Color &p_constants);
  481. void render_begin_pass(RDD::RenderPassID p_render_pass,
  482. RDD::FramebufferID p_frameBuffer,
  483. RDD::CommandBufferType p_cmd_buffer_type,
  484. const Rect2i &p_rect,
  485. VectorView<RDD::RenderPassClearValue> p_clear_values);
  486. void render_next_subpass();
  487. void render_draw(uint32_t p_vertex_count,
  488. uint32_t p_instance_count,
  489. uint32_t p_base_vertex,
  490. uint32_t p_first_instance);
  491. void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
  492. void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
  493. void render_draw_indexed(uint32_t p_index_count,
  494. uint32_t p_instance_count,
  495. uint32_t p_first_index,
  496. int32_t p_vertex_offset,
  497. uint32_t p_first_instance);
  498. void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
  499. void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
  500. void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
  501. void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
  502. void render_end_pass();
  503. #pragma mark - Compute Commands
  504. void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
  505. void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
  506. void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
  507. void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
  508. MDCommandBuffer(id<MTLCommandQueue> p_queue, RenderingDeviceDriverMetal *p_device_driver) :
  509. device_driver(p_device_driver), queue(p_queue) {
  510. type = MDCommandBufferStateType::None;
  511. }
  512. MDCommandBuffer() = default;
  513. };
  514. #if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
  515. #define MTLBindingAccess MTLArgumentAccess
  516. #define MTLBindingAccessReadOnly MTLArgumentAccessReadOnly
  517. #define MTLBindingAccessReadWrite MTLArgumentAccessReadWrite
  518. #define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly
  519. #endif
  520. struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo {
  521. MTLDataType dataType = MTLDataTypeNone;
  522. uint32_t index = 0;
  523. MTLBindingAccess access = MTLBindingAccessReadOnly;
  524. MTLResourceUsage usage = 0;
  525. MTLTextureType textureType = MTLTextureType2D;
  526. int imageFormat = 0;
  527. uint32_t arrayLength = 0;
  528. bool isMultisampled = false;
  529. inline MTLArgumentDescriptor *new_argument_descriptor() const {
  530. MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor;
  531. desc.dataType = dataType;
  532. desc.index = index;
  533. desc.access = access;
  534. desc.textureType = textureType;
  535. desc.arrayLength = arrayLength;
  536. return desc;
  537. }
  538. };
  539. using RDC = RenderingDeviceCommons;
  540. typedef API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) HashMap<RDC::ShaderStage, BindingInfo> BindingInfoMap;
  541. struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo {
  542. uint32_t binding;
  543. ShaderStageUsage active_stages = None;
  544. BindingInfoMap bindings;
  545. BindingInfoMap bindings_secondary;
  546. };
  547. struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet {
  548. LocalVector<UniformInfo> uniforms;
  549. uint32_t buffer_size = 0;
  550. HashMap<RDC::ShaderStage, uint32_t> offsets;
  551. HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
  552. };
  553. struct ShaderCacheEntry;
  554. enum class ShaderLoadStrategy {
  555. IMMEDIATE,
  556. LAZY,
  557. /// The default strategy is to load the shader immediately.
  558. DEFAULT = IMMEDIATE,
  559. };
  560. /// A Metal shader library.
  561. @interface MDLibrary : NSObject {
  562. ShaderCacheEntry *_entry;
  563. NSString *_original_source;
  564. };
  565. - (id<MTLLibrary>)library;
  566. - (NSError *)error;
  567. - (void)setLabel:(NSString *)label;
  568. #ifdef DEV_ENABLED
  569. - (NSString *)originalSource;
  570. #endif
  571. + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
  572. device:(id<MTLDevice>)device
  573. source:(NSString *)source
  574. options:(MTLCompileOptions *)options
  575. strategy:(ShaderLoadStrategy)strategy;
  576. + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
  577. device:(id<MTLDevice>)device
  578. #ifdef DEV_ENABLED
  579. source:(NSString *)source
  580. #endif
  581. data:(dispatch_data_t)data;
  582. @end
  583. template <>
  584. struct HashMapComparatorDefault<SHA256Digest> {
  585. static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
  586. return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0;
  587. }
  588. };
  589. /// A cache entry for a Metal shader library.
  590. struct ShaderCacheEntry {
  591. RenderingDeviceDriverMetal &owner;
  592. /// A hash of the Metal shader source code.
  593. SHA256Digest key;
  594. CharString name;
  595. RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX;
  596. /// This reference must be weak, to ensure that when the last strong reference to the library
  597. /// is released, the cache entry is freed.
  598. MDLibrary *__weak library = nil;
  599. /// Notify the cache that this entry is no longer needed.
  600. void notify_free() const;
  601. ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) :
  602. owner(p_owner), key(p_key) {
  603. }
  604. ~ShaderCacheEntry() = default;
  605. };
  606. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
  607. public:
  608. CharString name;
  609. Vector<UniformSet> sets;
  610. bool uses_argument_buffers = true;
  611. MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
  612. name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
  613. virtual ~MDShader() = default;
  614. };
  615. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
  616. public:
  617. struct {
  618. int32_t binding = -1;
  619. uint32_t size = 0;
  620. } push_constants;
  621. MTLSize local = {};
  622. MDLibrary *kernel;
  623. MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
  624. };
  625. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : public MDShader {
  626. public:
  627. struct {
  628. struct {
  629. int32_t binding = -1;
  630. uint32_t size = 0;
  631. } vert;
  632. struct {
  633. int32_t binding = -1;
  634. uint32_t size = 0;
  635. } frag;
  636. } push_constants;
  637. bool needs_view_mask_buffer = false;
  638. MDLibrary *vert;
  639. MDLibrary *frag;
  640. MDRenderShader(CharString p_name,
  641. Vector<UniformSet> p_sets,
  642. bool p_needs_view_mask_buffer,
  643. bool p_uses_argument_buffers,
  644. MDLibrary *p_vert, MDLibrary *p_frag);
  645. };
  646. _FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) {
  647. p_a = StageResourceUsage(uint32_t(p_a) | p_b);
  648. return p_a;
  649. }
  650. _FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTLResourceUsage p_usage) {
  651. return StageResourceUsage(p_usage << (p_stage * 2));
  652. }
  653. _FORCE_INLINE_ MTLResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) {
  654. return MTLResourceUsage((p_usage >> (p_stage * 2)) & 0b11);
  655. }
  656. template <>
  657. struct HashMapComparatorDefault<RDD::ShaderID> {
  658. static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) {
  659. return p_lhs.id == p_rhs.id;
  660. }
  661. };
  662. struct BoundUniformSet {
  663. id<MTLBuffer> buffer;
  664. ResourceUsageMap usage_to_resources;
  665. /// Perform a 2-way merge each key of `ResourceVector` resources from this set into the
  666. /// destination set.
  667. ///
  668. /// Assumes the vectors of resources are sorted.
  669. void merge_into(ResourceUsageMap &p_dst) const;
  670. };
  671. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet {
  672. private:
  673. void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
  674. void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
  675. void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
  676. void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
  677. public:
  678. uint32_t index;
  679. LocalVector<RDD::BoundUniform> uniforms;
  680. HashMap<MDShader *, BoundUniformSet> bound_uniforms;
  681. void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
  682. void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
  683. BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index);
  684. };
  685. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline {
  686. public:
  687. MDPipelineType type;
  688. explicit MDPipeline(MDPipelineType p_type) :
  689. type(p_type) {}
  690. virtual ~MDPipeline() = default;
  691. };
  692. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPipeline final : public MDPipeline {
  693. public:
  694. id<MTLRenderPipelineState> state = nil;
  695. id<MTLDepthStencilState> depth_stencil = nil;
  696. uint32_t push_constant_size = 0;
  697. uint32_t push_constant_stages_mask = 0;
  698. SampleCount sample_count = SampleCount1;
  699. struct {
  700. MTLCullMode cull_mode = MTLCullModeNone;
  701. MTLTriangleFillMode fill_mode = MTLTriangleFillModeFill;
  702. MTLDepthClipMode clip_mode = MTLDepthClipModeClip;
  703. MTLWinding winding = MTLWindingClockwise;
  704. MTLPrimitiveType render_primitive = MTLPrimitiveTypePoint;
  705. struct {
  706. bool enabled = false;
  707. } depth_test;
  708. struct {
  709. bool enabled = false;
  710. float depth_bias = 0.0;
  711. float slope_scale = 0.0;
  712. float clamp = 0.0;
  713. _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
  714. if (!enabled) {
  715. return;
  716. }
  717. [p_enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp];
  718. }
  719. } depth_bias;
  720. struct {
  721. bool enabled = false;
  722. uint32_t front_reference = 0;
  723. uint32_t back_reference = 0;
  724. _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
  725. if (!enabled) {
  726. return;
  727. }
  728. [p_enc setStencilFrontReferenceValue:front_reference backReferenceValue:back_reference];
  729. }
  730. } stencil;
  731. struct {
  732. bool enabled = false;
  733. float r = 0.0;
  734. float g = 0.0;
  735. float b = 0.0;
  736. float a = 0.0;
  737. _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
  738. //if (!enabled)
  739. // return;
  740. [p_enc setBlendColorRed:r green:g blue:b alpha:a];
  741. }
  742. } blend;
  743. _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
  744. [p_enc setCullMode:cull_mode];
  745. [p_enc setTriangleFillMode:fill_mode];
  746. [p_enc setDepthClipMode:clip_mode];
  747. [p_enc setFrontFacingWinding:winding];
  748. depth_bias.apply(p_enc);
  749. stencil.apply(p_enc);
  750. blend.apply(p_enc);
  751. }
  752. } raster_state;
  753. MDRenderShader *shader = nil;
  754. MDRenderPipeline() :
  755. MDPipeline(MDPipelineType::Render) {}
  756. ~MDRenderPipeline() final = default;
  757. };
  758. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputePipeline final : public MDPipeline {
  759. public:
  760. id<MTLComputePipelineState> state = nil;
  761. struct {
  762. MTLSize local = {};
  763. } compute_state;
  764. MDComputeShader *shader = nil;
  765. explicit MDComputePipeline(id<MTLComputePipelineState> p_state) :
  766. MDPipeline(MDPipelineType::Compute), state(p_state) {}
  767. ~MDComputePipeline() final = default;
  768. };
  769. class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer {
  770. Vector<MTL::Texture> textures;
  771. public:
  772. Size2i size;
  773. MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
  774. textures(p_textures), size(p_size) {}
  775. MDFrameBuffer() {}
  776. /// Returns the texture at the given index.
  777. _ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const {
  778. return textures[p_idx];
  779. }
  780. /// Returns true if the texture at the given index is not nil.
  781. _ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {
  782. return textures[p_idx] != nil;
  783. }
  784. /// Set the texture at the given index.
  785. _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) {
  786. textures.write[p_idx] = p_texture;
  787. }
  788. /// Unset or nil the texture at the given index.
  789. _ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {
  790. textures.write[p_idx] = nil;
  791. }
  792. /// Resizes buffers to the specified size.
  793. _ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {
  794. textures.resize(p_size);
  795. }
  796. virtual ~MDFrameBuffer() = default;
  797. };
  798. // These functions are used to convert between Objective-C objects and
  799. // the RIDs used by Godot, respecting automatic reference counting.
  800. namespace rid {
  801. // Converts an Objective-C object to a pointer, and incrementing the
  802. // reference count.
  803. _FORCE_INLINE_ void *owned(id p_id) {
  804. return (__bridge_retained void *)p_id;
  805. }
  806. #define MAKE_ID(FROM, TO) \
  807. _FORCE_INLINE_ TO make(FROM p_obj) { \
  808. return TO(owned(p_obj)); \
  809. }
  810. MAKE_ID(id<MTLTexture>, RDD::TextureID)
  811. MAKE_ID(id<MTLBuffer>, RDD::BufferID)
  812. MAKE_ID(id<MTLSamplerState>, RDD::SamplerID)
  813. MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID)
  814. MAKE_ID(id<MTLCommandQueue>, RDD::CommandPoolID)
  815. // Converts a pointer to an Objective-C object without changing the reference count.
  816. _FORCE_INLINE_ auto get(RDD::ID p_id) {
  817. return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil;
  818. }
  819. // Converts a pointer to an Objective-C object, and decrements the reference count.
  820. _FORCE_INLINE_ auto release(RDD::ID p_id) {
  821. return (__bridge_transfer ::id)(void *)p_id.id;
  822. }
  823. } // namespace rid