// metal_gpu.m
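// Metal backend for the iron_gpu API: render targets, pipelines, shaders,
// buffers, textures, render-target readback and hardware raytracing.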

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <iron_math.h>
#include <iron_system.h>
#include <iron_gpu.h>
#import <Metal/Metal.h>
#import <MetalKit/MTKView.h>

id get_metal_layer(void);
id get_metal_device(void);
id get_metal_queue(void);

bool gpu_transpose_mat = true;
static id<MTLCommandBuffer> command_buffer = nil;
static id<MTLRenderCommandEncoder> command_encoder = nil;
static id<MTLArgumentEncoder> argument_encoder = nil;
static id<MTLBuffer> argument_buffer = nil;
static id<CAMetalDrawable> drawable;
static id<MTLSamplerState> linear_sampler;
static id<MTLSamplerState> point_sampler;
static int argument_buffer_step;
static gpu_buffer_t *current_vb;
static gpu_buffer_t *current_ib;
static gpu_pipeline_t *current_pipeline;
static MTLViewport current_viewport;
static MTLScissorRect current_scissor;
static MTLRenderPassDescriptor *render_pass_desc;
static bool resized = false;
static gpu_texture_t *current_textures[GPU_MAX_TEXTURES] = {
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
static void *readback_buffer;
static int readback_buffer_size = 0;
static bool linear_sampling = true;

static MTLBlendFactor convert_blending_factor(gpu_blending_factor_t factor) {
	switch (factor) {
	case GPU_BLEND_ONE:
		return MTLBlendFactorOne;
	case GPU_BLEND_ZERO:
		return MTLBlendFactorZero;
	case GPU_BLEND_SOURCE_ALPHA:
		return MTLBlendFactorSourceAlpha;
	case GPU_BLEND_DEST_ALPHA:
		return MTLBlendFactorDestinationAlpha;
	case GPU_BLEND_INV_SOURCE_ALPHA:
		return MTLBlendFactorOneMinusSourceAlpha;
	case GPU_BLEND_INV_DEST_ALPHA:
		return MTLBlendFactorOneMinusDestinationAlpha;
	}
}

static MTLCompareFunction convert_compare_mode(gpu_compare_mode_t compare) {
	switch (compare) {
	case GPU_COMPARE_MODE_ALWAYS:
		return MTLCompareFunctionAlways;
	case GPU_COMPARE_MODE_NEVER:
		return MTLCompareFunctionNever;
	case GPU_COMPARE_MODE_EQUAL:
		return MTLCompareFunctionEqual;
	case GPU_COMPARE_MODE_LESS:
		return MTLCompareFunctionLess;
	}
}

static MTLCullMode convert_cull_mode(gpu_cull_mode_t cull) {
	switch (cull) {
	case GPU_CULL_MODE_CLOCKWISE:
		return MTLCullModeFront;
	case GPU_CULL_MODE_COUNTERCLOCKWISE:
		return MTLCullModeBack;
	case GPU_CULL_MODE_NEVER:
		return MTLCullModeNone;
	}
}

static MTLPixelFormat convert_texture_format(gpu_texture_format_t format) {
	switch (format) {
	case GPU_TEXTURE_FORMAT_RGBA128:
		return MTLPixelFormatRGBA32Float;
	case GPU_TEXTURE_FORMAT_RGBA64:
		return MTLPixelFormatRGBA16Float;
	case GPU_TEXTURE_FORMAT_R32:
		return MTLPixelFormatR32Float;
	case GPU_TEXTURE_FORMAT_R16:
		return MTLPixelFormatR16Float;
	case GPU_TEXTURE_FORMAT_R8:
		return MTLPixelFormatR8Unorm;
	case GPU_TEXTURE_FORMAT_D32:
		return MTLPixelFormatDepth32Float;
	default:
		return MTLPixelFormatBGRA8Unorm;
	}
}

void gpu_render_target_init2(gpu_texture_t *target, int width, int height, gpu_texture_format_t format, int framebuffer_index) {
	target->width = width;
	target->height = height;
	target->format = format;
	target->state = GPU_TEXTURE_STATE_RENDER_TARGET;
	target->buffer = NULL;
	if (framebuffer_index < 0) {
		id<MTLDevice> device = get_metal_device();
		MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new];
		descriptor.textureType = MTLTextureType2D;
		descriptor.width = width;
		descriptor.height = height;
		descriptor.depth = 1;
		descriptor.pixelFormat = convert_texture_format(format);
		descriptor.arrayLength = 1;
		descriptor.mipmapLevelCount = 1;
		descriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
		descriptor.resourceOptions = MTLResourceStorageModePrivate;
		target->impl._tex = (__bridge_retained void *)[device newTextureWithDescriptor:descriptor];
	}
}

void gpu_destroy(void) {
	// Release the readback buffer by transferring ownership back to ARC.
	id<MTLBuffer> readback = (__bridge_transfer id<MTLBuffer>)readback_buffer;
	readback = nil;
}

void gpu_resize_internal(int width, int height) {
	resized = true;
}

static void next_drawable() {
	CAMetalLayer *layer = get_metal_layer();
	drawable = [layer nextDrawable];
	framebuffers[framebuffer_index].impl._tex = (__bridge void *)drawable.texture;
}

void gpu_init_internal(int depth_buffer_bits, bool vsync) {
	id<MTLDevice> device = get_metal_device();
	MTLSamplerDescriptor *linear_desc = [MTLSamplerDescriptor new];
	linear_desc.minFilter = MTLSamplerMinMagFilterLinear;
	linear_desc.magFilter = MTLSamplerMinMagFilterLinear;
	linear_desc.mipFilter = MTLSamplerMipFilterLinear;
	linear_desc.sAddressMode = MTLSamplerAddressModeRepeat;
	linear_desc.tAddressMode = MTLSamplerAddressModeRepeat;
	linear_desc.supportArgumentBuffers = true;
	linear_sampler = [device newSamplerStateWithDescriptor:linear_desc];
	MTLSamplerDescriptor *point_desc = [MTLSamplerDescriptor new];
	point_desc.minFilter = MTLSamplerMinMagFilterNearest;
	point_desc.magFilter = MTLSamplerMinMagFilterNearest;
	point_desc.mipFilter = MTLSamplerMipFilterNearest;
	point_desc.sAddressMode = MTLSamplerAddressModeRepeat;
	point_desc.tAddressMode = MTLSamplerAddressModeRepeat;
	point_desc.supportArgumentBuffers = true;
	point_sampler = [device newSamplerStateWithDescriptor:point_desc];
	MTLArgumentDescriptor *constants_desc = [MTLArgumentDescriptor argumentDescriptor];
	constants_desc.dataType = MTLDataTypePointer;
	constants_desc.index = 0;
	MTLArgumentDescriptor *sampler_desc = [MTLArgumentDescriptor argumentDescriptor];
	sampler_desc.dataType = MTLDataTypeSampler;
	sampler_desc.index = 1;
	MTLArgumentDescriptor *texture_desc[GPU_MAX_TEXTURES];
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		texture_desc[i] = [MTLArgumentDescriptor argumentDescriptor];
		texture_desc[i].dataType = MTLDataTypeTexture;
		texture_desc[i].index = i + 2;
		texture_desc[i].textureType = MTLTextureType2D;
	}
	NSArray *arguments = [NSArray arrayWithObjects:constants_desc, sampler_desc, texture_desc[0], texture_desc[1], texture_desc[2], texture_desc[3], texture_desc[4], texture_desc[5], texture_desc[6], texture_desc[7], texture_desc[8], texture_desc[9], texture_desc[10], texture_desc[11], texture_desc[12], texture_desc[13], texture_desc[14], texture_desc[15], nil];
	argument_encoder = [device newArgumentEncoderWithArguments:arguments];
	argument_buffer_step = [argument_encoder encodedLength];
	argument_buffer = [device newBufferWithLength:(argument_buffer_step * GPU_CONSTANT_BUFFER_MULTIPLE) options:MTLResourceStorageModeShared];
	gpu_create_framebuffers(depth_buffer_bits);
	next_drawable();
}

void gpu_begin_internal(unsigned flags, unsigned color, float depth) {
	render_pass_desc = [MTLRenderPassDescriptor renderPassDescriptor];
	for (int i = 0; i < current_render_targets_count; ++i) {
		render_pass_desc.colorAttachments[i].texture = (__bridge id<MTLTexture>)current_render_targets[i]->impl._tex;
		if (flags & GPU_CLEAR_COLOR) {
			float red, green, blue, alpha;
			iron_color_components(color, &red, &green, &blue, &alpha);
			render_pass_desc.colorAttachments[i].loadAction = MTLLoadActionClear;
			render_pass_desc.colorAttachments[i].storeAction = MTLStoreActionStore;
			render_pass_desc.colorAttachments[i].clearColor = MTLClearColorMake(red, green, blue, alpha);
		}
		else {
			render_pass_desc.colorAttachments[i].loadAction = MTLLoadActionLoad;
			render_pass_desc.colorAttachments[i].storeAction = MTLStoreActionStore;
			render_pass_desc.colorAttachments[i].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0);
		}
	}
	if (current_depth_buffer != NULL) {
		render_pass_desc.depthAttachment.texture = (__bridge id<MTLTexture>)current_depth_buffer->impl._tex;
	}
	if (flags & GPU_CLEAR_DEPTH) {
		render_pass_desc.depthAttachment.clearDepth = depth;
		render_pass_desc.depthAttachment.loadAction = MTLLoadActionClear;
		render_pass_desc.depthAttachment.storeAction = MTLStoreActionStore;
	}
	else {
		render_pass_desc.depthAttachment.clearDepth = 1;
		render_pass_desc.depthAttachment.loadAction = MTLLoadActionLoad;
		render_pass_desc.depthAttachment.storeAction = MTLStoreActionStore;
	}
	id<MTLCommandQueue> queue = get_metal_queue();
	if (command_buffer == nil) {
		command_buffer = [queue commandBuffer];
	}
	command_encoder = [command_buffer renderCommandEncoderWithDescriptor:render_pass_desc];
	current_viewport.originX = 0;
	current_viewport.originY = 0;
	current_viewport.width = current_render_targets[0]->width;
	current_viewport.height = current_render_targets[0]->height;
	current_scissor.x = 0;
	current_scissor.y = 0;
	current_scissor.width = current_render_targets[0]->width;
	current_scissor.height = current_render_targets[0]->height;
}

void gpu_end_internal() {
	[command_encoder endEncoding];
	current_render_targets_count = 0;
}

void gpu_execute_and_wait() {
	if (gpu_in_use) {
		[command_encoder endEncoding];
	}
	[command_buffer commit];
	[command_buffer waitUntilCompleted];
	id<MTLCommandQueue> queue = get_metal_queue();
	command_buffer = [queue commandBuffer];
	if (gpu_in_use) {
		// Re-open the render pass and restore the cached pipeline, buffers and state.
		for (int i = 0; i < current_render_targets_count; ++i) {
			render_pass_desc.colorAttachments[i].loadAction = MTLLoadActionLoad;
		}
		render_pass_desc.depthAttachment.loadAction = MTLLoadActionLoad;
		command_encoder = [command_buffer renderCommandEncoderWithDescriptor:render_pass_desc];
		id<MTLRenderPipelineState> pipe = (__bridge id<MTLRenderPipelineState>)current_pipeline->impl._pipeline;
		[command_encoder setRenderPipelineState:pipe];
		id<MTLDepthStencilState> depth_state = (__bridge id<MTLDepthStencilState>)current_pipeline->impl._depth;
		[command_encoder setDepthStencilState:depth_state];
		[command_encoder setFrontFacingWinding:MTLWindingClockwise];
		[command_encoder setCullMode:convert_cull_mode(current_pipeline->cull_mode)];
		id<MTLBuffer> vb = (__bridge id<MTLBuffer>)current_vb->impl.metal_buffer;
		[command_encoder setVertexBuffer:vb offset:0 atIndex:0];
		[command_encoder setViewport:current_viewport];
		[command_encoder setScissorRect:current_scissor];
	}
}

void gpu_present_internal() {
	[command_buffer presentDrawable:drawable];
	[command_buffer commit];
	[command_buffer waitUntilCompleted];
	drawable = nil;
	command_buffer = nil;
	command_encoder = nil;
	if (resized) {
		CAMetalLayer *layer = get_metal_layer();
		layer.drawableSize = CGSizeMake(iron_window_width(), iron_window_height());
		for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
			// gpu_texture_destroy_internal(&framebuffers[i]);
			gpu_render_target_init2(&framebuffers[i], iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_RGBA32, i);
		}
		resized = false;
	}
	next_drawable();
}

void gpu_barrier(gpu_texture_t *render_target, gpu_texture_state_t state_after) {
}

void gpu_draw_internal() {
	id<MTLBuffer> index_buffer = (__bridge id<MTLBuffer>)current_ib->impl.metal_buffer;
	[command_encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
		indexCount:current_ib->count
		indexType:MTLIndexTypeUInt32
		indexBuffer:index_buffer
		indexBufferOffset:0];
}

void gpu_viewport(int x, int y, int width, int height) {
	current_viewport.originX = x;
	current_viewport.originY = y;
	current_viewport.width = width;
	current_viewport.height = height;
	current_viewport.znear = 0.1;
	current_viewport.zfar = 100.0;
	[command_encoder setViewport:current_viewport];
}

void gpu_scissor(int x, int y, int width, int height) {
	current_scissor.x = x;
	current_scissor.y = y;
	// Clamp the scissor rect to the bounds of the current render target.
	int target_w = current_render_targets[0]->width;
	int target_h = current_render_targets[0]->height;
	current_scissor.width = (x + width <= target_w) ? width : target_w - x;
	current_scissor.height = (y + height <= target_h) ? height : target_h - y;
	[command_encoder setScissorRect:current_scissor];
}

void gpu_disable_scissor() {
	current_scissor.x = 0;
	current_scissor.y = 0;
	current_scissor.width = current_render_targets[0]->width;
	current_scissor.height = current_render_targets[0]->height;
	[command_encoder setScissorRect:current_scissor];
}

void gpu_set_pipeline(gpu_pipeline_t *pipeline) {
	current_pipeline = pipeline;
	id<MTLRenderPipelineState> pipe = (__bridge id<MTLRenderPipelineState>)pipeline->impl._pipeline;
	[command_encoder setRenderPipelineState:pipe];
	id<MTLDepthStencilState> depth_state = (__bridge id<MTLDepthStencilState>)pipeline->impl._depth;
	[command_encoder setDepthStencilState:depth_state];
	[command_encoder setFrontFacingWinding:MTLWindingClockwise];
	[command_encoder setCullMode:convert_cull_mode(pipeline->cull_mode)];
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		current_textures[i] = NULL;
	}
}

void gpu_set_vertex_buffer(gpu_buffer_t *buffer) {
	current_vb = buffer;
	id<MTLBuffer> buf = (__bridge id<MTLBuffer>)buffer->impl.metal_buffer;
	[command_encoder setVertexBuffer:buf offset:0 atIndex:0];
}

void gpu_set_index_buffer(gpu_buffer_t *buffer) {
	current_ib = buffer;
}

void gpu_get_render_target_pixels(gpu_texture_t *render_target, uint8_t *data) {
	gpu_execute_and_wait();
	// Grow the shared readback buffer if it is too small for this target.
	int buffer_size = render_target->width * render_target->height * gpu_texture_format_size(render_target->format);
	int new_readback_buffer_size = buffer_size;
	if (new_readback_buffer_size < (2048 * 2048 * 4)) {
		new_readback_buffer_size = (2048 * 2048 * 4);
	}
	if (readback_buffer_size < new_readback_buffer_size) {
		readback_buffer_size = new_readback_buffer_size;
		if (readback_buffer != NULL) {
			id<MTLBuffer> readback = (__bridge_transfer id<MTLBuffer>)readback_buffer;
			readback = nil;
		}
		id<MTLDevice> device = get_metal_device();
		readback_buffer = (__bridge_retained void *)[device newBufferWithLength:new_readback_buffer_size options:MTLResourceStorageModeShared];
	}
	// Copy render target to readback buffer
	id<MTLCommandQueue> queue = get_metal_queue();
	id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
	id<MTLBlitCommandEncoder> command_encoder = [command_buffer blitCommandEncoder];
	[command_encoder copyFromTexture:(__bridge id<MTLTexture>)render_target->impl._tex
		sourceSlice:0
		sourceLevel:0
		sourceOrigin:MTLOriginMake(0, 0, 0)
		sourceSize:MTLSizeMake(render_target->width, render_target->height, 1)
		toBuffer:(__bridge id<MTLBuffer>)readback_buffer
		destinationOffset:0
		destinationBytesPerRow:render_target->width * gpu_texture_format_size(render_target->format)
		destinationBytesPerImage:0];
	[command_encoder endEncoding];
	[command_buffer commit];
	[command_buffer waitUntilCompleted];
	// Read buffer
	id<MTLBuffer> buffer = (__bridge id<MTLBuffer>)readback_buffer;
	memcpy(data, [buffer contents], render_target->width * render_target->height * gpu_texture_format_size(render_target->format));
}

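// Argument buffer layout (set up in gpu_init_internal): constant buffer at
// index 0, sampler at index 1, textures at indices 2 .. GPU_MAX_TEXTURES + 1.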
void gpu_set_constant_buffer(gpu_buffer_t *buffer, int offset, size_t size) {
	id<MTLBuffer> buf = (__bridge id<MTLBuffer>)buffer->impl.metal_buffer;
	[argument_encoder setArgumentBuffer:argument_buffer offset:argument_buffer_step * constant_buffer_index];
	[argument_encoder setBuffer:buf offset:offset atIndex:0];
	[argument_encoder setSamplerState:(linear_sampling ? linear_sampler : point_sampler) atIndex:1];
	[command_encoder setVertexBuffer:argument_buffer offset:argument_buffer_step * constant_buffer_index atIndex:1];
	[command_encoder setFragmentBuffer:argument_buffer offset:argument_buffer_step * constant_buffer_index atIndex:1];
	[command_encoder useResource:buf usage:MTLResourceUsageRead stages:MTLRenderStageVertex | MTLRenderStageFragment];
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		if (current_textures[i] == NULL) {
			break;
		}
		id<MTLTexture> tex = (__bridge id<MTLTexture>)current_textures[i]->impl._tex;
		[argument_encoder setTexture:tex atIndex:i + 2];
		[command_encoder useResource:tex usage:MTLResourceUsageRead stages:MTLRenderStageVertex | MTLRenderStageFragment];
	}
}

void gpu_set_texture(int unit, gpu_texture_t *texture) {
	current_textures[unit] = texture;
}

void gpu_use_linear_sampling(bool b) {
	linear_sampling = b;
}

void gpu_pipeline_destroy_internal(gpu_pipeline_t *pipeline) {
	id<MTLRenderPipelineState> pipe = (__bridge_transfer id<MTLRenderPipelineState>)pipeline->impl._pipeline;
	pipe = nil;
	pipeline->impl._pipeline = NULL;
	id<MTLDepthStencilState> depth_state = (__bridge_transfer id<MTLDepthStencilState>)pipeline->impl._depth;
	depth_state = nil;
	pipeline->impl._depth = NULL;
}

void gpu_pipeline_compile(gpu_pipeline_t *pipeline) {
	id<MTLDevice> device = get_metal_device();
	NSError *error = nil;
	id<MTLLibrary> library = [device newLibraryWithSource:[[NSString alloc] initWithBytes:pipeline->vertex_shader->impl.source length:pipeline->vertex_shader->impl.length encoding:NSUTF8StringEncoding] options:nil error:&error];
	if (library == nil) {
		iron_error("%s", error.localizedDescription.UTF8String);
	}
	pipeline->vertex_shader->impl.mtl_function = (__bridge_retained void *)[library newFunctionWithName:[NSString stringWithCString:pipeline->vertex_shader->impl.name encoding:NSUTF8StringEncoding]];
	assert(pipeline->vertex_shader->impl.mtl_function);
	pipeline->fragment_shader->impl.mtl_function = (__bridge_retained void *)[library newFunctionWithName:[NSString stringWithCString:pipeline->fragment_shader->impl.name encoding:NSUTF8StringEncoding]];
	assert(pipeline->fragment_shader->impl.mtl_function);
	MTLRenderPipelineDescriptor *render_pipeline_desc = [[MTLRenderPipelineDescriptor alloc] init];
	render_pipeline_desc.vertexFunction = (__bridge id<MTLFunction>)pipeline->vertex_shader->impl.mtl_function;
	render_pipeline_desc.fragmentFunction = (__bridge id<MTLFunction>)pipeline->fragment_shader->impl.mtl_function;
	for (int i = 0; i < pipeline->color_attachment_count; ++i) {
		render_pipeline_desc.colorAttachments[i].pixelFormat = convert_texture_format(pipeline->color_attachment[i]);
		render_pipeline_desc.colorAttachments[i].blendingEnabled =
			pipeline->blend_source != GPU_BLEND_ONE || pipeline->blend_destination != GPU_BLEND_ZERO ||
			pipeline->alpha_blend_source != GPU_BLEND_ONE || pipeline->alpha_blend_destination != GPU_BLEND_ZERO;
		render_pipeline_desc.colorAttachments[i].sourceRGBBlendFactor = convert_blending_factor(pipeline->blend_source);
		render_pipeline_desc.colorAttachments[i].destinationRGBBlendFactor = convert_blending_factor(pipeline->blend_destination);
		render_pipeline_desc.colorAttachments[i].rgbBlendOperation = MTLBlendOperationAdd;
		render_pipeline_desc.colorAttachments[i].sourceAlphaBlendFactor = convert_blending_factor(pipeline->alpha_blend_source);
		render_pipeline_desc.colorAttachments[i].destinationAlphaBlendFactor = convert_blending_factor(pipeline->alpha_blend_destination);
		render_pipeline_desc.colorAttachments[i].alphaBlendOperation = MTLBlendOperationAdd;
		render_pipeline_desc.colorAttachments[i].writeMask =
			(pipeline->color_write_mask_red[i] ? MTLColorWriteMaskRed : 0) |
			(pipeline->color_write_mask_green[i] ? MTLColorWriteMaskGreen : 0) |
			(pipeline->color_write_mask_blue[i] ? MTLColorWriteMaskBlue : 0) |
			(pipeline->color_write_mask_alpha[i] ? MTLColorWriteMaskAlpha : 0);
	}
	render_pipeline_desc.depthAttachmentPixelFormat = pipeline->depth_attachment_bits > 0 ? MTLPixelFormatDepth32Float : MTLPixelFormatInvalid;
	int offset = 0;
	MTLVertexDescriptor *vertex_descriptor = [[MTLVertexDescriptor alloc] init];
	for (int i = 0; i < pipeline->input_layout->size; ++i) {
		vertex_descriptor.attributes[i].bufferIndex = 0;
		vertex_descriptor.attributes[i].offset = offset;
		offset += gpu_vertex_data_size(pipeline->input_layout->elements[i].data);
		switch (pipeline->input_layout->elements[i].data) {
		case GPU_VERTEX_DATA_F32_1X:
			vertex_descriptor.attributes[i].format = MTLVertexFormatFloat;
			break;
		case GPU_VERTEX_DATA_F32_2X:
			vertex_descriptor.attributes[i].format = MTLVertexFormatFloat2;
			break;
		case GPU_VERTEX_DATA_F32_3X:
			vertex_descriptor.attributes[i].format = MTLVertexFormatFloat3;
			break;
		case GPU_VERTEX_DATA_F32_4X:
			vertex_descriptor.attributes[i].format = MTLVertexFormatFloat4;
			break;
		case GPU_VERTEX_DATA_I16_2X_NORM:
			vertex_descriptor.attributes[i].format = MTLVertexFormatShort2Normalized;
			break;
		case GPU_VERTEX_DATA_I16_4X_NORM:
			vertex_descriptor.attributes[i].format = MTLVertexFormatShort4Normalized;
			break;
		}
	}
	vertex_descriptor.layouts[0].stride = offset;
	vertex_descriptor.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
	render_pipeline_desc.vertexDescriptor = vertex_descriptor;
	NSError *errors = nil;
	MTLRenderPipelineReflection *reflection = nil;
	pipeline->impl._pipeline = (__bridge_retained void *)[device newRenderPipelineStateWithDescriptor:render_pipeline_desc
		options:MTLPipelineOptionBufferTypeInfo
		reflection:&reflection
		error:&errors];
	MTLDepthStencilDescriptor *depth_descriptor = [MTLDepthStencilDescriptor new];
	depth_descriptor.depthCompareFunction = convert_compare_mode(pipeline->depth_mode);
	depth_descriptor.depthWriteEnabled = pipeline->depth_write;
	pipeline->impl._depth = (__bridge_retained void *)[device newDepthStencilStateWithDescriptor:depth_descriptor];
}

void gpu_shader_destroy(gpu_shader_t *shader) {
	id<MTLFunction> function = (__bridge_transfer id<MTLFunction>)shader->impl.mtl_function;
	function = nil;
	shader->impl.mtl_function = NULL;
}

void gpu_shader_init(gpu_shader_t *shader, const void *data, size_t length, gpu_shader_type_t type) {
	shader->impl.name[0] = 0;
	const char *source = data;
	// The source starts with a "//>" comment line; copy the entry point name from it.
	for (int i = 3; i < length; ++i) {
		if (source[i] == '\n') {
			shader->impl.name[i - 3] = 0;
			break;
		}
		shader->impl.name[i - 3] = source[i];
	}
	shader->impl.source = data;
	shader->impl.length = length;
}

void gpu_texture_init_from_bytes(gpu_texture_t *texture, void *data, int width, int height, gpu_texture_format_t format) {
	texture->width = width;
	texture->height = height;
	texture->format = format;
	texture->state = GPU_TEXTURE_STATE_SHADER_RESOURCE;
	texture->buffer = NULL;
	MTLPixelFormat mtlformat = convert_texture_format(format);
	if (mtlformat == MTLPixelFormatBGRA8Unorm) {
		mtlformat = MTLPixelFormatRGBA8Unorm;
	}
	MTLTextureDescriptor *descriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:mtlformat
		width:width
		height:height
		mipmapped:NO];
	descriptor.textureType = MTLTextureType2D;
	descriptor.width = width;
	descriptor.height = height;
	descriptor.depth = 1;
	descriptor.pixelFormat = mtlformat;
	descriptor.arrayLength = 1;
	descriptor.mipmapLevelCount = 1;
	descriptor.usage = MTLTextureUsageShaderRead; // MTLTextureUsageShaderWrite
	id<MTLDevice> device = get_metal_device();
	id<MTLTexture> tex = [device newTextureWithDescriptor:descriptor];
	texture->impl._tex = (__bridge_retained void *)tex;
	[tex replaceRegion:MTLRegionMake2D(0, 0, width, height)
		mipmapLevel:0
		slice:0
		withBytes:data
		bytesPerRow:width * gpu_texture_format_size(format)
		bytesPerImage:width * gpu_texture_format_size(format) * height];
}

void gpu_texture_destroy_internal(gpu_texture_t *target) {
	id<MTLTexture> tex = (__bridge_transfer id<MTLTexture>)target->impl._tex;
	tex = nil;
	target->impl._tex = NULL;
}

void gpu_render_target_init(gpu_texture_t *target, int width, int height, gpu_texture_format_t format) {
	gpu_render_target_init2(target, width, height, format, -1);
}

void gpu_vertex_buffer_init(gpu_buffer_t *buffer, int count, gpu_vertex_structure_t *structure) {
	buffer->count = count;
	buffer->stride = 0;
	for (int i = 0; i < structure->size; ++i) {
		gpu_vertex_element_t element = structure->elements[i];
		buffer->stride += gpu_vertex_data_size(element.data);
	}
	id<MTLDevice> device = get_metal_device();
	MTLResourceOptions options = MTLResourceCPUCacheModeWriteCombined;
	options |= MTLResourceStorageModeShared;
	id<MTLBuffer> buf = [device newBufferWithLength:count * buffer->stride options:options];
	buffer->impl.metal_buffer = (__bridge_retained void *)buf;
}

void *gpu_vertex_buffer_lock(gpu_buffer_t *buf) {
	id<MTLBuffer> buffer = (__bridge id<MTLBuffer>)buf->impl.metal_buffer;
	return [buffer contents];
}

void gpu_vertex_buffer_unlock(gpu_buffer_t *buf) {
}

void gpu_constant_buffer_init(gpu_buffer_t *buffer, int size) {
	buffer->count = size;
	buffer->data = NULL;
	buffer->impl.metal_buffer = (__bridge_retained void *)[get_metal_device() newBufferWithLength:size options:MTLResourceOptionCPUCacheModeDefault];
}

void gpu_constant_buffer_lock(gpu_buffer_t *buffer, int start, int count) {
	id<MTLBuffer> buf = (__bridge id<MTLBuffer>)buffer->impl.metal_buffer;
	uint8_t *data = (uint8_t *)[buf contents];
	buffer->data = &data[start];
}

void gpu_constant_buffer_unlock(gpu_buffer_t *buffer) {
}

void gpu_index_buffer_init(gpu_buffer_t *buffer, int indexCount) {
	buffer->count = indexCount;
	id<MTLDevice> device = get_metal_device();
	MTLResourceOptions options = MTLResourceCPUCacheModeWriteCombined;
	options |= MTLResourceStorageModeShared;
	buffer->impl.metal_buffer = (__bridge_retained void *)[device newBufferWithLength:sizeof(uint32_t) * indexCount options:options];
}

void gpu_buffer_destroy_internal(gpu_buffer_t *buffer) {
	id<MTLBuffer> buf = (__bridge_transfer id<MTLBuffer>)buffer->impl.metal_buffer;
	buf = nil;
	buffer->impl.metal_buffer = NULL;
}

void *gpu_index_buffer_lock(gpu_buffer_t *buffer) {
	id<MTLBuffer> metal_buffer = (__bridge id<MTLBuffer>)buffer->impl.metal_buffer;
	uint8_t *data = (uint8_t *)[metal_buffer contents];
	return data;
}

void gpu_index_buffer_unlock(gpu_buffer_t *buffer) {
}

char *gpu_device_name() {
	id<MTLDevice> device = get_metal_device();
	return (char *)[device.name UTF8String];
}

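// Raytracing path: bottom- and top-level Metal acceleration structures driving
// a compute pipeline built from the "raytracingKernel" function.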
typedef struct inst {
	iron_matrix4x4_t m;
	int i;
} inst_t;

static gpu_raytrace_acceleration_structure_t *accel;
static gpu_raytrace_pipeline_t *pipeline;
static gpu_texture_t *output = NULL;
static gpu_buffer_t *constant_buf;
static id<MTLComputePipelineState> _raytracing_pipeline;
static NSMutableArray *_primitive_accels;
static id<MTLAccelerationStructure> _instance_accel;
static dispatch_semaphore_t _semaphore;
static gpu_texture_t *_texpaint0;
static gpu_texture_t *_texpaint1;
static gpu_texture_t *_texpaint2;
static gpu_texture_t *_texenv;
static gpu_texture_t *_texsobol;
static gpu_texture_t *_texscramble;
static gpu_texture_t *_texrank;
static gpu_buffer_t *vb[16];
static gpu_buffer_t *vb_last[16];
static gpu_buffer_t *ib[16];
static int vb_count = 0;
static int vb_count_last = 0;
static inst_t instances[1024];
static int instances_count = 0;

void gpu_raytrace_pipeline_init(gpu_raytrace_pipeline_t *pipeline, void *ray_shader, int ray_shader_size, gpu_buffer_t *constant_buffer) {
	id<MTLDevice> device = get_metal_device();
	if (!device.supportsRaytracing) return;
	constant_buf = constant_buffer;
	NSError *error = nil;
	id<MTLLibrary> library = [device newLibraryWithSource:[[NSString alloc] initWithBytes:ray_shader length:ray_shader_size encoding:NSUTF8StringEncoding]
		options:nil
		error:&error];
	if (library == nil) {
		iron_error("%s", error.localizedDescription.UTF8String);
	}
	MTLComputePipelineDescriptor *descriptor = [[MTLComputePipelineDescriptor alloc] init];
	descriptor.computeFunction = [library newFunctionWithName:@"raytracingKernel"];
	descriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES;
	_raytracing_pipeline = [device newComputePipelineStateWithDescriptor:descriptor options:0 reflection:nil error:&error];
	_semaphore = dispatch_semaphore_create(2);
}

void gpu_raytrace_pipeline_destroy(gpu_raytrace_pipeline_t *pipeline) {
}

bool gpu_raytrace_supported() {
	id<MTLDevice> device = get_metal_device();
	return device.supportsRaytracing;
}

id<MTLAccelerationStructure> create_acceleration_structure(MTLAccelerationStructureDescriptor *descriptor) {
	id<MTLDevice> device = get_metal_device();
	id<MTLCommandQueue> queue = get_metal_queue();
	// Build the acceleration structure, then compact it into a smaller allocation.
	MTLAccelerationStructureSizes accel_sizes = [device accelerationStructureSizesWithDescriptor:descriptor];
	id<MTLAccelerationStructure> acceleration_structure = [device newAccelerationStructureWithSize:accel_sizes.accelerationStructureSize];
	id<MTLBuffer> scratch_buffer = [device newBufferWithLength:accel_sizes.buildScratchBufferSize options:MTLResourceStorageModePrivate];
	id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
	id<MTLAccelerationStructureCommandEncoder> command_encoder = [command_buffer accelerationStructureCommandEncoder];
	id<MTLBuffer> compacted_size_buffer = [device newBufferWithLength:sizeof(uint32_t) options:MTLResourceStorageModeShared];
	[command_encoder buildAccelerationStructure:acceleration_structure descriptor:descriptor scratchBuffer:scratch_buffer scratchBufferOffset:0];
	[command_encoder writeCompactedAccelerationStructureSize:acceleration_structure toBuffer:compacted_size_buffer offset:0];
	[command_encoder endEncoding];
	[command_buffer commit];
	[command_buffer waitUntilCompleted];
	uint32_t compacted_size = *(uint32_t *)compacted_size_buffer.contents;
	id<MTLAccelerationStructure> compacted_acceleration_structure = [device newAccelerationStructureWithSize:compacted_size];
	command_buffer = [queue commandBuffer];
	command_encoder = [command_buffer accelerationStructureCommandEncoder];
	[command_encoder copyAndCompactAccelerationStructure:acceleration_structure toAccelerationStructure:compacted_acceleration_structure];
	[command_encoder endEncoding];
	[command_buffer commit];
	return compacted_acceleration_structure;
}

void gpu_raytrace_acceleration_structure_init(gpu_raytrace_acceleration_structure_t *accel) {
	vb_count = 0;
	instances_count = 0;
}

void gpu_raytrace_acceleration_structure_add(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *_vb, gpu_buffer_t *_ib,
		iron_matrix4x4_t _transform) {
	int vb_i = -1;
	for (int i = 0; i < vb_count; ++i) {
		if (_vb == vb[i]) {
			vb_i = i;
			break;
		}
	}
	if (vb_i == -1) {
		vb_i = vb_count;
		vb[vb_count] = _vb;
		ib[vb_count] = _ib;
		vb_count++;
	}
	inst_t inst = { .i = vb_i, .m = _transform };
	instances[instances_count] = inst;
	instances_count++;
}

void _gpu_raytrace_acceleration_structure_destroy_bottom(gpu_raytrace_acceleration_structure_t *accel) {
	// for (int i = 0; i < vb_count_last; ++i) {
	// }
	_primitive_accels = nil;
}

void _gpu_raytrace_acceleration_structure_destroy_top(gpu_raytrace_acceleration_structure_t *accel) {
	_instance_accel = nil;
}

void gpu_raytrace_acceleration_structure_build(gpu_raytrace_acceleration_structure_t *accel,
		gpu_buffer_t *_vb_full, gpu_buffer_t *_ib_full) {
	bool build_bottom = false;
	for (int i = 0; i < 16; ++i) {
		if (vb_last[i] != vb[i]) {
			build_bottom = true;
		}
		vb_last[i] = vb[i];
	}
	if (vb_count_last > 0) {
		if (build_bottom) {
			_gpu_raytrace_acceleration_structure_destroy_bottom(accel);
		}
		_gpu_raytrace_acceleration_structure_destroy_top(accel);
	}
	vb_count_last = vb_count;
	if (vb_count == 0) {
		return;
	}
	id<MTLDevice> device = get_metal_device();
	if (!device.supportsRaytracing) {
		return;
	}
	MTLResourceOptions options = MTLResourceStorageModeShared;
	// Bottom-level acceleration structure for the first geometry.
	MTLAccelerationStructureTriangleGeometryDescriptor *descriptor = [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
	descriptor.indexType = MTLIndexTypeUInt32;
	descriptor.indexBuffer = (__bridge id<MTLBuffer>)ib[0]->impl.metal_buffer;
	descriptor.vertexBuffer = (__bridge id<MTLBuffer>)vb[0]->impl.metal_buffer;
	descriptor.vertexStride = vb[0]->stride;
	descriptor.triangleCount = ib[0]->count / 3;
	descriptor.vertexFormat = MTLAttributeFormatShort4Normalized;
	MTLPrimitiveAccelerationStructureDescriptor *accel_descriptor = [MTLPrimitiveAccelerationStructureDescriptor descriptor];
	accel_descriptor.geometryDescriptors = @[ descriptor ];
	id<MTLAccelerationStructure> acceleration_structure = create_acceleration_structure(accel_descriptor);
	_primitive_accels = [[NSMutableArray alloc] init];
	[_primitive_accels addObject:acceleration_structure];
	// Top-level (instance) acceleration structure with a single instance.
	id<MTLBuffer> instance_buffer = [device newBufferWithLength:sizeof(MTLAccelerationStructureInstanceDescriptor) * 1 options:options];
	MTLAccelerationStructureInstanceDescriptor *instance_descriptors = (MTLAccelerationStructureInstanceDescriptor *)instance_buffer.contents;
	instance_descriptors[0].accelerationStructureIndex = 0;
	instance_descriptors[0].options = MTLAccelerationStructureInstanceOptionOpaque;
	instance_descriptors[0].mask = 1;
	instance_descriptors[0].transformationMatrix.columns[0] = MTLPackedFloat3Make(instances[0].m.m[0], instances[0].m.m[1], instances[0].m.m[2]);
	instance_descriptors[0].transformationMatrix.columns[1] = MTLPackedFloat3Make(instances[0].m.m[4], instances[0].m.m[5], instances[0].m.m[6]);
	instance_descriptors[0].transformationMatrix.columns[2] = MTLPackedFloat3Make(instances[0].m.m[8], instances[0].m.m[9], instances[0].m.m[10]);
	instance_descriptors[0].transformationMatrix.columns[3] = MTLPackedFloat3Make(instances[0].m.m[12], instances[0].m.m[13], instances[0].m.m[14]);
	MTLInstanceAccelerationStructureDescriptor *inst_accel_descriptor = [MTLInstanceAccelerationStructureDescriptor descriptor];
	inst_accel_descriptor.instancedAccelerationStructures = _primitive_accels;
	inst_accel_descriptor.instanceCount = 1;
	inst_accel_descriptor.instanceDescriptorBuffer = instance_buffer;
	_instance_accel = create_acceleration_structure(inst_accel_descriptor);
}

void gpu_raytrace_acceleration_structure_destroy(gpu_raytrace_acceleration_structure_t *accel) {}

void gpu_raytrace_set_textures(gpu_texture_t *texpaint0, gpu_texture_t *texpaint1, gpu_texture_t *texpaint2, gpu_texture_t *texenv, gpu_texture_t *texsobol, gpu_texture_t *texscramble, gpu_texture_t *texrank) {
	_texpaint0 = texpaint0;
	_texpaint1 = texpaint1;
	_texpaint2 = texpaint2;
	_texenv = texenv;
	_texsobol = texsobol;
	_texscramble = texscramble;
	_texrank = texrank;
}

void gpu_raytrace_set_acceleration_structure(gpu_raytrace_acceleration_structure_t *_accel) {
	accel = _accel;
}

void gpu_raytrace_set_pipeline(gpu_raytrace_pipeline_t *_pipeline) {
	pipeline = _pipeline;
}

void gpu_raytrace_set_target(gpu_texture_t *_output) {
	output = _output;
}

void gpu_raytrace_dispatch_rays() {
	id<MTLDevice> device = get_metal_device();
	if (!device.supportsRaytracing) return;
	dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER);
	id<MTLCommandQueue> queue = get_metal_queue();
	id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
	__block dispatch_semaphore_t sem = _semaphore;
	[command_buffer addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
		dispatch_semaphore_signal(sem);
	}];
	NSUInteger width = output->width;
	NSUInteger height = output->height;
	MTLSize threads_per_threadgroup = MTLSizeMake(8, 8, 1);
	MTLSize threadgroups = MTLSizeMake((width + threads_per_threadgroup.width - 1) / threads_per_threadgroup.width,
		(height + threads_per_threadgroup.height - 1) / threads_per_threadgroup.height, 1);
	id<MTLComputeCommandEncoder> compute_encoder = [command_buffer computeCommandEncoder];
	[compute_encoder setBuffer:(__bridge id<MTLBuffer>)constant_buf->impl.metal_buffer offset:0 atIndex:0];
	[compute_encoder setAccelerationStructure:_instance_accel atBufferIndex:1];
	[compute_encoder setBuffer:(__bridge id<MTLBuffer>)ib[0]->impl.metal_buffer offset:0 atIndex:2];
	[compute_encoder setBuffer:(__bridge id<MTLBuffer>)vb[0]->impl.metal_buffer offset:0 atIndex:3];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)output->impl._tex atIndex:0];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texpaint0->impl._tex atIndex:1];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texpaint1->impl._tex atIndex:2];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texpaint2->impl._tex atIndex:3];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texenv->impl._tex atIndex:4];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texsobol->impl._tex atIndex:5];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texscramble->impl._tex atIndex:6];
	[compute_encoder setTexture:(__bridge id<MTLTexture>)_texrank->impl._tex atIndex:7];
	for (id<MTLAccelerationStructure> primitive_accel in _primitive_accels) {
		[compute_encoder useResource:primitive_accel usage:MTLResourceUsageRead];
	}
	[compute_encoder setComputePipelineState:_raytracing_pipeline];
	[compute_encoder dispatchThreadgroups:threadgroups threadsPerThreadgroup:threads_per_threadgroup];
	[compute_encoder endEncoding];
	[command_buffer commit];
}