renderer_mtl.mm 93 KB


  1. /*
  2. * Copyright 2011-2015 Attila Kocsis. All rights reserved.
  3. * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
  4. */
  5. #include "bgfx_p.h"
  6. #if BGFX_CONFIG_RENDERER_METAL
  7. #include "renderer_mtl.h"
  8. #include "renderer.h"
  9. #include "bx/bx.h"
  10. #if BX_PLATFORM_OSX
  11. # include <Cocoa/Cocoa.h>
  12. #endif
  13. #import <Foundation/Foundation.h>
  14. #define UNIFORM_BUFFER_SIZE (1024*1024)
  15. #define UNIFORM_BUFFER_COUNT (3)
  16. /*
  17. Known issues / TODOs:
  18. - 15-shadowmaps-simple (modified shaderc and example needs modification too, mtxCrop znew = z * 0.5 + 0.5 is not needed ) could be hacked in shader too
  19. - 19-oit ( hacked shaderc to support MRT output)
  20. - 21-deferred ( hacked shaderc to support MRT output and fs_deferred_light needed modification for metal (similar to BGFX_SHADER_LANGUAGE_HLSL )
  21. 07-callback, saveScreenshot should be implemented with one frame latency (using saveScreenshotBegin and End)
  22. 16-shadowmaps, //problem with essl -> metal: SAMPLER2D(u_shadowMap0, 4); sampler index is lost. Shadowmap is set to slot 4, but
  23. metal shader uses sampler/texture slot 0. this could require changes outside of renderer_mtl?
  24. Otherwise it works with hacking the slot.
  25. 24-nbody - cannot generate compute shaders for metal
  26. 20-nanovg - TODO: remove sampler/texture hack
  27. - caps
  28. - optimization...
  29. create binary shader representation
  30. 13-stencil and 16-shadowmaps are very inefficient. every view stores/loads backbuffer data
  31. BGFX_RESET_FLIP_AFTER_RENDER on low level renderers should be true? (crashes even with BGFX_RESET_FLIP_AFTER_RENDER because there is
  32. one rendering frame before reset). Do I absolutely need to send the result to the View at flip, or can I do it in submit?
  33. */
  34. namespace bgfx { namespace mtl
  35. {
  // Debug names, one fixed-size slot per view.
  static char s_viewName[BGFX_CONFIG_MAX_VIEWS][BGFX_CONFIG_MAX_VIEW_NAME];

  // One row of the primitive table: a Metal primitive type plus three integer
  // parameters.
  // NOTE(review): m_min / m_div / m_sub look like "minimum index count",
  // "indices per primitive" and "strip overlap" - the consumer is outside this
  // chunk, confirm there.
  struct PrimInfo
  {
      MTLPrimitiveType m_type;
      uint32_t         m_min;
      uint32_t         m_div;
      uint32_t         m_sub;
  };

  // Primitive parameters; rows are kept parallel to s_primName below.
  static const PrimInfo s_primInfo[] =
  {
      { MTLPrimitiveTypeTriangle,      3, 3, 0 },
      { MTLPrimitiveTypeTriangleStrip, 3, 1, 2 },
      { MTLPrimitiveTypeLine,          2, 2, 0 },
      { MTLPrimitiveTypeLineStrip,     2, 1, 1 },
      { MTLPrimitiveTypePoint,         1, 1, 0 },
  };

  // Human-readable name for each row of s_primInfo.
  static const char* s_primName[] =
  {
      "TriList",
      "TriStrip",
      "Line",
      "LineStrip",
      "Point",
  };

  // Both tables must have the same number of rows.
  BX_STATIC_ASSERT(BX_COUNTOF(s_primInfo) == BX_COUNTOF(s_primName));
  // Shader-side name of every vertex attribute; the static assert below pins
  // the row count to Attrib::Count.
  static const char* s_attribName[] =
  {
      "a_position",
      "a_normal",
      "a_tangent",
      "a_bitangent",
      "a_color0",
      "a_color1",
      "a_indices",
      "a_weight",
      "a_texcoord0",
      "a_texcoord1",
      "a_texcoord2",
      "a_texcoord3",
      "a_texcoord4",
      "a_texcoord5",
      "a_texcoord6",
      "a_texcoord7",
  };
  BX_STATIC_ASSERT(Attrib::Count == BX_COUNTOF(s_attribName) );

  // Shader-side name of every per-instance data slot; the row count is pinned
  // to BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT.
  static const char* s_instanceDataName[] =
  {
      "i_data0",
      "i_data1",
      "i_data2",
      "i_data3",
      "i_data4",
  };
  BX_STATIC_ASSERT(BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT == BX_COUNTOF(s_instanceDataName) );
  // Metal vertex format lookup: s_attribType[type][count - 1][normalized].
  // Rows follow the AttribType order checked by the static assert below.
  // Each inner pair is { plain format, normalized format }.
  static const MTLVertexFormat s_attribType[][4][2] = //type, count, normalized
  {
      // Uint8 - a single component is widened to the two-component format.
      {
          { MTLVertexFormatUChar2, MTLVertexFormatUChar2Normalized },
          { MTLVertexFormatUChar2, MTLVertexFormatUChar2Normalized },
          { MTLVertexFormatUChar3, MTLVertexFormatUChar3Normalized },
          { MTLVertexFormatUChar4, MTLVertexFormatUChar4Normalized }
      },
      // Uint10
      // TODO: normalized only
      {
          { MTLVertexFormatInvalid, MTLVertexFormatUInt1010102Normalized },
          { MTLVertexFormatInvalid, MTLVertexFormatUInt1010102Normalized },
          { MTLVertexFormatInvalid, MTLVertexFormatUInt1010102Normalized },
          { MTLVertexFormatInvalid, MTLVertexFormatUInt1010102Normalized }
      },
      // Int16 - a single component is widened to the two-component format.
      {
          { MTLVertexFormatShort2, MTLVertexFormatShort2Normalized },
          { MTLVertexFormatShort2, MTLVertexFormatShort2Normalized },
          { MTLVertexFormatShort3, MTLVertexFormatShort3Normalized },
          { MTLVertexFormatShort4, MTLVertexFormatShort4Normalized }
      },
      // Half - there is no separate normalized variant, so both columns carry
      // the same format. The normalized column previously held Half2 for the
      // 3- and 4-component rows, which dropped components whenever the
      // attribute was flagged normalized.
      {
          { MTLVertexFormatHalf2, MTLVertexFormatHalf2 },
          { MTLVertexFormatHalf2, MTLVertexFormatHalf2 },
          { MTLVertexFormatHalf3, MTLVertexFormatHalf3 },
          { MTLVertexFormatHalf4, MTLVertexFormatHalf4 }
      },
      // Float - the normalized flag has no effect.
      {
          { MTLVertexFormatFloat,  MTLVertexFormatFloat  },
          { MTLVertexFormatFloat2, MTLVertexFormatFloat2 },
          { MTLVertexFormatFloat3, MTLVertexFormatFloat3 },
          { MTLVertexFormatFloat4, MTLVertexFormatFloat4 }
      },
  };
  BX_STATIC_ASSERT(AttribType::Count == BX_COUNTOF(s_attribType) );
  // Cull mode lookup.
  // NOTE(review): presumably indexed by the decoded cull bits of the render
  // state - the lookup site is outside this chunk.
  static const MTLCullMode s_cullMode[] =
  {
      MTLCullModeNone,
      MTLCullModeFront,
      MTLCullModeBack,
      MTLCullModeNone
  };

  // Blend factor lookup; row 0 is a placeholder that is never used.
  // Each row carries two factors. For the *_COLOR rows the second entry is the
  // matching alpha factor.
  // NOTE(review): column 0 looks like the RGB factor and column 1 the alpha
  // factor - confirm at the lookup site.
  static const MTLBlendFactor s_blendFactor[][2] =
  {
      { (MTLBlendFactor)0,                      (MTLBlendFactor)0                      }, // ignored
      { MTLBlendFactorZero,                     MTLBlendFactorZero                     }, // ZERO
      { MTLBlendFactorOne,                      MTLBlendFactorOne                      }, // ONE
      { MTLBlendFactorSourceColor,              MTLBlendFactorSourceAlpha              }, // SRC_COLOR
      { MTLBlendFactorOneMinusSourceColor,      MTLBlendFactorOneMinusSourceAlpha      }, // INV_SRC_COLOR
      { MTLBlendFactorSourceAlpha,              MTLBlendFactorSourceAlpha              }, // SRC_ALPHA
      { MTLBlendFactorOneMinusSourceAlpha,      MTLBlendFactorOneMinusSourceAlpha      }, // INV_SRC_ALPHA
      { MTLBlendFactorDestinationAlpha,         MTLBlendFactorDestinationAlpha         }, // DST_ALPHA
      { MTLBlendFactorOneMinusDestinationAlpha, MTLBlendFactorOneMinusDestinationAlpha }, // INV_DST_ALPHA
      { MTLBlendFactorDestinationColor,         MTLBlendFactorDestinationAlpha         }, // DST_COLOR
      { MTLBlendFactorOneMinusDestinationColor, MTLBlendFactorOneMinusDestinationAlpha }, // INV_DST_COLOR
      { MTLBlendFactorSourceAlphaSaturated,     MTLBlendFactorOne                      }, // SRC_ALPHA_SAT
      { MTLBlendFactorBlendColor,               MTLBlendFactorBlendColor               }, // FACTOR
      { MTLBlendFactorOneMinusBlendColor,       MTLBlendFactorOneMinusBlendColor       }, // INV_FACTOR
  };

  // Blend equation lookup.
  static const MTLBlendOperation s_blendEquation[] =
  {
      MTLBlendOperationAdd,
      MTLBlendOperationSubtract,
      MTLBlendOperationReverseSubtract,
      MTLBlendOperationMin,
      MTLBlendOperationMax,
  };
  // Compare function lookup. Entry 0 maps to "always".
  static const MTLCompareFunction s_cmpFunc[] =
  {
      MTLCompareFunctionAlways,       // TODO: depth disable?
      MTLCompareFunctionLess,
      MTLCompareFunctionLessEqual,
      MTLCompareFunctionEqual,
      MTLCompareFunctionGreaterEqual,
      MTLCompareFunctionGreater,
      MTLCompareFunctionNotEqual,
      MTLCompareFunctionNever,
      MTLCompareFunctionAlways
  };

  // Stencil operation lookup.
  static const MTLStencilOperation s_stencilOp[] =
  {
      MTLStencilOperationZero,
      MTLStencilOperationKeep,
      MTLStencilOperationReplace,
      MTLStencilOperationIncrementWrap,
      MTLStencilOperationIncrementClamp,
      MTLStencilOperationDecrementWrap,
      MTLStencilOperationDecrementClamp,
      MTLStencilOperationInvert
  };

  // Sampler address mode lookup.
  static const MTLSamplerAddressMode s_textureAddress[] =
  {
      MTLSamplerAddressModeRepeat,
      MTLSamplerAddressModeMirrorRepeat,
      MTLSamplerAddressModeClampToEdge,
      MTLSamplerAddressModeClampToZero,
  };

  // Min/mag filter lookup; the first and last entries both select linear.
  static const MTLSamplerMinMagFilter s_textureFilterMinMag[] =
  {
      MTLSamplerMinMagFilterLinear,
      MTLSamplerMinMagFilterNearest,
      MTLSamplerMinMagFilterLinear,
  };

  // Mip filter lookup.
  static const MTLSamplerMipFilter s_textureFilterMip[] =
  {
      MTLSamplerMipFilterLinear,
      MTLSamplerMipFilterNearest,
  };
  // Metal pixel formats for one texture format: the regular variant and the
  // sRGB variant. MTLPixelFormatInvalid marks "not available".
  struct TextureFormatInfo
  {
      MTLPixelFormat m_fmt;
      MTLPixelFormat m_fmtSrgb;
  };

  // One row per TextureFormat entry (row count is checked by the static assert
  // below). The table is intentionally non-const: init() patches rows at
  // runtime. Formats written as MTLPixelFormat(<number>) carry the intended
  // enum name in the adjacent comment.
  static TextureFormatInfo s_textureFormat[] =
  {
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC1
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC2
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC3
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC4
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC5
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC6H
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // BC7
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // ETC1
      { MTLPixelFormat(180) /*ETC2_RGB8*/,            MTLPixelFormat(181) /*ETC2_RGB8_sRGB*/            }, // ETC2
      { MTLPixelFormat(178) /*EAC_RGBA8*/,            MTLPixelFormat(179) /*EAC_RGBA8_sRGB*/            }, // ETC2A
      { MTLPixelFormat(182) /*ETC2_RGB8A1*/,          MTLPixelFormat(183) /*ETC2_RGB8A1_sRGB*/          }, // ETC2A1
      { MTLPixelFormat(160) /*PVRTC_RGB_2BPP*/,       MTLPixelFormat(161) /*PVRTC_RGB_2BPP_sRGB*/       }, // PTC12
      { MTLPixelFormat(162) /*PVRTC_RGB_4BPP*/,       MTLPixelFormat(163) /*PVRTC_RGB_4BPP_sRGB*/       }, // PTC14
      { MTLPixelFormat(164) /*PVRTC_RGBA_2BPP*/,      MTLPixelFormat(165) /*PVRTC_RGBA_2BPP_sRGB*/      }, // PTC12A
      { MTLPixelFormat(166) /*PVRTC_RGBA_4BPP*/,      MTLPixelFormat(167) /*PVRTC_RGBA_4BPP_sRGB*/      }, // PTC14A
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // PTC22
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // PTC24
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // Unknown
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // R1
      { MTLPixelFormatA8Unorm,                        MTLPixelFormatInvalid                             }, // A8
      { MTLPixelFormatR8Unorm,                        MTLPixelFormat(11) /*R8Unorm_sRGB*/               }, // R8
      { MTLPixelFormatR8Sint,                         MTLPixelFormatInvalid                             }, // R8I
      { MTLPixelFormatR8Uint,                         MTLPixelFormatInvalid                             }, // R8U
      { MTLPixelFormatR8Snorm,                        MTLPixelFormatInvalid                             }, // R8S
      { MTLPixelFormatR16Unorm,                       MTLPixelFormatInvalid                             }, // R16
      { MTLPixelFormatR16Sint,                        MTLPixelFormatInvalid                             }, // R16I
      { MTLPixelFormatR16Uint,                        MTLPixelFormatInvalid                             }, // R16U
      { MTLPixelFormatR16Float,                       MTLPixelFormatInvalid                             }, // R16F
      { MTLPixelFormatR16Snorm,                       MTLPixelFormatInvalid                             }, // R16S
      { MTLPixelFormatR32Sint,                        MTLPixelFormatInvalid                             }, // R32I
      { MTLPixelFormatR32Uint,                        MTLPixelFormatInvalid                             }, // R32U
      { MTLPixelFormatR32Float,                       MTLPixelFormatInvalid                             }, // R32F
      { MTLPixelFormatRG8Unorm,                       MTLPixelFormat(31) /*RG8Unorm_sRGB*/              }, // RG8
      { MTLPixelFormatRG8Sint,                        MTLPixelFormatInvalid                             }, // RG8I
      { MTLPixelFormatRG8Uint,                        MTLPixelFormatInvalid                             }, // RG8U
      { MTLPixelFormatRG8Snorm,                       MTLPixelFormatInvalid                             }, // RG8S
      { MTLPixelFormatRG16Unorm,                      MTLPixelFormatInvalid                             }, // RG16
      { MTLPixelFormatRG16Sint,                       MTLPixelFormatInvalid                             }, // RG16I
      { MTLPixelFormatRG16Uint,                       MTLPixelFormatInvalid                             }, // RG16U
      { MTLPixelFormatRG16Float,                      MTLPixelFormatInvalid                             }, // RG16F
      { MTLPixelFormatRG16Snorm,                      MTLPixelFormatInvalid                             }, // RG16S
      { MTLPixelFormatRG32Sint,                       MTLPixelFormatInvalid                             }, // RG32I
      { MTLPixelFormatRG32Uint,                       MTLPixelFormatInvalid                             }, // RG32U
      { MTLPixelFormatRG32Float,                      MTLPixelFormatInvalid                             }, // RG32F
      { MTLPixelFormatRGB9E5Float,                    MTLPixelFormatInvalid                             }, // RGB9E5F
      { MTLPixelFormatBGRA8Unorm,                     MTLPixelFormatBGRA8Unorm_sRGB                     }, // BGRA8
      { MTLPixelFormatRGBA8Unorm,                     MTLPixelFormatRGBA8Unorm_sRGB                     }, // RGBA8
      { MTLPixelFormatRGBA8Sint,                      MTLPixelFormatInvalid                             }, // RGBA8I
      { MTLPixelFormatRGBA8Uint,                      MTLPixelFormatInvalid                             }, // RGBA8U
      { MTLPixelFormatRGBA8Snorm,                     MTLPixelFormatInvalid                             }, // RGBA8S
      { MTLPixelFormatRGBA16Unorm,                    MTLPixelFormatInvalid                             }, // RGBA16
      { MTLPixelFormatRGBA16Sint,                     MTLPixelFormatInvalid                             }, // RGBA16I
      { MTLPixelFormatRGBA16Uint,                     MTLPixelFormatInvalid                             }, // RGBA16U
      { MTLPixelFormatRGBA16Float,                    MTLPixelFormatInvalid                             }, // RGBA16F
      { MTLPixelFormatRGBA16Snorm,                    MTLPixelFormatInvalid                             }, // RGBA16S
      { MTLPixelFormatRGBA32Sint,                     MTLPixelFormatInvalid                             }, // RGBA32I
      { MTLPixelFormatRGBA32Uint,                     MTLPixelFormatInvalid                             }, // RGBA32U
      { MTLPixelFormatRGBA32Float,                    MTLPixelFormatInvalid                             }, // RGBA32F
      { MTLPixelFormat(40) /*B5G6R5Unorm*/,           MTLPixelFormatInvalid                             }, // R5G6B5
      { MTLPixelFormat(42) /*ABGR4Unorm*/,            MTLPixelFormatInvalid                             }, // RGBA4
      { MTLPixelFormat(41) /*A1BGR5Unorm*/,           MTLPixelFormatInvalid                             }, // RGB5A1
      { MTLPixelFormatRGB10A2Unorm,                   MTLPixelFormatInvalid                             }, // RGB10A2
      { MTLPixelFormatRG11B10Float,                   MTLPixelFormatInvalid                             }, // R11G11B10F
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // UnknownDepth
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D16
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D24
      { MTLPixelFormatInvalid,                        MTLPixelFormatInvalid                             }, // D24S8
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D32
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D16F
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D24F
      { MTLPixelFormatDepth32Float,                   MTLPixelFormatInvalid                             }, // D32F
      { MTLPixelFormatStencil8,                       MTLPixelFormatInvalid                             }, // D0S8
  };
  BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) );
  284. #define SHADER_FUNCTION_NAME ("xlatMtlMain")
  285. #define SHADER_UNIFORM_NAME ("_mtl_u")
  286. struct RendererContextMtl : public RendererContextI
  287. {
  // Sets the scalar/pointer members listed below to their idle values.
  // Device, queue and buffers are set up later by init().
  RendererContextMtl()
      : m_metalLayer(NULL)
      , m_backBufferPixelFormatHash(0)
      , m_maxAnisotropy(1)
      , m_uniformBufferIndex(0)
      , m_numWindows(1)
      , m_rtMsaa(false)
      , m_drawable(NULL)
  {
      // Nothing else to do here.
  }
  // Intentionally empty: resources are released explicitly in shutdown().
  ~RendererContextMtl()
  {
  }
  // Sets up the Metal backend: obtains the CAMetalLayer and MTLDevice, creates
  // the command queue, the reusable descriptors and the uniform buffers, then
  // fills in g_caps and the reserved prefix of every view name.
  //
  // Returns false when no Metal layer or device could be obtained; returns
  // true once the context is ready.
  bool init()
  {
      BX_TRACE("Init.");

      m_fbh.idx = invalidHandle;
      memset(m_uniforms, 0, sizeof(m_uniforms) );
      memset(&m_resolution, 0, sizeof(m_resolution) );

      // The class is looked up by name at runtime, so a system without
      // CAMetalLayer falls through to the failure path below.
      if (NULL != NSClassFromString(@"CAMetalLayer") )
      {
  #if BX_PLATFORM_IOS
          {
              // On iOS the platform window handle must already be a CAMetalLayer.
              CAMetalLayer* metalLayer = (CAMetalLayer*)g_platformData.nwh;
              if (NULL == metalLayer
              || ![metalLayer isKindOfClass:NSClassFromString(@"CAMetalLayer")])
              {
                  BX_WARN(NULL != m_device, "Unable to create Metal device. Please set platform data window to a CAMetalLayer");
                  return false;
              }
              m_metalLayer = metalLayer;
          }
  #elif BX_PLATFORM_OSX
          {
              // On OS X the handle is an NSWindow; a fresh layer is attached
              // to its content view.
              NSWindow* nsWindow = (NSWindow*)g_platformData.nwh;
              [nsWindow.contentView setWantsLayer:YES];
              m_metalLayer = [CAMetalLayer layer];
              [nsWindow.contentView setLayer:m_metalLayer];
          }
  #endif // BX_PLATFORM_*

          // Prefer a device supplied through the platform data; otherwise use
          // the system default device.
          m_device = (id<MTLDevice>)g_platformData.context;
          if (NULL == m_device)
          {
              m_device = MTLCreateSystemDefaultDevice();
          }
      }

      if (NULL == m_device
      ||  NULL == m_metalLayer)
      {
          BX_WARN(NULL != m_device, "Unable to create Metal device.");
          return false;
      }

      m_metalLayer.device      = m_device;
      m_metalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;

      m_commandQueue = m_device.newCommandQueue();
      // The message used to repeat the "device" text even though this check is
      // about the command queue.
      BGFX_FATAL(NULL != m_commandQueue, Fatal::UnableToInitialize, "Unable to create Metal command queue.");

      // Descriptors are created once and reused.
      m_renderPipelineDescriptor   = newRenderPipelineDescriptor();
      m_depthStencilDescriptor     = newDepthStencilDescriptor();
      m_frontFaceStencilDescriptor = newStencilDescriptor();
      m_backFaceStencilDescriptor  = newStencilDescriptor();
      m_vertexDescriptor           = newVertexDescriptor();
      m_textureDescriptor          = newTextureDescriptor();
      m_samplerDescriptor          = newSamplerDescriptor();

      for (uint8_t i=0; i < UNIFORM_BUFFER_COUNT; ++i)
      {
          m_uniformBuffers[i] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0);
      }
      m_uniformBufferVertexOffset   = 0;
      m_uniformBufferFragmentOffset = 0;

      g_caps.supported |= (0
          | BGFX_CAPS_TEXTURE_3D
          | BGFX_CAPS_TEXTURE_COMPARE_LEQUAL
          | BGFX_CAPS_INSTANCING
          | BGFX_CAPS_VERTEX_ATTRIB_HALF
  //      | BGFX_CAPS_FRAGMENT_DEPTH
          | BGFX_CAPS_BLEND_INDEPENDENT
          | BGFX_CAPS_COMPUTE
          | BGFX_CAPS_INDEX32
          | BGFX_CAPS_DRAW_INDIRECT
  //      | BGFX_CAPS_TEXTURE_BLIT
  //      | BGFX_CAPS_TEXTURE_READ_BACK
          | BGFX_CAPS_OCCLUSION_QUERY
          );

      g_caps.maxTextureSize   = 2048; //ASK: real caps width/height: 4096, but max depth(3D) size is only: 2048
      g_caps.maxFBAttachments = 4; // uint8_t(bx::uint32_min(m_device.supportsFeatureSet(MTLFeatureSet_iOS_GPUFamily2_v1) ? 8 : 4, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS));

      //todo: vendor id, device id, gpu enum
      //todo: texture format caps
      //add texture formats/caps/etc that are available only on new sdk/devices
  #if BX_PLATFORM_IOS
  #   ifdef __IPHONE_8_0
      if (OsVersionEqualOrGreater("8.0.0"))
      {
          s_textureFormat[TextureFormat::D24S8].m_fmt = MTLPixelFormatDepth32Float;
      }
  #   endif // __IPHONE_8_0
  #endif // BX_PLATFORM_*

      // Derive the per-format caps from the pixel format table: a valid
      // regular format enables the 2D/3D/cube bits, a valid sRGB format the
      // matching sRGB bits.
      for (uint32_t ii = 0; ii < TextureFormat::Count; ++ii)
      {
          uint8_t support = 0;

          support |= MTLPixelFormatInvalid != s_textureFormat[ii].m_fmt
              ? BGFX_CAPS_FORMAT_TEXTURE_2D
              | BGFX_CAPS_FORMAT_TEXTURE_3D
              | BGFX_CAPS_FORMAT_TEXTURE_CUBE
              : BGFX_CAPS_FORMAT_TEXTURE_NONE
              ;

          support |= MTLPixelFormatInvalid != s_textureFormat[ii].m_fmtSrgb
              ? BGFX_CAPS_FORMAT_TEXTURE_2D_SRGB
              | BGFX_CAPS_FORMAT_TEXTURE_3D_SRGB
              | BGFX_CAPS_FORMAT_TEXTURE_CUBE_SRGB
              : BGFX_CAPS_FORMAT_TEXTURE_NONE
              ;

          //TODO: additional caps flags
  //      support |= BGFX_CAPS_FORMAT_TEXTURE_VERTEX : BGFX_CAPS_FORMAT_TEXTURE_NONE;
  //      support |= BGFX_CAPS_FORMAT_TEXTURE_IMAGE : BGFX_CAPS_FORMAT_TEXTURE_NONE;
  //      support |= BGFX_CAPS_FORMAT_TEXTURE_FRAMEBUFFER : BGFX_CAPS_FORMAT_TEXTURE_NONE;

          g_caps.formats[ii] = support;
      }

      // On OS X the ETC and PVRTC families are reported as unsupported.
      if (BX_ENABLED(BX_PLATFORM_OSX) )
      {
          g_caps.formats[TextureFormat::ETC1  ] =
          g_caps.formats[TextureFormat::ETC2  ] =
          g_caps.formats[TextureFormat::ETC2A ] =
          g_caps.formats[TextureFormat::ETC2A1] =
          g_caps.formats[TextureFormat::PTC12 ] =
          g_caps.formats[TextureFormat::PTC14 ] =
          g_caps.formats[TextureFormat::PTC12A] =
          g_caps.formats[TextureFormat::PTC14A] =
          g_caps.formats[TextureFormat::PTC22 ] =
          g_caps.formats[TextureFormat::PTC24 ] = BGFX_CAPS_FORMAT_TEXTURE_NONE;
      }

      // Keep the pixel format table in sync with the caps: anything reported
      // as unsupported is invalidated in the table as well.
      for (uint32_t ii = 0; ii < TextureFormat::Count; ++ii)
      {
          if (BGFX_CAPS_FORMAT_TEXTURE_NONE == g_caps.formats[ii])
          {
              s_textureFormat[ii].m_fmt     = MTLPixelFormatInvalid;
              s_textureFormat[ii].m_fmtSrgb = MTLPixelFormatInvalid;
          }
      }

      // Init reserved part of view name.
      for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii)
      {
          bx::snprintf(s_viewName[ii], BGFX_CONFIG_MAX_VIEW_NAME_RESERVED+1, "%3d ", ii);
      }

      m_occlusionQuery.preReset();

      // Expose the device that was actually selected.
      g_internalData.context = m_device;
      return true;
  }
  // Tears the context down: destroys every shader and texture slot, then
  // releases the descriptors, back-buffer attachments, uniform buffers,
  // command queue and finally the device.
  void shutdown()
  {
      m_occlusionQuery.postReset();

      for (uint32_t shaderIdx = 0; shaderIdx < BX_COUNTOF(m_shaders); ++shaderIdx)
      {
          m_shaders[shaderIdx].destroy();
      }

      for (uint32_t textureIdx = 0; textureIdx < BX_COUNTOF(m_textures); ++textureIdx)
      {
          m_textures[textureIdx].destroy();
      }

      // Reusable descriptors.
      MTL_RELEASE(m_depthStencilDescriptor);
      MTL_RELEASE(m_frontFaceStencilDescriptor);
      MTL_RELEASE(m_backFaceStencilDescriptor);
      MTL_RELEASE(m_renderPipelineDescriptor);
      MTL_RELEASE(m_vertexDescriptor);
      MTL_RELEASE(m_textureDescriptor);
      MTL_RELEASE(m_samplerDescriptor);

      // Back-buffer attachments; the stencil one is only released on iOS.
      MTL_RELEASE(m_backBufferDepth);
      if (BX_ENABLED(BX_PLATFORM_IOS) )
      {
          MTL_RELEASE(m_backBufferStencil);
      }

      for (uint8_t bufferIdx = 0; bufferIdx < UNIFORM_BUFFER_COUNT; ++bufferIdx)
      {
          MTL_RELEASE(m_uniformBuffers[bufferIdx]);
      }

      // Queue first, device last.
      MTL_RELEASE(m_commandQueue);
      MTL_RELEASE(m_device);
  }
  // Identifies this backend as the Metal renderer.
  RendererType::Enum getRendererType() const BX_OVERRIDE
  {
      const RendererType::Enum type = RendererType::Metal;
      return type;
  }
  // Returns the display name of this backend (BGFX_RENDERER_METAL_NAME).
  const char* getRendererName() const BX_OVERRIDE
  {
      const char* name = BGFX_RENDERER_METAL_NAME;
      return name;
  }
  // Creates the index buffer in slot _handle.idx from the bytes held by _mem.
  void createIndexBuffer(IndexBufferHandle _handle, Memory* _mem, uint16_t _flags) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_indexBuffers[slot].create(_mem->size, _mem->data, _flags);
  }
  // Destroys the index buffer in slot _handle.idx.
  void destroyIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_indexBuffers[slot].destroy();
  }
  // Stores a byte-wise copy of _decl in slot _handle.idx and dumps it.
  void createVertexDecl(VertexDeclHandle _handle, const VertexDecl& _decl) BX_OVERRIDE
  {
      VertexDecl& stored = m_vertexDecls[_handle.idx];
      memcpy(&stored, &_decl, sizeof(VertexDecl) );
      dump(stored);
  }
  // No-op: nothing is released for a vertex declaration slot.
  void destroyVertexDecl(VertexDeclHandle _handle) BX_OVERRIDE
  {
      BX_UNUSED(_handle);
  }
  // Creates the vertex buffer in slot _handle.idx from the bytes held by _mem
  // and associates it with _declHandle.
  void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint16_t _flags) BX_OVERRIDE
  {
      VertexBufferMtl& vertexBuffer = m_vertexBuffers[_handle.idx];
      vertexBuffer.create(_mem->size, _mem->data, _declHandle, _flags);
  }
  // Destroys the vertex buffer in slot _handle.idx.
  void destroyVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE
  {
      VertexBufferMtl& vertexBuffer = m_vertexBuffers[_handle.idx];
      vertexBuffer.destroy();
  }
  // Creates an index buffer of _size bytes in slot _handle.idx with no
  // initial data (NULL is passed as the source pointer).
  void createDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_indexBuffers[slot].create(_size, NULL, _flags);
  }
  // Copies data from _mem into the index buffer at _offset. The byte count is
  // clamped to the smaller of _size and _mem->size.
  void updateDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE
  {
      const uint32_t numBytes = bx::uint32_min(_size, _mem->size);
      m_indexBuffers[_handle.idx].update(_offset, numBytes, _mem->data);
  }
  // Destroys the dynamic index buffer in slot _handle.idx.
  void destroyDynamicIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_indexBuffers[slot].destroy();
  }
  // Creates a vertex buffer of _size bytes in slot _handle.idx with no
  // initial data and an invalid vertex declaration handle.
  void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE
  {
      VertexDeclHandle noDecl = BGFX_INVALID_HANDLE;
      VertexBufferMtl& vertexBuffer = m_vertexBuffers[_handle.idx];
      vertexBuffer.create(_size, NULL, noDecl, _flags);
  }
  // Copies data from _mem into the vertex buffer at _offset. The byte count
  // is clamped to the smaller of _size and _mem->size.
  void updateDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE
  {
      const uint32_t numBytes = bx::uint32_min(_size, _mem->size);
      m_vertexBuffers[_handle.idx].update(_offset, numBytes, _mem->data);
  }
  // Destroys the dynamic vertex buffer in slot _handle.idx.
  void destroyDynamicVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE
  {
      VertexBufferMtl& vertexBuffer = m_vertexBuffers[_handle.idx];
      vertexBuffer.destroy();
  }
  // Creates the shader in slot _handle.idx from the blob in _mem.
  void createShader(ShaderHandle _handle, Memory* _mem) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_shaders[slot].create(_mem);
  }
  // Destroys the shader in slot _handle.idx.
  void destroyShader(ShaderHandle _handle) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_shaders[slot].destroy();
  }
  // Creates the program in slot _handle.idx from the vertex shader slot
  // _vsh.idx and the fragment shader slot _fsh.idx.
  void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE
  {
      ProgramMtl& program = m_program[_handle.idx];
      program.create(&m_shaders[_vsh.idx], &m_shaders[_fsh.idx]);
  }
  // Destroys the program in slot _handle.idx.
  void destroyProgram(ProgramHandle _handle) BX_OVERRIDE
  {
      ProgramMtl& program = m_program[_handle.idx];
      program.destroy();
  }
  // Creates the texture in slot _handle.idx from the blob in _mem.
  void createTexture(TextureHandle _handle, Memory* _mem, uint32_t _flags, uint8_t _skip) BX_OVERRIDE
  {
      TextureMtl& texture = m_textures[_handle.idx];
      texture.create(_mem, _flags, _skip);
  }
  // No-op in this backend; all three arguments are ignored.
  void updateTextureBegin(TextureHandle _handle, uint8_t _side, uint8_t _mip) BX_OVERRIDE
  {
      BX_UNUSED(_handle, _side, _mip);
  }
  // Forwards a sub-region update to the texture in slot _handle.idx.
  void updateTexture(TextureHandle _handle, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) BX_OVERRIDE
  {
      TextureMtl& texture = m_textures[_handle.idx];
      texture.update(_side, _mip, _rect, _z, _depth, _pitch, _mem);
  }
  // No-op in this backend.
  void updateTextureEnd() BX_OVERRIDE
  {
      // Nothing to finalize.
  }
  // Not implemented: both arguments are ignored and _data is left untouched.
  void readTexture(TextureHandle _handle, void* _data) BX_OVERRIDE
  {
      BX_UNUSED(_handle, _data);
  }
  // Recreates the texture in slot _handle.idx with a new width and height.
  // A temporary creation blob (chunk magic followed by a TextureCreate record)
  // is built, the old texture is destroyed, and a new one is created from the
  // blob with the texture's previous flags and requested format, one mip
  // level and no initial data.
  void resizeTexture(TextureHandle _handle, uint16_t _width, uint16_t _height) BX_OVERRIDE
  {
      TextureMtl& target = m_textures[_handle.idx];

      // Describe the replacement texture.
      TextureCreate tc;
      tc.m_flags   = target.m_flags;
      tc.m_width   = _width;
      tc.m_height  = _height;
      tc.m_sides   = 0;
      tc.m_depth   = 0;
      tc.m_numMips = 1;
      tc.m_format  = TextureFormat::Enum(target.m_requestedFormat);
      tc.m_cubeMap = false;
      tc.m_mem     = NULL;

      // Serialize: magic first, then the record.
      const uint32_t blobSize = sizeof(uint32_t) + sizeof(TextureCreate);
      const Memory* blob = alloc(blobSize);
      bx::StaticMemoryBlockWriter writer(blob->data, blob->size);

      uint32_t magic = BGFX_CHUNK_MAGIC_TEX;
      bx::write(&writer, magic);
      bx::write(&writer, tc);

      target.destroy();
      target.create(blob, tc.m_flags, 0);

      release(blob);
  }
  // Not implemented: both arguments are ignored.
  void setInternal(TextureHandle /*_handle*/, uintptr_t /*_ptr*/) BX_OVERRIDE
  {
  }
  // Not implemented: always returns 0 regardless of the handle.
  uintptr_t getInternal(TextureHandle /*_handle*/) BX_OVERRIDE
  {
      return 0;
  }
  // Destroys the texture in slot _handle.idx.
  void destroyTexture(TextureHandle _handle) BX_OVERRIDE
  {
      TextureMtl& texture = m_textures[_handle.idx];
      texture.destroy();
  }
  // Creates the frame buffer in slot _handle.idx from _num texture handles.
  void createFrameBuffer(FrameBufferHandle _handle, uint8_t _num, const TextureHandle* _textureHandles) BX_OVERRIDE
  {
      const uint16_t slot = _handle.idx;
      m_frameBuffers[slot].create(_num, _textureHandles);
  }
  // Creates a window-backed frame buffer. The next free entry of the dense
  // window list is claimed for it and the handle is recorded there.
  void createFrameBuffer(FrameBufferHandle _handle, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat) BX_OVERRIDE
  {
      const uint16_t windowSlot = m_numWindows;
      ++m_numWindows;

      m_windows[windowSlot] = _handle;
      m_frameBuffers[_handle.idx].create(windowSlot, _nwh, _width, _height, _depthFormat);
  }
  // Destroys the frame buffer in slot _handle.idx. When destroy() reports a
  // dense window index (anything other than UINT16_MAX), the window list is
  // shrunk and, if more than one window remains, the last entry is moved into
  // the freed position so the list stays dense.
  void destroyFrameBuffer(FrameBufferHandle _handle) BX_OVERRIDE
  {
      const uint16_t freedSlot = m_frameBuffers[_handle.idx].destroy();
      if (UINT16_MAX == freedSlot)
      {
          return;
      }

      --m_numWindows;
      if (m_numWindows <= 1)
      {
          return;
      }

      FrameBufferHandle moved = m_windows[m_numWindows];
      m_windows[freedSlot] = moved;
      m_frameBuffers[moved.idx].m_denseIdx = freedSlot;
  }
  // Allocates zero-filled storage for a uniform (element size times _num,
  // rounded with BX_ALIGN_16), stores it in slot _handle.idx and registers it
  // under _name. Storage already present in the slot is freed first.
  void createUniform(UniformHandle _handle, UniformType::Enum _type, uint16_t _num, const char* _name) BX_OVERRIDE
  {
      void*& slot = m_uniforms[_handle.idx];
      if (NULL != slot)
      {
          BX_FREE(g_allocator, slot);
      }

      const uint32_t numBytes = BX_ALIGN_16(g_uniformTypeSize[_type]*_num);
      void* storage = BX_ALLOC(g_allocator, numBytes);
      memset(storage, 0, numBytes);

      slot = storage;
      m_uniformReg.add(_handle, _name, storage);
  }
  // Frees the storage of the uniform in slot _handle.idx and clears the slot.
  void destroyUniform(UniformHandle _handle) BX_OVERRIDE
  {
      void*& slot = m_uniforms[_handle.idx];
      BX_FREE(g_allocator, slot);
      slot = NULL;
  }
  // Reads the current drawable's texture back into a temporary buffer
  // (4 bytes per pixel) and hands it to the screenshot callback.
  // Does nothing when there is no drawable or the drawable has no texture.
  void saveScreenShot(const char* _filePath) BX_OVERRIDE
  {
      const bool hasBackBuffer = NULL != m_drawable
                              && NULL != m_drawable.texture;
      if (!hasBackBuffer)
      {
          return;
      }

      //TODO: we should wait for completion of pending commandBuffers
      //TODO: implement this with saveScreenshotBegin/End
      Texture backBuffer = m_drawable.texture;

      uint32_t width  = backBuffer.width();
      uint32_t height = backBuffer.height();
      uint32_t pitch  = width*4;
      uint32_t length = width*height*4;

      uint8_t* pixels = (uint8_t*)BX_ALLOC(g_allocator, length);

      MTLRegion region = { { 0, 0, 0 }, { width, height, 1 } };
      backBuffer.getBytes(pixels, 4*width, 0, region, 0, 0);

      g_callback->screenShot(_filePath
          , backBuffer.width()
          , backBuffer.height()
          , pitch
          , pixels
          , length
          , false
          );

      BX_FREE(g_allocator, pixels);
  }
  // Copies _name into the user part of view _id's debug name, i.e. after the
  // BGFX_CONFIG_MAX_VIEW_NAME_RESERVED-character prefix. The copy is bounded
  // by the remaining space in the slot.
  //
  // The copy is performed when either debug switch is enabled. The original
  // check looked only at BGFX_CONFIG_DEBUG_PIX, while the Metal debug path in
  // this renderer (setMarker) is gated on BGFX_CONFIG_DEBUG_MTL, so view names
  // were dropped when only the Metal switch was on.
  void updateViewName(uint8_t _id, const char* _name) BX_OVERRIDE
  {
      if (BX_ENABLED(BGFX_CONFIG_DEBUG_PIX)
      ||  BX_ENABLED(BGFX_CONFIG_DEBUG_MTL) )
      {
          bx::strlcpy(&s_viewName[_id][BGFX_CONFIG_MAX_VIEW_NAME_RESERVED]
              , _name
              , BX_COUNTOF(s_viewName[0])-BGFX_CONFIG_MAX_VIEW_NAME_RESERVED
              );
      }
  }
  // Overwrites the first _size bytes of the uniform storage in slot _loc
  // with the bytes at _data.
  void updateUniform(uint16_t _loc, const void* _data, uint32_t _size) BX_OVERRIDE
  {
      void* destination = m_uniforms[_loc];
      memcpy(destination, _data, _size);
  }
  // Inserts _marker as a debug signpost on the current render command
  // encoder. Only active when BGFX_CONFIG_DEBUG_MTL is enabled; the size
  // argument is not used.
  void setMarker(const char* _marker, uint32_t _size) BX_OVERRIDE
  {
      BX_UNUSED(_size);

      if (BX_ENABLED(BGFX_CONFIG_DEBUG_MTL) )
      {
          m_renderCommandEncoder.insertDebugSignpost(_marker);
      }
  }
  // Declaration only; the definition is not part of this struct body.
  void submit(
        Frame* _render
      , ClearQuad& _clearQuad
      , TextVideoMemBlitter& _textVideoMemBlitter
      ) BX_OVERRIDE;
  // Prepares the current render command encoder for drawing the debug-text
  // blit: full-buffer viewport and scissor, no culling, depth test "always"
  // with colour and alpha writes, the blitter's pipeline state, uniform buffer
  // bindings, the blitter vertex buffer at index 1, an orthographic
  // projection as the program's first predefined uniform, and finally the
  // blitter texture committed to stage 0.
  void blitSetup(TextVideoMemBlitter& _blitter) BX_OVERRIDE
  {
      RenderCommandEncoder encoder = m_renderCommandEncoder;

      uint32_t bufferWidth  = getBufferWidth();
      uint32_t bufferHeight = getBufferHeight();

      //TODO: change to default framebuffer - we need a new encoder for this!
      //setFrameBuffer(fbh, false);
      FrameBufferHandle fbh = BGFX_INVALID_HANDLE;

      // Fixed-function state covering the whole buffer.
      MTLViewport viewport = { 0.0f, 0.0f, (float)bufferWidth, (float)bufferHeight, 0.0f, 1.0f};
      encoder.setViewport(viewport);

      MTLScissorRect scissor = { 0,0,bufferWidth,bufferHeight };
      encoder.setScissorRect(scissor);

      encoder.setCullMode(MTLCullModeNone);

      uint64_t state = 0
          | BGFX_STATE_RGB_WRITE
          | BGFX_STATE_ALPHA_WRITE
          | BGFX_STATE_DEPTH_TEST_ALWAYS
          ;
      setDepthStencilState(state);

      // Pipeline state for the blitter program.
      ProgramMtl& program = m_program[_blitter.m_program.idx];
      RenderPipelineState pipelineState = program.getRenderPipelineState(state, 0, fbh, _blitter.m_vb->decl, 0);
      encoder.setRenderPipelineState(pipelineState);

      // Uniform data: the vertex block is aligned first, the fragment block
      // starts right after it and is aligned separately.
      uint32_t vshUniformSize = program.m_vshConstantBufferSize;
      uint32_t fshUniformSize = program.m_fshConstantBufferSize;

      if (vshUniformSize )
      {
          m_uniformBufferVertexOffset = BX_ALIGN_MASK(m_uniformBufferVertexOffset, program.m_vshConstantBufferAlignmentMask);
          encoder.setVertexBuffer(m_uniformBuffer, m_uniformBufferVertexOffset, 0);
      }

      m_uniformBufferFragmentOffset = m_uniformBufferVertexOffset + vshUniformSize;

      if (fshUniformSize )
      {
          m_uniformBufferFragmentOffset = BX_ALIGN_MASK(m_uniformBufferFragmentOffset, program.m_fshConstantBufferAlignmentMask);
          encoder.setFragmentBuffer(m_uniformBuffer, m_uniformBufferFragmentOffset, 0);
      }

      // Geometry goes to vertex buffer index 1; index 0 is used for uniforms.
      VertexBufferMtl& vertexBuffer = m_vertexBuffers[_blitter.m_vb->handle.idx];
      encoder.setVertexBuffer(vertexBuffer.m_buffer, 0, 1);

      // Orthographic projection over the buffer, uploaded through the
      // program's first predefined uniform.
      float ortho[16];
      bx::mtxOrtho(ortho, 0.0f, (float)bufferWidth, (float)bufferHeight, 0.0f, 0.0f, 1000.0f);

      PredefinedUniform& predefined = program.m_predefined[0];
      uint8_t uniformFlags = predefined.m_type;
      setShaderUniform(uniformFlags, predefined.m_loc, ortho, 4);

      m_textures[_blitter.m_texture.idx].commit(0);
  }
  // Uploads the blitter's index and vertex data and issues one indexed
  // triangle draw. The vertex count is derived from the index count
  // (_numIndices*4/6); nothing happens when it is zero.
  void blitRender(TextVideoMemBlitter& _blitter, uint32_t _numIndices) BX_OVERRIDE
  {
      const uint32_t numVertices = _numIndices*4/6;
      if (0 == numVertices)
      {
          return;
      }

      const uint16_t ibSlot = _blitter.m_ib->handle.idx;
      const uint16_t vbSlot = _blitter.m_vb->handle.idx;

      // Indices are 16-bit, hence two bytes each.
      m_indexBuffers [ibSlot].update(0, _numIndices*2, _blitter.m_ib->data);
      m_vertexBuffers[vbSlot].update(0, numVertices*_blitter.m_decl.m_stride, _blitter.m_vb->data, true);

      m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[ibSlot].m_buffer, 0, 1);
  }
  // Presents the current drawable and commits the frame's command buffer,
  // then blocks until the command buffer has completed. Does nothing when
  // there is no drawable or no command buffer. The HMD argument is unused.
  void flip(HMD& /*_hmd*/) BX_OVERRIDE
  {
      const bool canPresent = NULL != m_drawable
                           && NULL != m_commandBuffer;
      if (!canPresent)
      {
          return;
      }

      // Schedule presentation, then drop our reference to the drawable.
      m_commandBuffer.presentDrawable(m_drawable);
      MTL_RELEASE(m_drawable);

      m_commandBuffer.commit();

      // using heavy syncing now
      // TODO: refactor it with double/triple buffering frame data
      m_commandBuffer.waitUntilCompleted();

      MTL_RELEASE(m_commandBuffer);

      //TODO: support multiple windows on OSX
  }
  // Applies a new resolution/reset request.
  //
  // Always refreshes m_maxAnisotropy from the reset flags. When the drawable
  // has a texture and either the depth buffer is missing, its size differs
  // from the drawable, or the stored resolution differs from _resolution,
  // the depth/stencil back buffer is recreated, the back buffer pixel format
  // hash is recomputed, every frame buffer gets postReset() and the debug
  // text memory is resized and cleared.
  void updateResolution(const Resolution& _resolution)
  {
      if (0 != (_resolution.m_flags & BGFX_RESET_MAXANISOTROPY) )
      {
          m_maxAnisotropy = 16;
      }
      else
      {
          m_maxAnisotropy = 1;
      }

      //TODO: _resolution has wrong dimensions, using m_drawable.texture size now
      if (NULL == m_drawable.texture)
      {
          return;
      }

      const uint32_t width  = (uint32_t)m_drawable.texture.width;
      const uint32_t height = (uint32_t)m_drawable.texture.height;

      //TODO: there should be a way to specify if backbuffer needs stencil/depth.
      //TODO: support msaa
      const bool depthStale = NULL == m_backBufferDepth
                           || width  != m_backBufferDepth.width()
                           || height != m_backBufferDepth.height()
                           ;
      const bool settingsChanged = m_resolution.m_width  != _resolution.m_width
                                || m_resolution.m_height != _resolution.m_height
                                || m_resolution.m_flags  != _resolution.m_flags
                                ;
      if (!depthStale
      &&  !settingsChanged)
      {
          return;
      }

      // Remember the request, minus the internal "force" bit.
      m_resolution = _resolution;
      m_resolution.m_flags &= ~BGFX_RESET_INTERNAL_FORCE;

      // Describe a single-sample, single-mip 2D depth+stencil render target
      // matching the drawable's size.
      m_textureDescriptor.textureType      = MTLTextureType2D;
      m_textureDescriptor.pixelFormat      = MTLPixelFormatDepth32Float_Stencil8;
      m_textureDescriptor.width            = width;
      m_textureDescriptor.height           = height;
      m_textureDescriptor.depth            = 1;
      m_textureDescriptor.mipmapLevelCount = 1;
      m_textureDescriptor.sampleCount      = 1;
      m_textureDescriptor.arrayLength      = 1;
      m_textureDescriptor.resourceOptions  = MTLResourceStorageModePrivate;
      m_textureDescriptor.cpuCacheMode     = MTLCPUCacheModeDefaultCache;
      m_textureDescriptor.storageMode      = MTLStorageModePrivate;
      m_textureDescriptor.usage            = MTLTextureUsageRenderTarget;

      // Drop the previous depth buffer before creating its replacement.
      if (NULL != m_backBufferDepth)
      {
          release(m_backBufferDepth);
      }

      m_backBufferDepth   = m_device.newTextureWithDescriptor(m_textureDescriptor);
      // Depth and stencil share the same packed texture.
      m_backBufferStencil = m_backBufferDepth;

      // Hash of the back buffer attachment formats; used as part of the
      // render pipeline state cache key.
      bx::HashMurmur2A formatHash;
      formatHash.begin();
      formatHash.add(1);
      formatHash.add((uint32_t)m_drawable.texture.pixelFormat);
      formatHash.add((uint32_t)m_backBufferDepth.pixelFormat());
      formatHash.add((uint32_t)m_backBufferStencil.pixelFormat());
      m_backBufferPixelFormatHash = formatHash.end();

      const uint32_t numFrameBuffers = BX_COUNTOF(m_frameBuffers);
      for (uint32_t idx = 0; idx < numFrameBuffers; ++idx)
      {
          m_frameBuffers[idx].postReset();
      }

      m_textVideoMem.resize(false, width, height);
      m_textVideoMem.clear();
  }
  // Copies _numRegs 16-byte registers from _val into the current uniform
  // buffer.
  //
  // _flags   - uniform type flags; BGFX_UNIFORM_FRAGMENTBIT selects the
  //            fragment region of the buffer, otherwise the vertex region.
  // _loc     - byte offset of the uniform inside the selected region.
  // _val     - source data.
  // _numRegs - number of 16-byte registers to copy.
  void setShaderUniform(uint8_t _flags, uint32_t _loc, const void* _val, uint32_t _numRegs)
  {
      const bool isFragment = 0 != (_flags&BGFX_UNIFORM_FRAGMENTBIT);

      uint32_t regionBase = m_uniformBufferVertexOffset;
      if (isFragment)
      {
          regionBase = m_uniformBufferFragmentOffset;
      }

      uint8_t* contents = (uint8_t*)m_uniformBuffer.contents();
      memcpy(contents + (regionBase + _loc), _val, _numRegs*16);
  }
  // Vec4 flavour of setShaderUniform(); forwards all arguments unchanged.
  void setShaderUniform4f(uint8_t _flags, uint32_t _loc, const void* _val, uint32_t _numRegs)
  {
      // No Metal-specific handling is needed: every register is 16 bytes.
      setShaderUniform(
            _flags
          , _loc
          , _val
          , _numRegs
          );
  }
  // Mat4 flavour of setShaderUniform(); forwards all arguments unchanged.
  void setShaderUniform4x4f(uint8_t _flags, uint32_t _loc, const void* _val, uint32_t _numRegs)
  {
      // _numRegs already counts 16-byte registers, so the data is copied as-is.
      setShaderUniform(
            _flags
          , _loc
          , _val
          , _numRegs
          );
  }
  // Replays an encoded uniform buffer: every opcode up to UniformType::End is
  // decoded and the referenced data is written to the Metal uniform buffer
  // through setShaderUniform().
  //
  // The uniform data either follows the opcode inline ("copy") or is looked
  // up in m_uniforms via the UniformHandle stored after the opcode.
  void commit(UniformBuffer& _uniformBuffer)
  {
      _uniformBuffer.reset();

      for (;;)
      {
          const uint32_t opcode = _uniformBuffer.read();
          if (UniformType::End == opcode)
          {
              break;
          }

          UniformType::Enum type;
          uint16_t loc;
          uint16_t num;
          uint16_t copy;
          UniformBuffer::decodeOpcode(opcode, type, loc, num, copy);

          const char* data;
          if (!copy)
          {
              // Indirect: the buffer stores a handle into m_uniforms.
              UniformHandle handle;
              memcpy(&handle, _uniformBuffer.read(sizeof(UniformHandle) ), sizeof(UniformHandle) );
              data = (const char*)m_uniforms[handle.idx];
          }
          else
          {
              // Inline: the payload directly follows the opcode.
              data = _uniformBuffer.read(g_uniformTypeSize[type]*num);
          }

          switch ( (uint32_t)type)
          {
          case UniformType::Mat3:
          case UniformType::Mat3|BGFX_UNIFORM_FRAGMENTBIT:
              {
                  // Each 3x3 matrix (9 floats) is expanded to three 4-float
                  // rows with the fourth component zeroed, then written as
                  // 3 registers.
                  const float* src = (const float*)data;
                  const uint32_t numMatrices = num/3;
                  for (uint32_t mm = 0; mm < numMatrices; ++mm)
                  {
                      Matrix4 padded;
                      for (uint32_t row = 0; row < 3; ++row)
                      {
                          padded.un.val[row*4 + 0] = src[row*3 + 0];
                          padded.un.val[row*4 + 1] = src[row*3 + 1];
                          padded.un.val[row*4 + 2] = src[row*3 + 2];
                          padded.un.val[row*4 + 3] = 0.0f;
                      }

                      setShaderUniform(uint8_t(type), loc, &padded.un.val[0], 3);

                      loc += 3*16;
                      src += 9;
                  }
              }
              break;

          case UniformType::Int1:
          case UniformType::Int1|BGFX_UNIFORM_FRAGMENTBIT:
          case UniformType::Vec4:
          case UniformType::Vec4|BGFX_UNIFORM_FRAGMENTBIT:
          case UniformType::Mat4:
          case UniformType::Mat4|BGFX_UNIFORM_FRAGMENTBIT:
              // These types are copied verbatim, num registers at a time.
              setShaderUniform(uint8_t(type), loc, data, num);
              break;

          case UniformType::End:
              break;

          default:
              BX_TRACE("%4d: INVALID 0x%08x, t %d, l %d, n %d, c %d", _uniformBuffer.getPos(), opcode, type, loc, num, copy);
              break;
          }
      }
  }
  // Placeholder: clearing via a quad is not implemented in this renderer.
  // All arguments are intentionally ignored.
  void clearQuad(ClearQuad& _clearQuad, const Rect& _rect, const Clear& _clear, const float _palette[][4])
  {
      BX_UNUSED(
            _clearQuad
          , _rect
          , _clear
          , _palette
          );
  }
  // Fills in the attachments of renderPassDescriptor for the requested render
  // target and records it as the current one (m_fbh / m_rtMsaa).
  //
  // renderPassDescriptor - descriptor whose attachment textures are set.
  // _fbh                 - frame buffer to bind; an invalid handle selects the
  //                        back buffer (drawable + back buffer depth/stencil).
  // _msaa                - stored in m_rtMsaa.
  void setFrameBuffer(RenderPassDescriptor renderPassDescriptor, FrameBufferHandle _fbh, bool _msaa = true)
  {
      if (isValid(_fbh) )
      {
          FrameBufferMtl& fb = m_frameBuffers[_fbh.idx];

          const uint32_t numColor = fb.m_num;
          for (uint32_t slot = 0; slot < numColor; ++slot)
          {
              const TextureMtl& color = m_textures[fb.m_colorHandle[slot].idx];
              renderPassDescriptor.colorAttachments[slot].texture = color.m_ptr;
          }

          if (isValid(fb.m_depthHandle) )
          {
              const TextureMtl& depth = m_textures[fb.m_depthHandle.idx];
              renderPassDescriptor.depthAttachment.texture   = depth.m_ptr;
              renderPassDescriptor.stencilAttachment.texture = depth.m_ptrStencil;
              //TODO: stencilAttachment should be the same if packed/depth stencil format is used
          }
      }
      else
      {
          // Back buffer path.
          renderPassDescriptor.colorAttachments[0].texture = m_drawable.texture;
          renderPassDescriptor.depthAttachment.texture     = m_backBufferDepth;
          renderPassDescriptor.stencilAttachment.texture   = m_backBufferStencil;
      }

      m_fbh    = _fbh;
      m_rtMsaa = _msaa;
  }
  // Binds the depth/stencil state matching _state and _stencil on the current
  // render command encoder, creating and caching the Metal state object on
  // first use. The stencil reference value is set separately and does not take
  // part in the cache key.
  //
  // _state   - bgfx state bits; only depth write and depth test bits are used.
  // _stencil - packed front/back stencil state (0 disables stencil).
  void setDepthStencilState(uint64_t _state, uint64_t _stencil = 0)
  {
      _state &= BGFX_STATE_DEPTH_WRITE|BGFX_STATE_DEPTH_TEST_MASK;

      // Pull the reference value out of the front stencil before masking it
      // away from the value that is hashed.
      const uint32_t front    = unpackStencil(0, _stencil);
      const uint32_t refValue = (front&BGFX_STENCIL_FUNC_REF_MASK)>>BGFX_STENCIL_FUNC_REF_SHIFT;
      _stencil &= packStencil(~BGFX_STENCIL_FUNC_REF_MASK, BGFX_STENCIL_MASK);

      bx::HashMurmur2A hasher;
      hasher.begin();
      hasher.add(_state);
      hasher.add(_stencil);
      const uint32_t key = hasher.end();

      DepthStencilState state = m_depthStencilStateCache.find(key);
      if (NULL == state)
      {
          DepthStencilDescriptor desc = m_depthStencilDescriptor;

          const uint32_t depthFunc = (_state&BGFX_STATE_DEPTH_TEST_MASK)>>BGFX_STATE_DEPTH_TEST_SHIFT;
          desc.depthWriteEnabled    = !!(BGFX_STATE_DEPTH_WRITE & _state);
          desc.depthCompareFunction = s_cmpFunc[depthFunc];

          // The back face falls back to the front face settings when no
          // distinct back stencil was supplied.
          uint32_t back = unpackStencil(1, _stencil);
          if (BGFX_STENCIL_NONE == back
          ||  back == front)
          {
              back = front;
          }

          if (0 == _stencil)
          {
              desc.backFaceStencil  = NULL;
              desc.frontFaceStencil = NULL;
          }
          else
          {
              StencilDescriptor frontDesc = m_frontFaceStencilDescriptor;
              StencilDescriptor backDesc  = m_backFaceStencilDescriptor;

              const uint32_t readMask  = (front&BGFX_STENCIL_FUNC_RMASK_MASK)>>BGFX_STENCIL_FUNC_RMASK_SHIFT;
              const uint32_t writeMask = 0xff;

              frontDesc.stencilFailureOperation   = s_stencilOp[(front&BGFX_STENCIL_OP_FAIL_S_MASK)>>BGFX_STENCIL_OP_FAIL_S_SHIFT];
              frontDesc.depthFailureOperation     = s_stencilOp[(front&BGFX_STENCIL_OP_FAIL_Z_MASK)>>BGFX_STENCIL_OP_FAIL_Z_SHIFT];
              frontDesc.depthStencilPassOperation = s_stencilOp[(front&BGFX_STENCIL_OP_PASS_Z_MASK)>>BGFX_STENCIL_OP_PASS_Z_SHIFT];
              frontDesc.stencilCompareFunction    = s_cmpFunc[(front&BGFX_STENCIL_TEST_MASK)>>BGFX_STENCIL_TEST_SHIFT];
              frontDesc.readMask                  = readMask;
              frontDesc.writeMask                 = writeMask;

              backDesc.stencilFailureOperation    = s_stencilOp[(back&BGFX_STENCIL_OP_FAIL_S_MASK)>>BGFX_STENCIL_OP_FAIL_S_SHIFT];
              backDesc.depthFailureOperation      = s_stencilOp[(back&BGFX_STENCIL_OP_FAIL_Z_MASK)>>BGFX_STENCIL_OP_FAIL_Z_SHIFT];
              backDesc.depthStencilPassOperation  = s_stencilOp[(back&BGFX_STENCIL_OP_PASS_Z_MASK)>>BGFX_STENCIL_OP_PASS_Z_SHIFT];
              backDesc.stencilCompareFunction     = s_cmpFunc[(back&BGFX_STENCIL_TEST_MASK)>>BGFX_STENCIL_TEST_SHIFT];
              backDesc.readMask                   = readMask;
              backDesc.writeMask                  = writeMask;

              desc.frontFaceStencil = frontDesc;
              desc.backFaceStencil  = backDesc;
          }

          state = m_device.newDepthStencilStateWithDescriptor(desc);
          m_depthStencilStateCache.add(key, state);
      }

      m_renderCommandEncoder.setDepthStencilState(state);
      m_renderCommandEncoder.setStencilReferenceValue(refValue);
  }
  // Returns the sampler state for the sampler-related bits of _flags, creating
  // it from m_samplerDescriptor and caching it on first request.
  SamplerState getSamplerState(uint32_t _flags)
  {
      _flags &= BGFX_TEXTURE_SAMPLER_BITS_MASK;

      SamplerState cached = m_samplerStateCache.find(_flags);
      if (NULL != cached)
      {
          return cached;
      }

      // Decode the per-field indices out of the flag word.
      const uint32_t addrU     = (_flags&BGFX_TEXTURE_U_MASK)>>BGFX_TEXTURE_U_SHIFT;
      const uint32_t addrV     = (_flags&BGFX_TEXTURE_V_MASK)>>BGFX_TEXTURE_V_SHIFT;
      const uint32_t addrW     = (_flags&BGFX_TEXTURE_W_MASK)>>BGFX_TEXTURE_W_SHIFT;
      const uint32_t filterMin = (_flags&BGFX_TEXTURE_MIN_MASK)>>BGFX_TEXTURE_MIN_SHIFT;
      const uint32_t filterMag = (_flags&BGFX_TEXTURE_MAG_MASK)>>BGFX_TEXTURE_MAG_SHIFT;
      const uint32_t filterMip = (_flags&BGFX_TEXTURE_MIP_MASK)>>BGFX_TEXTURE_MIP_SHIFT;

      m_samplerDescriptor.sAddressMode = s_textureAddress[addrU];
      m_samplerDescriptor.tAddressMode = s_textureAddress[addrV];
      m_samplerDescriptor.rAddressMode = s_textureAddress[addrW];
      m_samplerDescriptor.minFilter    = s_textureFilterMinMag[filterMin];
      m_samplerDescriptor.magFilter    = s_textureFilterMinMag[filterMag];
      m_samplerDescriptor.mipFilter    = s_textureFilterMip[filterMip];
      m_samplerDescriptor.lodMinClamp  = 0;
      m_samplerDescriptor.lodMaxClamp  = FLT_MAX;
      m_samplerDescriptor.normalizedCoordinates = TRUE;
      m_samplerDescriptor.maxAnisotropy = m_maxAnisotropy;

      //TODO: I haven't found how to specify this. Comparison function can be specified in shader.
      // On OSX this can be specified. There is no support for this on iOS right now.
      //const uint32_t cmpFunc = (_flags&BGFX_TEXTURE_COMPARE_MASK)>>BGFX_TEXTURE_COMPARE_SHIFT;
      //const uint8_t filter = 0 == cmpFunc ? 0 : D3D11_COMPARISON_FILTERING_BIT;
      //m_samplerDescriptor.comparisonFunc = 0 == cmpFunc ? D3D11_COMPARISON_NEVER : s_cmpFunc[cmpFunc];

      SamplerState created = m_device.newSamplerStateWithDescriptor(m_samplerDescriptor);
      m_samplerStateCache.add(_flags, created);
      return created;
  }
  // Resolves pending occlusion queries for _render, then reports whether the
  // occlusion result recorded for _handle (non-zero or zero) matches the
  // expected visibility _visible.
  bool isVisible(Frame* _render, OcclusionQueryHandle _handle, bool _visible)
  {
      m_occlusionQuery.resolve(_render);

      const bool hasResult = 0 != _render->m_occlusion[_handle.idx];
      return hasResult == _visible;
  }
  // Width of the back buffer, as reported by the back buffer depth texture.
  uint32_t getBufferWidth()
  {
      const uint32_t backBufferWidth = m_backBufferDepth.width();
      return backBufferWidth;
  }
  // Height of the back buffer, as reported by the back buffer depth texture.
  uint32_t getBufferHeight()
  {
      const uint32_t backBufferHeight = m_backBufferDepth.height();
      return backBufferHeight;
  }
  1036. Device m_device;
  1037. CommandQueue m_commandQueue;
  1038. CAMetalLayer* m_metalLayer;
  1039. Texture m_backBufferDepth;
  1040. Texture m_backBufferStencil;
  1041. uint32_t m_backBufferPixelFormatHash;
  1042. uint32_t m_maxAnisotropy;
  1043. OcclusionQueryMTL m_occlusionQuery;
  1044. Buffer m_uniformBuffer;
  1045. Buffer m_uniformBuffers[UNIFORM_BUFFER_COUNT];
  1046. uint32_t m_uniformBufferVertexOffset;
  1047. uint32_t m_uniformBufferFragmentOffset;
  1048. uint8_t m_uniformBufferIndex;
  1049. uint16_t m_numWindows;
  1050. FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
  1051. IndexBufferMtl m_indexBuffers[BGFX_CONFIG_MAX_INDEX_BUFFERS];
  1052. VertexBufferMtl m_vertexBuffers[BGFX_CONFIG_MAX_VERTEX_BUFFERS];
  1053. ShaderMtl m_shaders[BGFX_CONFIG_MAX_SHADERS];
  1054. ProgramMtl m_program[BGFX_CONFIG_MAX_PROGRAMS];
  1055. TextureMtl m_textures[BGFX_CONFIG_MAX_TEXTURES];
  1056. FrameBufferMtl m_frameBuffers[BGFX_CONFIG_MAX_FRAME_BUFFERS];
  1057. VertexDecl m_vertexDecls[BGFX_CONFIG_MAX_VERTEX_DECLS];
  1058. UniformRegistry m_uniformReg;
  1059. void* m_uniforms[BGFX_CONFIG_MAX_UNIFORMS];
  1060. StateCacheT<DepthStencilState> m_depthStencilStateCache;
  1061. StateCacheT<SamplerState> m_samplerStateCache;
  1062. TextVideoMem m_textVideoMem;
  1063. FrameBufferHandle m_fbh;
  1064. bool m_rtMsaa;
  1065. Resolution m_resolution;
  1066. // descriptors
  1067. RenderPipelineDescriptor m_renderPipelineDescriptor;
  1068. DepthStencilDescriptor m_depthStencilDescriptor;
  1069. StencilDescriptor m_frontFaceStencilDescriptor;
  1070. StencilDescriptor m_backFaceStencilDescriptor;
  1071. VertexDescriptor m_vertexDescriptor;
  1072. TextureDescriptor m_textureDescriptor;
  1073. SamplerDescriptor m_samplerDescriptor;
  1074. // currently active objects data
  1075. id <CAMetalDrawable> m_drawable;
  1076. CommandBuffer m_commandBuffer;
  1077. RenderCommandEncoder m_renderCommandEncoder;
  1078. };
  // The single Metal renderer context; NULL while no renderer exists.
  static RendererContextMtl* s_renderMtl;

  // Allocates and initializes the Metal renderer context.
  // Returns the context, or NULL when initialization fails (the partially
  // created context is deleted in that case).
  RendererContextI* rendererCreate()
  {
      // The global must be assigned before init() runs.
      s_renderMtl = BX_NEW(g_allocator, RendererContextMtl);

      if (s_renderMtl->init())
      {
          return s_renderMtl;
      }

      BX_DELETE(g_allocator, s_renderMtl);
      s_renderMtl = NULL;
      return s_renderMtl;
  }
  // Shuts down and deletes the Metal renderer context created by
  // rendererCreate(), then clears the global pointer.
  void rendererDestroy()
  {
      RendererContextMtl* context = s_renderMtl;

      context->shutdown();
      BX_DELETE(g_allocator, context);

      s_renderMtl = NULL;
  }
  // Writes the characters of the NUL-terminated string _str to _writer.
  // The terminating NUL itself is not written.
  void writeString(bx::WriterI* _writer, const char* _str)
  {
      const int32_t length = (int32_t)strlen(_str);
      bx::write(_writer, _str, length);
  }
  // Creates the Metal shader function from a bgfx shader blob.
  //
  // The blob layout read here is: magic, io hash, uniform count followed by
  // that many uniform records (parsed only to advance the reader), the shader
  // size, and the NUL-terminated Metal source. The source is prefixed with
  // "half -> float" defines, compiled into a library, and the entry point
  // SHADER_FUNCTION_NAME is stored in m_function.
  //
  // _mem - shader blob. An unknown magic or a failed compile/lookup raises
  //        BGFX_FATAL with Fatal::InvalidShader.
  void ShaderMtl::create(const Memory* _mem)
  {
      bx::MemoryReader reader(_mem->data, _mem->size);

      uint32_t magic;
      bx::read(&reader, magic);

      switch (magic)
      {
      case BGFX_CHUNK_MAGIC_CSH:
      case BGFX_CHUNK_MAGIC_FSH:
      case BGFX_CHUNK_MAGIC_VSH:
          break;

      default:
          BGFX_FATAL(false, Fatal::InvalidShader, "Unknown shader format %x.", magic);
          break;
      }

      //bool fragment = BGFX_CHUNK_MAGIC_FSH == magic;

      uint32_t iohash;
      bx::read(&reader, iohash);

      uint16_t count;
      bx::read(&reader, count);

      BX_TRACE("%s Shader consts %d"
          , BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute"
          , count
          );

      // The uniform table is not used by this renderer; it is read only to
      // skip past it.
      for (uint32_t ii = 0; ii < count; ++ii)
      {
          uint8_t nameSize;
          bx::read(&reader, nameSize);

          // nameSize is at most 255, so name[nameSize] is always in bounds.
          char name[256];
          bx::read(&reader, &name, nameSize);
          name[nameSize] = '\0';

          uint8_t type;
          bx::read(&reader, type);

          uint8_t num;
          bx::read(&reader, num);

          uint16_t regIndex;
          bx::read(&reader, regIndex);

          uint16_t regCount;
          bx::read(&reader, regCount);
      }

      uint32_t shaderSize;
      bx::read(&reader, shaderSize);

      const char* code = (const char*)reader.getDataPtr();
      bx::skip(&reader, shaderSize+1);

      // Build a patched copy of the source with room for the prepended defines.
      int32_t codeLen = (int32_t)strlen(code);
      int32_t tempLen = codeLen + (4<<10);
      char* temp = (char*)alloca(tempLen);
      bx::StaticMemoryBlockWriter writer(temp, tempLen);

      //TODO: remove this hack. some shaders have problem with half<->float conversion
      writeString(&writer
          , "#define half float\n"
            "#define half2 float2\n"
            "#define half3 float3\n"
            "#define half4 float4\n"
          );

      bx::write(&writer, code, codeLen);
      bx::write(&writer, '\0');
      code = temp;

      //TODO: use binary format
      Library lib = s_renderMtl->m_device.newLibraryWithSource(code);
      if (NULL != lib)
      {
          m_function = lib.newFunctionWithName(SHADER_FUNCTION_NAME);

          // The library was obtained from a "new..." call and is owned by
          // this function; only m_function is kept, so release the library
          // here instead of leaking one per shader.
          MTL_RELEASE(lib);
      }

      BGFX_FATAL(NULL != m_function, bgfx::Fatal::InvalidShader, "Failed to create %s shader."
          , BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute");
  }
  // Binds the vertex (and optional fragment) shader to this program and
  // collects the vertex shader's active attributes.
  //
  // For every active vertex attribute whose name matches an entry of
  // s_attribName, its attribute index is stored in m_attributes and the
  // attribute is appended to m_used. Attributes matching s_instanceDataName
  // have their index appended to m_instanceData. Both lists are terminated
  // (Attrib::Count and UINT16_MAX respectively).
  //
  // _vsh - vertex shader; expected to hold a valid function.
  // _fsh - fragment shader, may be NULL (m_fsh is left untouched then).
  void ProgramMtl::create(const ShaderMtl* _vsh, const ShaderMtl* _fsh)
  {
      BX_CHECK(NULL != _vsh->m_function.m_obj, "Vertex shader doesn't exist.");
      m_vsh = _vsh;

      if (NULL != _fsh)
      {
          BX_CHECK(NULL != _fsh->m_function.m_obj, "Fragment shader doesn't exist.");
          m_fsh = _fsh;
      }

      // get attributes
      // 0xff fill marks every attribute slot as unused.
      memset(m_attributes, 0xff, sizeof(m_attributes) );

      uint32_t numUsed     = 0;
      uint32_t numInstance = 0;

      if (NULL != _vsh->m_function.m_obj )
      {
          for (MTLVertexAttribute* vertexAttrib in _vsh->m_function.m_obj.vertexAttributes)
          {
              if (!vertexAttrib.active)
              {
                  continue;
              }

              const char*    attribName = utf8String(vertexAttrib.name);
              const uint32_t location   = (uint32_t)vertexAttrib.attributeIndex;
              BX_TRACE("attr %s: %d", attribName, location);

              // Regular vertex attribute: the first matching name wins.
              for (uint8_t attr = 0; attr < Attrib::Count; ++attr)
              {
                  if (0 == strcmp(s_attribName[attr], attribName) )
                  {
                      m_attributes[attr] = location;
                      m_used[numUsed++]  = attr;
                      break;
                  }
              }

              // Instance data attribute.
              for (uint32_t inst = 0; inst < BX_COUNTOF(s_instanceDataName); ++inst)
              {
                  if (0 == strcmp(s_instanceDataName[inst], attribName) )
                  {
                      m_instanceData[numInstance++] = location;
                  }
              }
          }
      }

      m_used[numUsed]             = Attrib::Count;
      m_instanceData[numInstance] = UINT16_MAX;
  }
  // Returns the program to its empty state: drops the shader references,
  // destroys both constant buffers, zeroes the reflected sizes/alignment
  // masks and invalidates the cached render pipeline states.
  void ProgramMtl::destroy()
  {
      m_vsh = NULL;
      m_fsh = NULL;

      // Vertex stage constants.
      if (NULL != m_vshConstantBuffer)
      {
          UniformBuffer::destroy(m_vshConstantBuffer);
          m_vshConstantBuffer = NULL;
      }
      m_vshConstantBufferSize          = 0;
      m_vshConstantBufferAlignmentMask = 0;

      // Fragment stage constants.
      if (NULL != m_fshConstantBuffer)
      {
          UniformBuffer::destroy(m_fshConstantBuffer);
          m_fshConstantBuffer = NULL;
      }
      m_fshConstantBufferSize          = 0;
      m_fshConstantBufferAlignmentMask = 0;

      // Force uniform reflection to run again on the next pipeline build.
      m_processedUniforms = false;
      m_numPredefined     = 0;

      m_renderPipelineStateCache.invalidate();
  }
  // Maps a Metal data type to the bgfx uniform type used to store it.
  // Integer types map to Int1, float/float2/float3/float4 to Vec4, float3x3
  // to Mat3 and float4x4 to Mat4. Any other type trips BX_CHECK and yields
  // UniformType::End.
  UniformType::Enum convertMtlType(MTLDataType _type)
  {
      UniformType::Enum converted = UniformType::End;

      switch (_type)
      {
      case MTLDataTypeUInt:
      case MTLDataTypeInt:
          converted = UniformType::Int1;
          break;

      case MTLDataTypeFloat:
      case MTLDataTypeFloat2:
      case MTLDataTypeFloat3:
      case MTLDataTypeFloat4:
          converted = UniformType::Vec4;
          break;

      case MTLDataTypeFloat3x3:
          converted = UniformType::Mat3;
          break;

      case MTLDataTypeFloat4x4:
          converted = UniformType::Mat4;
          break;

      default:
          break;
      }

      BX_CHECK(UniformType::End != converted, "Unrecognized Mtl Data type 0x%04x.", _type);
      return converted;
  }
  // Returns the render pipeline state for this program under the given
  // render state, creating and caching it on first use.
  //
  // The cache key is a hash of the pipeline-relevant bits of _state, _rgba
  // (only when independent blending is on), the pixel format hash of the
  // target frame buffer (or back buffer), the vertex declaration handle and
  // the instance data count.
  //
  // The first pipeline built for a program is created with reflection info,
  // which is used to size the vertex/fragment uniform blocks and to record
  // predefined and user uniforms.
  //
  // _state           - bgfx render state.
  // _rgba            - per-attachment blend settings for independent blend.
  // _fbHandle        - target frame buffer; invalid handle means back buffer.
  // _declHandle      - vertex declaration; when invalid no vertex descriptor
  //                    is set.
  // _numInstanceData - number of 16-byte instance data registers (0 = none).
  RenderPipelineState ProgramMtl::getRenderPipelineState(uint64_t _state, uint32_t _rgba, FrameBufferHandle _fbHandle, VertexDeclHandle _declHandle, uint16_t _numInstanceData)
  {
      // Only these bits affect the pipeline object.
      _state &= (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_ALPHA_WRITE|BGFX_STATE_RGB_WRITE|BGFX_STATE_BLEND_INDEPENDENT|BGFX_STATE_MSAA);

      bool independentBlendEnable = !!(BGFX_STATE_BLEND_INDEPENDENT & _state);

      bx::HashMurmur2A murmur;
      murmur.begin();
      murmur.add(_state);
      murmur.add(independentBlendEnable ? _rgba : 0);
      if (!isValid(_fbHandle) )
      {
          murmur.add(s_renderMtl->m_backBufferPixelFormatHash);
      }
      else
      {
          FrameBufferMtl& frameBuffer = s_renderMtl->m_frameBuffers[_fbHandle.idx];
          murmur.add(frameBuffer.m_pixelFormatHash);
      }
      murmur.add(_declHandle.idx);
      murmur.add(_numInstanceData);
      uint32_t hash = murmur.end();

      RenderPipelineState rps = m_renderPipelineStateCache.find(hash);
      if (NULL == rps)
      {
          // The descriptor is shared by all programs; start from a clean one.
          RenderPipelineDescriptor& pd = s_renderMtl->m_renderPipelineDescriptor;
          reset(pd);

          // Attachment pixel formats.
          uint32_t frameBufferAttachment = 1;
          if (!isValid(_fbHandle) )
          {
              pd.colorAttachments[0].pixelFormat = s_renderMtl->m_drawable.texture.pixelFormat;
              pd.depthAttachmentPixelFormat      = s_renderMtl->m_backBufferDepth.m_obj.pixelFormat;
              pd.stencilAttachmentPixelFormat    = s_renderMtl->m_backBufferStencil.m_obj.pixelFormat;
          }
          else
          {
              FrameBufferMtl& frameBuffer = s_renderMtl->m_frameBuffers[_fbHandle.idx];
              frameBufferAttachment = frameBuffer.m_num;

              for (uint32_t ii = 0; ii < frameBuffer.m_num; ++ii)
              {
                  const TextureMtl& texture = s_renderMtl->m_textures[frameBuffer.m_colorHandle[ii].idx];
                  pd.colorAttachments[ii].pixelFormat = texture.m_ptr.m_obj.pixelFormat;
              }

              if (isValid(frameBuffer.m_depthHandle))
              {
                  const TextureMtl& texture = s_renderMtl->m_textures[frameBuffer.m_depthHandle.idx];
                  pd.depthAttachmentPixelFormat = texture.m_ptr.m_obj.pixelFormat;
                  if (NULL != texture.m_ptrStencil)
                  {
                      pd.stencilAttachmentPixelFormat = MTLPixelFormatInvalid; //texture.m_ptrStencil.m_obj.pixelFormat;
                  }
                  //todo: stencil attachment should be the same as depth for packed depth/stencil
              }
          }

          // TODO: BGFX_STATE_MSAA using _fbHandle texture msaa values

          // Unpack blend factors (4 bits each) and blend equations (3 bits each).
          const uint32_t blend    = uint32_t( (_state&BGFX_STATE_BLEND_MASK)>>BGFX_STATE_BLEND_SHIFT);
          const uint32_t equation = uint32_t( (_state&BGFX_STATE_BLEND_EQUATION_MASK)>>BGFX_STATE_BLEND_EQUATION_SHIFT);

          const uint32_t srcRGB = (blend    )&0xf;
          const uint32_t dstRGB = (blend>> 4)&0xf;
          const uint32_t srcA   = (blend>> 8)&0xf;
          const uint32_t dstA   = (blend>>12)&0xf;

          const uint32_t equRGB = (equation   )&0x7;
          const uint32_t equA   = (equation>>3)&0x7;

          uint8_t writeMask = (_state&BGFX_STATE_ALPHA_WRITE) ? MTLColorWriteMaskAlpha : 0;
          writeMask |= (_state&BGFX_STATE_RGB_WRITE) ? MTLColorWriteMaskRed|MTLColorWriteMaskGreen|MTLColorWriteMaskBlue : 0;

          // With independent blending only attachment 0 takes the state bits;
          // otherwise every attachment does.
          for (uint32_t ii = 0; ii < (independentBlendEnable ? 1 : frameBufferAttachment); ++ii)
          {
              RenderPipelineColorAttachmentDescriptor drt = pd.colorAttachments[ii];

              drt.blendingEnabled = !!(BGFX_STATE_BLEND_MASK & _state);

              drt.sourceRGBBlendFactor      = s_blendFactor[srcRGB][0];
              drt.destinationRGBBlendFactor = s_blendFactor[dstRGB][0];
              drt.rgbBlendOperation         = s_blendEquation[equRGB];

              drt.sourceAlphaBlendFactor      = s_blendFactor[srcA][1];
              drt.destinationAlphaBlendFactor = s_blendFactor[dstA][1];
              drt.alphaBlendOperation         = s_blendEquation[equA];

              drt.writeMask = writeMask;
          }

          if (independentBlendEnable)
          {
              // Attachments 1..N take 11 bits each from _rgba:
              // src factor (4), dst factor (4), equation (3).
              for (uint32_t ii = 1, rgba = _rgba; ii < frameBufferAttachment; ++ii, rgba >>= 11)
              {
                  RenderPipelineColorAttachmentDescriptor drt = pd.colorAttachments[ii];

                  drt.blendingEnabled = 0 != (rgba&0x7ff);

                  const uint32_t src           = (rgba   )&0xf;
                  const uint32_t dst           = (rgba>>4)&0xf;
                  const uint32_t equationIndex = (rgba>>8)&0x7;

                  drt.sourceRGBBlendFactor      = s_blendFactor[src][0];
                  drt.destinationRGBBlendFactor = s_blendFactor[dst][0];
                  drt.rgbBlendOperation         = s_blendEquation[equationIndex];

                  drt.sourceAlphaBlendFactor      = s_blendFactor[src][1];
                  drt.destinationAlphaBlendFactor = s_blendFactor[dst][1];
                  drt.alphaBlendOperation         = s_blendEquation[equationIndex];

                  drt.writeMask = writeMask;
              }
          }

          pd.vertexFunction = m_vsh->m_function;

          // ProgramMtl::create() accepts a NULL fragment shader and destroy()
          // resets m_fsh to NULL, so m_fsh must not be dereferenced blindly.
          if (NULL != m_fsh)
          {
              pd.fragmentFunction = m_fsh->m_function;
          }
          else
          {
              pd.fragmentFunction = nil;
          }

          if (isValid(_declHandle))
          {
              VertexDescriptor vertexDesc = s_renderMtl->m_vertexDescriptor;
              reset(vertexDesc);

              VertexDecl &vertexDecl = s_renderMtl->m_vertexDecls[_declHandle.idx];

              // Vertex attributes come from buffer slot 1.
              for (uint32_t ii = 0; Attrib::Count != m_used[ii]; ++ii)
              {
                  Attrib::Enum attr = Attrib::Enum(m_used[ii]);
                  uint32_t loc = m_attributes[attr];

                  uint8_t num;
                  AttribType::Enum type;
                  bool normalized;
                  bool asInt;
                  vertexDecl.decode(attr, num, type, normalized, asInt);
                  BX_CHECK(num <= 4, "num must be <=4");

                  if (UINT16_MAX != vertexDecl.m_attributes[attr])
                  {
                      vertexDesc.attributes[loc].format      = s_attribType[type][num-1][normalized?1:0];
                      vertexDesc.attributes[loc].bufferIndex = 1;
                      vertexDesc.attributes[loc].offset      = vertexDecl.m_offset[attr];
                      BX_TRACE("attrib:%s format: %d offset:%d", s_attribName[attr], (int)vertexDesc.attributes[loc].format, (int)vertexDesc.attributes[loc].offset);
                  }
                  else
                  { // missing attribute: using dummy attribute with smallest possible size
                      vertexDesc.attributes[loc].format      = MTLVertexFormatUChar2;
                      vertexDesc.attributes[loc].bufferIndex = 1;
                      vertexDesc.attributes[loc].offset      = 0;
                  }
              }

              vertexDesc.layouts[1].stride       = vertexDecl.getStride();
              vertexDesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex;
              BX_TRACE("stride: %d", (int)vertexDesc.layouts[1].stride);

              // Instance data comes from buffer slot 2, one float4 per register.
              if (_numInstanceData > 0)
              {
                  for (uint32_t ii = 0; UINT16_MAX != m_instanceData[ii]; ++ii)
                  {
                      uint32_t loc = m_instanceData[ii];
                      vertexDesc.attributes[loc].format      = MTLVertexFormatFloat4;
                      vertexDesc.attributes[loc].bufferIndex = 2;
                      vertexDesc.attributes[loc].offset      = ii*16;
                  }

                  vertexDesc.layouts[2].stride       = _numInstanceData * 16;
                  vertexDesc.layouts[2].stepFunction = MTLVertexStepFunctionPerInstance;
                  vertexDesc.layouts[2].stepRate     = 1;
              }

              pd.vertexDescriptor = vertexDesc;
          }

          if (m_processedUniforms)
          {
              // Uniform layout is already known; no reflection needed.
              rps = s_renderMtl->m_device.newRenderPipelineStateWithDescriptor(pd);
          }
          else
          {
              m_numPredefined = 0;
              RenderPipelineReflection reflection = NULL;
              rps = s_renderMtl->m_device.newRenderPipelineStateWithDescriptor(pd, MTLPipelineOptionBufferTypeInfo, &reflection);

              if (NULL != reflection)
              {
                  // shaderType 0 = vertex stage, 1 = fragment stage.
                  for (uint32_t shaderType = 0; shaderType < 2; ++shaderType)
                  {
                      UniformBuffer*& constantBuffer = (shaderType == 0 ? m_vshConstantBuffer : m_fshConstantBuffer);
                      uint8_t fragmentBit = (1 == shaderType ? BGFX_UNIFORM_FRAGMENTBIT : 0);

                      for (MTLArgument* arg in (shaderType == 0 ? reflection.vertexArguments : reflection.fragmentArguments))
                      {
                          BX_TRACE("arg: %s type:%d", utf8String(arg.name), arg.type);
                          if (arg.active)
                          {
                              if (arg.type == MTLArgumentTypeBuffer
                              &&  0 == strcmp(utf8String(arg.name), SHADER_UNIFORM_NAME) )
                              {
                                  BX_CHECK( arg.index == 0, "Uniform buffer must be in the buffer slot 0.");
                                  BX_CHECK( MTLDataTypeStruct == arg.bufferDataType, "%s's type must be a struct",SHADER_UNIFORM_NAME );

                                  if (MTLDataTypeStruct == arg.bufferDataType)
                                  {
                                      if (shaderType == 0)
                                      {
                                          m_vshConstantBufferSize          = (uint32_t)arg.bufferDataSize;
                                          m_vshConstantBufferAlignmentMask = (uint32_t)arg.bufferAlignment - 1;
                                      }
                                      else
                                      {
                                          m_fshConstantBufferSize          = (uint32_t)arg.bufferDataSize;
                                          m_fshConstantBufferAlignmentMask = (uint32_t)arg.bufferAlignment - 1;
                                      }

                                      for (MTLStructMember* uniform in arg.bufferStructType.members )
                                      {
                                          const char* name = utf8String(uniform.name);
                                          BX_TRACE("uniform: %s type:%d", name, uniform.dataType);

                                          MTLDataType dataType = uniform.dataType;
                                          uint32_t num = 1;

                                          if (dataType == MTLDataTypeArray)
                                          {
                                              dataType = uniform.arrayType.elementType;
                                              num = (uint32_t)uniform.arrayType.arrayLength;
                                          }

                                          // Convert element count to a count of 16-byte registers.
                                          switch (dataType) {
                                          case MTLDataTypeFloat4 :
                                              num *= 1;
                                              break;
                                          case MTLDataTypeFloat4x4:
                                              num *= 4;
                                              break;
                                          case MTLDataTypeFloat3x3:
                                              num *= 3;
                                              break;
                                          default:
                                              BX_WARN(0, "Unsupported uniform MTLDataType: %d", uniform.dataType);
                                              break;
                                          }

                                          PredefinedUniform::Enum predefined = nameToPredefinedUniformEnum(name);
                                          if (PredefinedUniform::Count != predefined)
                                          {
                                              m_predefined[m_numPredefined].m_loc   = uint32_t(uniform.offset);
                                              m_predefined[m_numPredefined].m_count = uint16_t(num);
                                              m_predefined[m_numPredefined].m_type  = uint8_t(predefined|fragmentBit);
                                              m_numPredefined++;
                                          }
                                          else
                                          {
                                              // User uniform: only recorded when it is registered.
                                              const UniformInfo* info = s_renderMtl->m_uniformReg.find(name);
                                              if (NULL != info)
                                              {
                                                  if (NULL == constantBuffer)
                                                  {
                                                      constantBuffer = UniformBuffer::create(1024);
                                                  }

                                                  UniformType::Enum type = convertMtlType(dataType);
                                                  constantBuffer->writeUniformHandle((UniformType::Enum)(type|fragmentBit), uint32_t(uniform.offset), info->m_handle, uint16_t(num) );
                                                  BX_TRACE("store %s %d offset:%d", name, info->m_handle, uint32_t(uniform.offset));
                                              }
                                          }
                                      }
                                  }
                              }
                              else if (arg.type == MTLArgumentTypeTexture)
                              {
                                  BX_TRACE("texture: %s index:%d", utf8String(arg.name), arg.index);
                              }
                              else if (arg.type == MTLArgumentTypeSampler)
                              {
                                  BX_TRACE("sampler: %s index:%d", utf8String(arg.name), arg.index);
                              }
                          }
                      }

                      if (NULL != constantBuffer)
                      {
                          constantBuffer->finish();
                      }
                  }
              }

              m_processedUniforms = true;
          }

          m_renderPipelineStateCache.add(hash, rps);
      }

      return rps;
  }
  // Creates the underlying Metal buffer of _size bytes, optionally filled
  // with the bytes at _data, and records the size and flags.
  //
  // _stride and _vertex are accepted for interface compatibility and unused.
  void BufferMtl::create(uint32_t _size, void* _data, uint16_t _flags, uint16_t _stride, bool _vertex)
  {
      BX_UNUSED(_stride, _vertex);

      m_size  = _size;
      m_flags = _flags;

      if (NULL != _data)
      {
          // Initial contents supplied: copy them into the new buffer.
          m_buffer = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0);
      }
      else
      {
          // No initial data: allocate by length only.
          m_buffer = s_renderMtl->m_device.newBufferWithLength(_size, 0);
      }
  }
  // Copies _size bytes from _data into the buffer contents at byte offset
  // _offset. _discard is ignored. No bounds checking is performed here.
  void BufferMtl::update(uint32_t _offset, uint32_t _size, void* _data, bool _discard)
  {
      BX_UNUSED(_discard);

      uint8_t* contents = (uint8_t*)m_buffer.contents();
      memcpy(contents + _offset, _data, _size);
  }
  // Creates a vertex buffer. Remembers the vertex declaration handle and
  // forwards to BufferMtl::create() with the declaration's stride, or 0 when
  // the handle is invalid.
  void VertexBufferMtl::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags)
  {
      m_decl = _declHandle;

      uint16_t stride = 0;
      if (isValid(_declHandle) )
      {
          stride = s_renderMtl->m_vertexDecls[_declHandle.idx].m_stride;
      }

      BufferMtl::create(_size, _data, _flags, stride, true);
  }
  1532. void TextureMtl::create(const Memory* _mem, uint32_t _flags, uint8_t _skip)
  1533. {
  1534. m_sampler = s_renderMtl->getSamplerState(_flags);
  1535. ImageContainer imageContainer;
  1536. if (imageParse(imageContainer, _mem->data, _mem->size) )
  1537. {
  1538. uint8_t numMips = imageContainer.m_numMips;
  1539. const uint8_t startLod = uint8_t(bx::uint32_min(_skip, numMips-1) );
  1540. numMips -= startLod;
  1541. const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) );
  1542. const uint32_t textureWidth = bx::uint32_max(blockInfo.blockWidth, imageContainer.m_width >>startLod);
  1543. const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod);
  1544. m_flags = _flags;
  1545. m_requestedFormat = (uint8_t)imageContainer.m_format;
  1546. m_textureFormat = MTLPixelFormatInvalid == s_textureFormat[m_requestedFormat].m_fmt
  1547. ? uint8_t(TextureFormat::BGRA8)
  1548. : m_requestedFormat
  1549. ;
  1550. const bool convert = m_requestedFormat != m_textureFormat;
  1551. uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
  1552. if (convert)
  1553. {
  1554. m_textureFormat = (uint8_t)TextureFormat::RGBA8;
  1555. bpp = 32;
  1556. }
  1557. TextureDescriptor desc = s_renderMtl->m_textureDescriptor;
  1558. if (imageContainer.m_cubeMap)
  1559. {
  1560. desc.textureType = MTLTextureTypeCube;
  1561. }
  1562. else if (imageContainer.m_depth > 1)
  1563. {
  1564. desc.textureType = MTLTextureType3D;
  1565. }
  1566. else
  1567. {
  1568. desc.textureType = MTLTextureType2D;
  1569. }
  1570. m_numMips = numMips;
  1571. const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) );
  1572. BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s%s."
  1573. , this - s_renderMtl->m_textures
  1574. , getName( (TextureFormat::Enum)m_textureFormat)
  1575. , getName( (TextureFormat::Enum)m_requestedFormat)
  1576. , textureWidth
  1577. , textureHeight
  1578. , imageContainer.m_cubeMap ? "x6" : ""
  1579. , 0 != (_flags&BGFX_TEXTURE_RT_MASK) ? " (render target)" : ""
  1580. );
  1581. const bool writeOnly = 0 != (_flags&BGFX_TEXTURE_RT_WRITE_ONLY);
  1582. // const bool computeWrite = 0 != (_flags&BGFX_TEXTURE_COMPUTE_WRITE);
  1583. // const bool renderTarget = 0 != (_flags&BGFX_TEXTURE_RT_MASK);
  1584. const bool srgb = 0 != (_flags&BGFX_TEXTURE_SRGB) || imageContainer.m_srgb;
  1585. // const uint32_t msaaQuality = bx::uint32_satsub( (_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1);
  1586. // const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality];
  1587. MTLPixelFormat format = MTLPixelFormatInvalid;
  1588. if (srgb)
  1589. {
  1590. format = s_textureFormat[m_textureFormat].m_fmtSrgb;
  1591. BX_WARN(format != MTLPixelFormatInvalid
  1592. , "sRGB not supported for texture format %d"
  1593. , m_textureFormat
  1594. );
  1595. }
  1596. if (format == MTLPixelFormatInvalid)
  1597. {
  1598. // not swizzled and not sRGB, or sRGB unsupported
  1599. format = s_textureFormat[m_textureFormat].m_fmt;
  1600. }
  1601. desc.pixelFormat = format;
  1602. desc.width = textureWidth;
  1603. desc.height = textureHeight;
  1604. desc.depth = bx::uint32_max(1,imageContainer.m_depth);
  1605. desc.mipmapLevelCount = imageContainer.m_numMips;
  1606. desc.sampleCount = 1; //TODO: set samplecount - If textureType is not MTLTextureType2DMultisample, the value must be 1.
  1607. desc.resourceOptions = MTLResourceStorageModePrivate;
  1608. desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
  1609. desc.storageMode = (MTLStorageMode)(writeOnly
  1610. ? 2 /*MTLStorageModePrivate*/
  1611. : 1 /*MTLStorageModeManaged*/
  1612. );
  1613. desc.usage = writeOnly
  1614. ? MTLTextureUsageShaderWrite
  1615. : MTLTextureUsageShaderRead
  1616. ;
  1617. //TODO: set resource flags depending on usage(renderTarget/computeWrite/etc) on iOS9/OSX
  1618. m_ptr = s_renderMtl->m_device.newTextureWithDescriptor(desc);
  1619. if (m_requestedFormat == TextureFormat::D24S8
  1620. && desc.pixelFormat == MTLPixelFormatDepth32Float)
  1621. {
  1622. desc.pixelFormat = MTLPixelFormatStencil8;
  1623. m_ptrStencil = s_renderMtl->m_device.newTextureWithDescriptor(desc);
  1624. }
  1625. uint8_t* temp = NULL;
  1626. if (convert)
  1627. {
  1628. temp = (uint8_t*)BX_ALLOC(g_allocator, textureWidth*textureHeight*4);
  1629. }
  1630. for (uint8_t side = 0, numSides = imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side)
  1631. {
  1632. uint32_t width = textureWidth;
  1633. uint32_t height = textureHeight;
  1634. uint32_t depth = imageContainer.m_depth;
  1635. for (uint8_t lod = 0, num = numMips; lod < num; ++lod)
  1636. {
  1637. width = bx::uint32_max(1, width);
  1638. height = bx::uint32_max(1, height);
  1639. depth = bx::uint32_max(1, depth);
  1640. ImageMip mip;
  1641. if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) )
  1642. {
  1643. const uint8_t* data = mip.m_data;
  1644. if (convert)
  1645. {
  1646. imageDecodeToRgba8(temp
  1647. , mip.m_data
  1648. , mip.m_width
  1649. , mip.m_height
  1650. , mip.m_width*4
  1651. , mip.m_format
  1652. );
  1653. data = temp;
  1654. }
  1655. MTLRegion region = { { 0, 0, 0 }, { width, height, depth } };
  1656. uint32_t bytesPerRow = 0;
  1657. uint32_t bytesPerImage = 0;
  1658. if (compressed && !convert)
  1659. {
  1660. if (format >= 160 /*PVRTC_RGB_2BPP*/
  1661. && format <= 167 /*PVRTC_RGBA_4BPP_sRGB*/)
  1662. {
  1663. bytesPerRow = 0;
  1664. bytesPerImage = 0;
  1665. }
  1666. else
  1667. {
  1668. bytesPerRow = (mip.m_width / blockInfo.blockWidth)*mip.m_blockSize;
  1669. bytesPerImage = desc.textureType == MTLTextureType3D
  1670. ? (mip.m_height/blockInfo.blockHeight)*bytesPerRow
  1671. : 0
  1672. ;
  1673. }
  1674. }
  1675. else
  1676. {
  1677. bytesPerRow = width * bpp / 8;
  1678. bytesPerImage = desc.textureType == MTLTextureType3D
  1679. ? bytesPerRow * height
  1680. : 0
  1681. ;
  1682. }
  1683. m_ptr.replaceRegion(region, lod, side, data, bytesPerRow, bytesPerImage);
  1684. }
  1685. width >>= 1;
  1686. height >>= 1;
  1687. depth >>= 1;
  1688. }
  1689. }
  1690. if (NULL != temp)
  1691. {
  1692. BX_FREE(g_allocator, temp);
  1693. }
  1694. }
  1695. }
  1696. void TextureMtl::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
  1697. {
  1698. MTLRegion region =
  1699. {
  1700. { _rect.m_x, _rect.m_y, _z },
  1701. { _rect.m_width, _rect.m_height, _depth },
  1702. };
  1703. const uint32_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) );
  1704. const uint32_t rectpitch = _rect.m_width*bpp/8;
  1705. const uint32_t srcpitch = UINT16_MAX == _pitch ? rectpitch : _pitch;
  1706. const bool convert = m_textureFormat != m_requestedFormat;
  1707. uint8_t* data = _mem->data;
  1708. uint8_t* temp = NULL;
  1709. if (convert)
  1710. {
  1711. temp = (uint8_t*)BX_ALLOC(g_allocator, rectpitch*_rect.m_height);
  1712. imageDecodeToBgra8(temp
  1713. , data
  1714. , _rect.m_width
  1715. , _rect.m_height
  1716. , srcpitch
  1717. , TextureFormat::Enum(m_requestedFormat)
  1718. );
  1719. data = temp;
  1720. }
  1721. m_ptr.replaceRegion(region, _mip, _side, data, srcpitch, srcpitch * _rect.m_height);
  1722. if (NULL != temp)
  1723. {
  1724. BX_FREE(g_allocator, temp);
  1725. }
  1726. }
  1727. void TextureMtl::commit(uint8_t _stage, uint32_t _flags)
  1728. {
  1729. //TODO: vertex or fragment stage?
  1730. s_renderMtl->m_renderCommandEncoder.setFragmentTexture(m_ptr, _stage);
  1731. s_renderMtl->m_renderCommandEncoder.setFragmentSamplerState(0 == (BGFX_TEXTURE_INTERNAL_DEFAULT_SAMPLER & _flags)
  1732. ? s_renderMtl->getSamplerState(_flags)
  1733. : m_sampler, _stage);
  1734. }
  1735. void FrameBufferMtl::create(uint8_t _num, const TextureHandle* _handles)
  1736. {
  1737. m_num = 0;
  1738. for (uint32_t ii = 0; ii < _num; ++ii)
  1739. {
  1740. TextureHandle handle = _handles[ii];
  1741. if (isValid(handle) )
  1742. {
  1743. const TextureMtl& texture = s_renderMtl->m_textures[handle.idx];
  1744. //TODO: separate stencil buffer? or just use packed depth/stencil (which is not available on iOS8)
  1745. if (isDepth( (TextureFormat::Enum)texture.m_textureFormat) )
  1746. {
  1747. m_depthHandle = handle;
  1748. }
  1749. else
  1750. {
  1751. m_colorHandle[m_num] = handle;
  1752. m_num++;
  1753. }
  1754. }
  1755. }
  1756. bx::HashMurmur2A murmur;
  1757. murmur.begin();
  1758. murmur.add(m_num);
  1759. for (uint32_t ii = 0; ii < m_num; ++ii)
  1760. {
  1761. const TextureMtl& texture = s_renderMtl->m_textures[m_colorHandle[ii].idx];
  1762. murmur.add((uint32_t)texture.m_ptr.pixelFormat());
  1763. }
  1764. const TextureMtl& depthTexture = s_renderMtl->m_textures[m_depthHandle.idx];
  1765. murmur.add((uint32_t)depthTexture.m_ptr.pixelFormat());
  1766. murmur.add((uint32_t)MTLPixelFormatInvalid); //stencil
  1767. m_pixelFormatHash = murmur.end();
  1768. }
  1769. void FrameBufferMtl::create(uint16_t _denseIdx, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat)
  1770. {
  1771. BX_UNUSED(_denseIdx, _nwh, _width, _height, _depthFormat);
  1772. BX_WARN(false, "FrameBufferMtl::create not yet implemented");
  1773. }
  1774. void FrameBufferMtl::postReset()
  1775. {
  1776. BX_WARN(false, "FrameBufferMtl::postReset not yet implemented");
  1777. //TODO: what should we do here?
  1778. }
  1779. uint16_t FrameBufferMtl::destroy()
  1780. {
  1781. m_num = 0;
  1782. m_depthHandle.idx = invalidHandle;
  1783. uint16_t denseIdx = m_denseIdx;
  1784. m_denseIdx = UINT16_MAX;
  1785. return denseIdx;
  1786. }
  1787. void OcclusionQueryMTL::postReset()
  1788. {
  1789. MTL_RELEASE(m_buffer);
  1790. }
  1791. void OcclusionQueryMTL::preReset()
  1792. {
  1793. m_buffer = s_renderMtl->m_device.newBufferWithLength(BX_COUNTOF(m_query) * 8, 0);
  1794. }
  1795. void OcclusionQueryMTL::begin(RenderCommandEncoder& _rce, Frame* _render, OcclusionQueryHandle _handle)
  1796. {
  1797. while (0 == m_control.reserve(1) )
  1798. {
  1799. resolve(_render, true);
  1800. }
  1801. Query& query = m_query[m_control.m_current];
  1802. query.m_handle = _handle;
  1803. uint32_t offset = _handle.idx * 8;
  1804. _rce.setVisibilityResultMode(MTLVisibilityResultModeBoolean, offset);
  1805. }
  1806. void OcclusionQueryMTL::end(RenderCommandEncoder& _rce)
  1807. {
  1808. Query& query = m_query[m_control.m_current];
  1809. uint32_t offset = query.m_handle.idx * 8;
  1810. _rce.setVisibilityResultMode(MTLVisibilityResultModeDisabled, offset);
  1811. m_control.commit(1);
  1812. }
  1813. void OcclusionQueryMTL::resolve(Frame* _render, bool _wait)
  1814. {
  1815. BX_UNUSED(_wait);
  1816. while (0 != m_control.available() )
  1817. {
  1818. Query& query = m_query[m_control.m_read];
  1819. uint64_t result = ( (uint64_t*)m_buffer.contents() )[query.m_handle.idx];
  1820. _render->m_occlusion[query.m_handle.idx] = 0 < result;
  1821. m_control.consume(1);
  1822. }
  1823. }
  1824. void RendererContextMtl::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) BX_OVERRIDE
  1825. {
  1826. m_commandBuffer = m_commandQueue.commandBuffer();
  1827. retain(m_commandBuffer); // keep alive to be useable at 'flip'
  1828. //TODO: multithreading with multiple commandbuffer
  1829. // is there a FAST way to tell which view is active?
  1830. //TODO: acquire CAMetalDrawable just before we really need it. When we are using an encoder with target metalLayer's texture
  1831. m_drawable = m_metalLayer.nextDrawable;
  1832. // retain(m_drawable); // keep alive to be useable at 'flip'
  1833. m_uniformBuffer = m_uniformBuffers[m_uniformBufferIndex];
  1834. m_uniformBufferIndex = (m_uniformBufferIndex + 1) % UNIFORM_BUFFER_COUNT;
  1835. m_uniformBufferVertexOffset = 0;
  1836. m_uniformBufferFragmentOffset = 0;
  1837. updateResolution(_render->m_resolution);
  1838. int64_t elapsed = -bx::getHPCounter();
  1839. int64_t captureElapsed = 0;
  1840. if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
  1841. {
  1842. //TODO
  1843. //m_gpuTimer.begin();
  1844. }
  1845. if (0 < _render->m_iboffset)
  1846. {
  1847. TransientIndexBuffer* ib = _render->m_transientIb;
  1848. m_indexBuffers[ib->handle.idx].update(0, _render->m_iboffset, ib->data);
  1849. }
  1850. if (0 < _render->m_vboffset)
  1851. {
  1852. TransientVertexBuffer* vb = _render->m_transientVb;
  1853. m_vertexBuffers[vb->handle.idx].update(0, _render->m_vboffset, vb->data);
  1854. }
  1855. _render->sort();
  1856. RenderDraw currentState;
  1857. currentState.clear();
  1858. currentState.m_stateFlags = BGFX_STATE_NONE;
  1859. currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE);
  1860. _render->m_hmdInitialized = false;
  1861. const bool hmdEnabled = false;
  1862. ViewState viewState(_render, hmdEnabled);
  1863. uint32_t blendFactor = 0;
  1864. bool wireframe = !!(_render->m_debug&BGFX_DEBUG_WIREFRAME);
  1865. //TODO: REMOVE THIS - TEMPORARY HACK
  1866. m_textureDescriptor.textureType = MTLTextureType2D;
  1867. m_textureDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm;
  1868. m_textureDescriptor.width = 4;
  1869. m_textureDescriptor.height = 4;
  1870. m_textureDescriptor.depth = 1;
  1871. m_textureDescriptor.mipmapLevelCount = 1;
  1872. m_textureDescriptor.sampleCount = 1; //TODO: set samplecount - If textureType is not MTLTextureType2DMultisample, the value must be 1.
  1873. Texture zeroTexture = m_device.newTextureWithDescriptor(m_textureDescriptor);
  1874. uint16_t programIdx = invalidHandle;
  1875. SortKey key;
  1876. uint16_t view = UINT16_MAX;
  1877. FrameBufferHandle fbh = { BGFX_CONFIG_MAX_FRAME_BUFFERS };
  1878. //ASK: why should we use this? It changes topology, so possible renders a big mess, doesn't it?
  1879. //const uint64_t primType = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : 0;
  1880. const uint64_t primType = 0;
  1881. uint8_t primIndex = uint8_t(primType>>BGFX_STATE_PT_SHIFT);
  1882. PrimInfo prim = s_primInfo[primIndex];
  1883. ProgramMtl* currentProgram = NULL;
  1884. RenderCommandEncoder rce;
  1885. bool wasCompute = false;
  1886. bool viewHasScissor = false;
  1887. Rect viewScissorRect;
  1888. viewScissorRect.clear();
  1889. uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {};
  1890. uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {};
  1891. uint32_t statsNumInstances[BX_COUNTOF(s_primInfo)] = {};
  1892. uint32_t statsNumDrawIndirect[BX_COUNTOF(s_primInfo)] = {};
  1893. uint32_t statsNumIndices = 0;
  1894. uint32_t statsKeyType[2] = {};
  1895. m_occlusionQuery.resolve(_render);
  1896. if (0 == (_render->m_debug&BGFX_DEBUG_IFH) )
  1897. {
  1898. bool viewRestart = false;
  1899. uint8_t eye = 0;
  1900. uint8_t restartState = 0;
  1901. viewState.m_rect = _render->m_rect[0];
  1902. int32_t numItems = _render->m_num;
  1903. for (int32_t item = 0, restartItem = numItems; item < numItems || restartItem < numItems;)
  1904. {
  1905. const bool isCompute = key.decode(_render->m_sortKeys[item], _render->m_viewRemap);
  1906. statsKeyType[isCompute]++;
  1907. const bool viewChanged = 0
  1908. || key.m_view != view
  1909. || item == numItems
  1910. ;
  1911. const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ];
  1912. ++item;
  1913. if (viewChanged)
  1914. {
  1915. if (1 == restartState)
  1916. {
  1917. restartState = 2;
  1918. item = restartItem;
  1919. restartItem = numItems;
  1920. view = UINT16_MAX;
  1921. continue;
  1922. }
  1923. view = key.m_view;
  1924. programIdx = invalidHandle;
  1925. viewRestart = ( (BGFX_VIEW_STEREO == (_render->m_viewFlags[view] & BGFX_VIEW_STEREO) ) );
  1926. viewRestart &= hmdEnabled;
  1927. if (viewRestart)
  1928. {
  1929. if (0 == restartState)
  1930. {
  1931. restartState = 1;
  1932. restartItem = item - 1;
  1933. }
  1934. eye = (restartState - 1) & 1;
  1935. restartState &= 1;
  1936. }
  1937. else
  1938. {
  1939. eye = 0;
  1940. }
  1941. viewState.m_rect = _render->m_rect[view];
  1942. if (viewRestart)
  1943. {
  1944. viewState.m_rect.m_x = eye * (viewState.m_rect.m_width+1)/2;
  1945. viewState.m_rect.m_width /= 2;
  1946. }
  1947. const Rect& scissorRect = _render->m_scissor[view];
  1948. viewHasScissor = !scissorRect.isZero();
  1949. viewScissorRect = viewHasScissor ? scissorRect : viewState.m_rect;
  1950. Clear& clr = _render->m_clear[view];
  1951. RenderPassDescriptor renderPassDescriptor = newRenderPassDescriptor();
  1952. renderPassDescriptor.visibilityResultBuffer = m_occlusionQuery.m_buffer;
  1953. uint32_t width = getBufferWidth();
  1954. uint32_t height = getBufferHeight();
  1955. Rect viewRect = viewState.m_rect;
  1956. bool fullscreenRect = true
  1957. && 0 == viewRect.m_x
  1958. && 0 == viewRect.m_y
  1959. && width == viewRect.m_width
  1960. && height == viewRect.m_height
  1961. ;
  1962. fbh = _render->m_fb[view];
  1963. setFrameBuffer(renderPassDescriptor, fbh);
  1964. RenderPassColorAttachmentDescriptor colorAttachment0 = renderPassDescriptor.colorAttachments[0];
  1965. if (0 != (BGFX_CLEAR_COLOR & clr.m_flags) )
  1966. {
  1967. if (0 != (BGFX_CLEAR_COLOR_USE_PALETTE & clr.m_flags) )
  1968. {
  1969. uint8_t index = (uint8_t)bx::uint32_min(BGFX_CONFIG_MAX_COLOR_PALETTE-1, clr.m_index[0]);
  1970. const float* rgba = _render->m_colorPalette[index];
  1971. const float rr = rgba[0];
  1972. const float gg = rgba[1];
  1973. const float bb = rgba[2];
  1974. const float aa = rgba[3];
  1975. colorAttachment0.clearColor = MTLClearColorMake(rr, gg, bb, aa);
  1976. }
  1977. else
  1978. {
  1979. float rr = clr.m_index[0]*1.0f/255.0f;
  1980. float gg = clr.m_index[1]*1.0f/255.0f;
  1981. float bb = clr.m_index[2]*1.0f/255.0f;
  1982. float aa = clr.m_index[3]*1.0f/255.0f;
  1983. colorAttachment0.clearColor = MTLClearColorMake(rr, gg, bb, aa);
  1984. }
  1985. colorAttachment0.loadAction = MTLLoadActionClear;
  1986. }
  1987. else
  1988. {
  1989. colorAttachment0.loadAction = MTLLoadActionLoad;
  1990. }
  1991. //TODO: optimize store actions use discard flag
  1992. RenderPassDepthAttachmentDescriptor depthAttachment = renderPassDescriptor.depthAttachment;
  1993. if (NULL != depthAttachment.texture)
  1994. {
  1995. depthAttachment.clearDepth = clr.m_depth;
  1996. depthAttachment.loadAction = 0 != (BGFX_CLEAR_DEPTH & clr.m_flags)
  1997. ? MTLLoadActionClear
  1998. : MTLLoadActionLoad
  1999. ;
  2000. depthAttachment.storeAction = MTLStoreActionStore;
  2001. }
  2002. RenderPassStencilAttachmentDescriptor stencilAttachment = renderPassDescriptor.stencilAttachment;
  2003. if (NULL != stencilAttachment.texture)
  2004. {
  2005. stencilAttachment.clearStencil = clr.m_stencil;
  2006. stencilAttachment.loadAction = 0 != (BGFX_CLEAR_STENCIL & clr.m_flags)
  2007. ? MTLLoadActionClear
  2008. : MTLLoadActionLoad
  2009. ;
  2010. stencilAttachment.storeAction = MTLStoreActionStore;
  2011. }
  2012. if (0 != m_renderCommandEncoder)
  2013. {
  2014. m_renderCommandEncoder.endEncoding();
  2015. }
  2016. rce = m_commandBuffer.renderCommandEncoderWithDescriptor(renderPassDescriptor);
  2017. m_renderCommandEncoder = rce;
  2018. MTL_RELEASE(renderPassDescriptor);
  2019. //TODO: REMOVE THIS!!!!
  2020. // TERRIBLE HACK TO SUPPRESS DEBUG LAYER WARNING ABOUT MISSING TEXTURE/SAMPLER AT 0 in 20-nanovg
  2021. m_renderCommandEncoder.setFragmentTexture(zeroTexture, 0);
  2022. m_renderCommandEncoder.setFragmentSamplerState(getSamplerState(0), 0);
  2023. rce.setTriangleFillMode(wireframe? MTLTriangleFillModeLines : MTLTriangleFillModeFill);
  2024. if (BX_ENABLED(BGFX_CONFIG_DEBUG_MTL) )
  2025. {
  2026. if (item != 1)
  2027. {
  2028. rce.popDebugGroup();
  2029. }
  2030. rce.pushDebugGroup(s_viewName[view]);
  2031. }
  2032. MTLViewport vp;
  2033. vp.originX = viewState.m_rect.m_x;
  2034. vp.originY = viewState.m_rect.m_y;
  2035. vp.width = viewState.m_rect.m_width;
  2036. vp.height = viewState.m_rect.m_height;
  2037. vp.znear = 0.0f;
  2038. vp.zfar = 1.0f;
  2039. rce.setViewport(vp);
  2040. if (BGFX_CLEAR_NONE != (clr.m_flags & BGFX_CLEAR_MASK)
  2041. && !fullscreenRect)
  2042. {
  2043. clearQuad(_clearQuad, viewState.m_rect, clr, _render->m_colorPalette);
  2044. }
  2045. }
  2046. bool resetState = viewChanged || wasCompute;
  2047. if (wasCompute)
  2048. {
  2049. wasCompute = false;
  2050. programIdx = invalidHandle;
  2051. currentProgram = NULL;
  2052. //invalidateCompute();
  2053. }
  2054. const RenderDraw& draw = renderItem.draw;
  2055. const bool hasOcclusionQuery = 0 != (draw.m_stateFlags & BGFX_STATE_INTERNAL_OCCLUSION_QUERY);
  2056. if (isValid(draw.m_occlusionQuery)
  2057. && !hasOcclusionQuery
  2058. && !isVisible(_render, draw.m_occlusionQuery, 0 != (draw.m_submitFlags&BGFX_SUBMIT_INTERNAL_OCCLUSION_VISIBLE) ) )
  2059. {
  2060. continue;
  2061. }
  2062. const uint64_t newFlags = draw.m_stateFlags;
  2063. uint64_t changedFlags = currentState.m_stateFlags ^ draw.m_stateFlags;
  2064. currentState.m_stateFlags = newFlags;
  2065. const uint64_t newStencil = draw.m_stencil;
  2066. uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil;
  2067. currentState.m_stencil = newStencil;
  2068. if (resetState)
  2069. {
  2070. currentState.clear();
  2071. currentState.m_scissor = !draw.m_scissor;
  2072. changedFlags = BGFX_STATE_MASK;
  2073. changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK);
  2074. currentState.m_stateFlags = newFlags;
  2075. currentState.m_stencil = newStencil;
  2076. programIdx = invalidHandle;
  2077. setDepthStencilState(newFlags, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT));
  2078. const uint64_t pt = newFlags&BGFX_STATE_PT_MASK;
  2079. primIndex = uint8_t(pt>>BGFX_STATE_PT_SHIFT);
  2080. }
  2081. if (prim.m_type != s_primInfo[primIndex].m_type)
  2082. {
  2083. prim = s_primInfo[primIndex];
  2084. }
  2085. uint16_t scissor = draw.m_scissor;
  2086. if (currentState.m_scissor != scissor)
  2087. {
  2088. currentState.m_scissor = scissor;
  2089. MTLScissorRect rc;
  2090. if (UINT16_MAX == scissor)
  2091. {
  2092. if (viewHasScissor)
  2093. {
  2094. rc.x = viewScissorRect.m_x;
  2095. rc.y = viewScissorRect.m_y;
  2096. rc.width = viewScissorRect.m_width;
  2097. rc.height = viewScissorRect.m_height;
  2098. }
  2099. else
  2100. { // can't disable: set to view rect
  2101. rc.x = viewState.m_rect.m_x;
  2102. rc.y = viewState.m_rect.m_y;
  2103. rc.width = viewState.m_rect.m_width;
  2104. rc.height = viewState.m_rect.m_height;
  2105. }
  2106. }
  2107. else
  2108. {
  2109. Rect scissorRect;
  2110. scissorRect.intersect(viewScissorRect, _render->m_rectCache.m_cache[scissor]);
  2111. rc.x = scissorRect.m_x;
  2112. rc.y = scissorRect.m_y;
  2113. rc.width = scissorRect.m_width;
  2114. rc.height = scissorRect.m_height;
  2115. }
  2116. rce.setScissorRect(rc);
  2117. }
  2118. if ( (BGFX_STATE_DEPTH_WRITE|BGFX_STATE_DEPTH_TEST_MASK) & changedFlags
  2119. || 0 != changedStencil)
  2120. {
  2121. setDepthStencilState(newFlags,newStencil);
  2122. }
  2123. if ( (0
  2124. | BGFX_STATE_CULL_MASK
  2125. | BGFX_STATE_ALPHA_REF_MASK
  2126. | BGFX_STATE_PT_MASK
  2127. // | BGFX_STATE_POINT_SIZE_MASK
  2128. ) & changedFlags)
  2129. {
  2130. if (BGFX_STATE_CULL_MASK & changedFlags)
  2131. {
  2132. const uint64_t pt = newFlags&BGFX_STATE_CULL_MASK;
  2133. uint8_t cullIndex = uint8_t(pt>>BGFX_STATE_CULL_SHIFT);
  2134. rce.setCullMode(s_cullMode[cullIndex]);
  2135. }
  2136. if (BGFX_STATE_ALPHA_REF_MASK & changedFlags)
  2137. {
  2138. uint32_t ref = (newFlags&BGFX_STATE_ALPHA_REF_MASK)>>BGFX_STATE_ALPHA_REF_SHIFT;
  2139. viewState.m_alphaRef = ref/255.0f;
  2140. }
  2141. const uint64_t pt = newFlags&BGFX_STATE_PT_MASK;
  2142. primIndex = uint8_t(pt>>BGFX_STATE_PT_SHIFT);
  2143. if (prim.m_type != s_primInfo[primIndex].m_type)
  2144. {
  2145. prim = s_primInfo[primIndex];
  2146. }
  2147. }
  2148. if (blendFactor != draw.m_rgba
  2149. && !(newFlags & BGFX_STATE_BLEND_INDEPENDENT) )
  2150. {
  2151. const uint32_t rgba = draw.m_rgba;
  2152. float rr = ( (rgba>>24) )/255.0f;
  2153. float gg = ( (rgba>>16)&0xff)/255.0f;
  2154. float bb = ( (rgba>> 8)&0xff)/255.0f;
  2155. float aa = ( (rgba )&0xff)/255.0f;
  2156. rce.setBlendColor(rr,gg,bb,aa);
  2157. blendFactor = draw.m_rgba;
  2158. }
  2159. bool programChanged = false;
  2160. bool constantsChanged = draw.m_constBegin < draw.m_constEnd;
  2161. rendererUpdateUniforms(this, _render->m_uniformBuffer, draw.m_constBegin, draw.m_constEnd);
  2162. if (key.m_program != programIdx
  2163. || (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_ALPHA_WRITE|BGFX_STATE_RGB_WRITE|BGFX_STATE_BLEND_INDEPENDENT|BGFX_STATE_MSAA) & changedFlags
  2164. || currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
  2165. || currentState.m_vertexDecl.idx != draw.m_vertexDecl.idx
  2166. || currentState.m_instanceDataStride != draw.m_instanceDataStride
  2167. || ( (blendFactor != draw.m_rgba) && !!(newFlags & BGFX_STATE_BLEND_INDEPENDENT) ) )
  2168. {
  2169. programIdx = key.m_program;
  2170. currentState.m_vertexDecl = draw.m_vertexDecl;
  2171. currentState.m_instanceDataStride = draw.m_instanceDataStride;
  2172. if (invalidHandle == programIdx)
  2173. {
  2174. currentProgram = NULL;
  2175. continue;
  2176. }
  2177. else
  2178. {
  2179. ProgramMtl& program = m_program[programIdx];
  2180. currentProgram = &program;
  2181. uint16_t handle = draw.m_vertexBuffer.idx;
  2182. const VertexBufferMtl& vb = m_vertexBuffers[handle];
  2183. VertexDeclHandle decl;
  2184. decl.idx = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
  2185. RenderPipelineState pipelineState = program.getRenderPipelineState(newFlags, draw.m_rgba, fbh, decl, draw.m_instanceDataStride/16);
  2186. if (NULL == pipelineState )
  2187. { //call with invalid program
  2188. currentProgram = NULL;
  2189. programIdx = invalidHandle;
  2190. continue;
  2191. }
  2192. rce.setRenderPipelineState(pipelineState);
  2193. }
  2194. programChanged =
  2195. constantsChanged = true;
  2196. }
  2197. if (invalidHandle != programIdx)
  2198. {
  2199. ProgramMtl& program = m_program[programIdx];
  2200. uint32_t vertexUniformBufferSize = program.m_vshConstantBufferSize;
  2201. uint32_t fragmentUniformBufferSize = program.m_fshConstantBufferSize;
  2202. if (vertexUniformBufferSize)
  2203. {
  2204. m_uniformBufferVertexOffset = BX_ALIGN_MASK(m_uniformBufferVertexOffset, program.m_vshConstantBufferAlignmentMask);
  2205. rce.setVertexBuffer(m_uniformBuffer, m_uniformBufferVertexOffset, 0);
  2206. }
  2207. m_uniformBufferFragmentOffset = m_uniformBufferVertexOffset + vertexUniformBufferSize;
  2208. if (fragmentUniformBufferSize)
  2209. {
  2210. m_uniformBufferFragmentOffset = BX_ALIGN_MASK(m_uniformBufferFragmentOffset, program.m_fshConstantBufferAlignmentMask);
  2211. rce.setFragmentBuffer(m_uniformBuffer, m_uniformBufferFragmentOffset, 0);
  2212. }
  2213. if (constantsChanged)
  2214. {
  2215. UniformBuffer* vcb = program.m_vshConstantBuffer;
  2216. if (NULL != vcb)
  2217. {
  2218. commit(*vcb);
  2219. }
  2220. UniformBuffer* fcb = program.m_fshConstantBuffer;
  2221. if (NULL != fcb)
  2222. {
  2223. commit(*fcb);
  2224. }
  2225. }
  2226. viewState.setPredefined<4>(this, view, eye, program, _render, draw);
  2227. m_uniformBufferFragmentOffset += fragmentUniformBufferSize;
  2228. m_uniformBufferVertexOffset = m_uniformBufferFragmentOffset;
  2229. }
  2230. {
  2231. for (uint8_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage)
  2232. {
  2233. const Binding& sampler = draw.m_bind[stage];
  2234. Binding& current = currentState.m_bind[stage];
  2235. if (current.m_idx != sampler.m_idx
  2236. || current.m_un.m_draw.m_textureFlags != sampler.m_un.m_draw.m_textureFlags
  2237. || programChanged)
  2238. {
  2239. if (invalidHandle != sampler.m_idx)
  2240. {
  2241. TextureMtl& texture = m_textures[sampler.m_idx];
  2242. texture.commit(stage, sampler.m_un.m_draw.m_textureFlags);
  2243. }
  2244. }
  2245. current = sampler;
  2246. }
  2247. }
  2248. if (currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx
  2249. || currentState.m_startVertex != draw.m_startVertex
  2250. || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx
  2251. || currentState.m_instanceDataOffset != draw.m_instanceDataOffset)
  2252. {
  2253. currentState.m_vertexBuffer = draw.m_vertexBuffer;
  2254. currentState.m_startVertex = draw.m_startVertex;
  2255. currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx;
  2256. currentState.m_instanceDataOffset = draw.m_instanceDataOffset;
  2257. uint16_t handle = draw.m_vertexBuffer.idx;
  2258. if (invalidHandle != handle)
  2259. {
  2260. const VertexBufferMtl& vb = m_vertexBuffers[handle];
  2261. uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
  2262. const VertexDecl& vertexDecl = m_vertexDecls[decl];
  2263. uint32_t offset = draw.m_startVertex * vertexDecl.getStride();
  2264. rce.setVertexBuffer(vb.m_buffer, offset, 1);
  2265. if (isValid(draw.m_instanceDataBuffer) )
  2266. {
  2267. const VertexBufferMtl& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
  2268. rce.setVertexBuffer(inst.m_buffer, draw.m_instanceDataOffset, 2);
  2269. }
  2270. }
  2271. }
  2272. if (isValid(currentState.m_vertexBuffer) )
  2273. {
  2274. uint32_t numVertices = draw.m_numVertices;
  2275. if (UINT32_MAX == numVertices)
  2276. {
  2277. const VertexBufferMtl& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx];
  2278. uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx;
  2279. const VertexDecl& vertexDecl = m_vertexDecls[decl];
  2280. numVertices = vb.m_size/vertexDecl.m_stride;
  2281. }
  2282. uint32_t numIndices = 0;
  2283. uint32_t numPrimsSubmitted = 0;
  2284. uint32_t numInstances = 0;
  2285. uint32_t numPrimsRendered = 0;
  2286. uint32_t numDrawIndirect = 0;
  2287. if (hasOcclusionQuery)
  2288. {
  2289. m_occlusionQuery.begin(rce, _render, draw.m_occlusionQuery);
  2290. }
  2291. if (isValid(draw.m_indirectBuffer) )
  2292. {
  2293. }
  2294. else
  2295. {
  2296. if (isValid(draw.m_indexBuffer) )
  2297. {
  2298. const IndexBufferMtl& ib = m_indexBuffers[draw.m_indexBuffer.idx];
  2299. MTLIndexType indexType = 0 == (ib.m_flags & BGFX_BUFFER_INDEX32) ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32;
  2300. if (UINT32_MAX == draw.m_numIndices)
  2301. {
  2302. const uint32_t indexSize = 0 == (ib.m_flags & BGFX_BUFFER_INDEX32) ? 2 : 4;
  2303. numIndices = ib.m_size/indexSize;
  2304. numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
  2305. numInstances = draw.m_numInstances;
  2306. numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
  2307. rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, 0, draw.m_numInstances);
  2308. }
  2309. else if (prim.m_min <= draw.m_numIndices)
  2310. {
  2311. const uint32_t indexSize = 0 == (ib.m_flags & BGFX_BUFFER_INDEX32) ? 2 : 4;
  2312. numIndices = draw.m_numIndices;
  2313. numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub;
  2314. numInstances = draw.m_numInstances;
  2315. numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
  2316. rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, draw.m_startIndex * indexSize,numInstances);
  2317. }
  2318. }
  2319. else
  2320. {
  2321. numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub;
  2322. numInstances = draw.m_numInstances;
  2323. numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
  2324. rce.drawPrimitives(prim.m_type, 0, draw.m_numVertices, draw.m_numInstances);
  2325. }
  2326. }
  2327. if (hasOcclusionQuery)
  2328. {
  2329. m_occlusionQuery.end(rce);
  2330. }
  2331. statsNumPrimsSubmitted[primIndex] += numPrimsSubmitted;
  2332. statsNumPrimsRendered[primIndex] += numPrimsRendered;
  2333. statsNumInstances[primIndex] += numInstances;
  2334. statsNumDrawIndirect[primIndex] += numDrawIndirect;
  2335. statsNumIndices += numIndices;
  2336. }
  2337. }
  2338. if (wasCompute)
  2339. {
  2340. //invalidateCompute();
  2341. }
  2342. if (0 < _render->m_num)
  2343. {
  2344. captureElapsed = -bx::getHPCounter();
  2345. //capture();
  2346. captureElapsed += bx::getHPCounter();
  2347. }
  2348. }
  2349. if (BX_ENABLED(BGFX_CONFIG_DEBUG_MTL) )
  2350. {
  2351. if (0 < _render->m_num)
  2352. {
  2353. rce.popDebugGroup();
  2354. }
  2355. }
  2356. int64_t now = bx::getHPCounter();
  2357. elapsed += now;
  2358. static int64_t last = now;
  2359. int64_t frameTime = now - last;
  2360. last = now;
  2361. static int64_t min = frameTime;
  2362. static int64_t max = frameTime;
  2363. min = min > frameTime ? frameTime : min;
  2364. max = max < frameTime ? frameTime : max;
  2365. if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
  2366. {
  2367. rce.pushDebugGroup("debugstats");
  2368. static uint32_t maxGpuLatency = 0;
  2369. static double maxGpuElapsed = 0.0f;
  2370. // double elapsedGpuMs = 0.0;
  2371. // m_gpuTimer.end();
  2372. //
  2373. // while (m_gpuTimer.get() )
  2374. // {
  2375. // double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
  2376. // elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
  2377. // maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
  2378. // }
  2379. // maxGpuLatency = bx::uint32_imax(maxGpuLatency, m_gpuTimer.m_control.available()-1);
  2380. TextVideoMem& tvm = m_textVideoMem;
  2381. static int64_t next = now;
  2382. if (now >= next)
  2383. {
  2384. next = now + bx::getHPFrequency();
  2385. double freq = double(bx::getHPFrequency() );
  2386. double toMs = 1000.0/freq;
  2387. tvm.clear();
  2388. uint16_t pos = 0;
  2389. tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f, " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " "
  2390. , getRendererName()
  2391. );
  2392. pos = 10;
  2393. tvm.printf(10, pos++, 0x8e, " Frame: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS "
  2394. , double(frameTime)*toMs
  2395. , double(min)*toMs
  2396. , double(max)*toMs
  2397. , freq/frameTime
  2398. );
  2399. const uint32_t msaa = (m_resolution.m_flags&BGFX_RESET_MSAA_MASK)>>BGFX_RESET_MSAA_SHIFT;
  2400. tvm.printf(10, pos++, 0x8e, " Reset flags: [%c] vsync, [%c] MSAAx%d, [%c] MaxAnisotropy "
  2401. , !!(m_resolution.m_flags&BGFX_RESET_VSYNC) ? '\xfe' : ' '
  2402. , 0 != msaa ? '\xfe' : ' '
  2403. , 1<<msaa
  2404. , !!(m_resolution.m_flags&BGFX_RESET_MAXANISOTROPY) ? '\xfe' : ' '
  2405. );
  2406. double elapsedCpuMs = double(elapsed)*toMs;
  2407. tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
  2408. , _render->m_num
  2409. , statsKeyType[0]
  2410. , statsKeyType[1]
  2411. , elapsedCpuMs
  2412. , elapsedCpuMs > maxGpuElapsed ? '>' : '<'
  2413. , maxGpuElapsed
  2414. , maxGpuLatency
  2415. );
  2416. maxGpuLatency = 0;
  2417. maxGpuElapsed = 0.0;
  2418. for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
  2419. {
  2420. tvm.printf(10, pos++, 0x8e, " %10s: %7d (#inst: %5d), submitted: %7d"
  2421. , s_primName[ii]
  2422. , statsNumPrimsRendered[ii]
  2423. , statsNumInstances[ii]
  2424. , statsNumPrimsSubmitted[ii]
  2425. );
  2426. }
  2427. tvm.printf(10, pos++, 0x8e, " Indices: %7d ", statsNumIndices);
  2428. tvm.printf(10, pos++, 0x8e, " Uniform size: %7d, Max: %7d ", _render->m_uniformEnd, _render->m_uniformMax);
  2429. tvm.printf(10, pos++, 0x8e, " DVB size: %7d ", _render->m_vboffset);
  2430. tvm.printf(10, pos++, 0x8e, " DIB size: %7d ", _render->m_iboffset);
  2431. pos++;
  2432. double captureMs = double(captureElapsed)*toMs;
  2433. tvm.printf(10, pos++, 0x8e, " Capture: %3.4f [ms]", captureMs);
  2434. uint8_t attr[2] = { 0x89, 0x8a };
  2435. uint8_t attrIndex = _render->m_waitSubmit < _render->m_waitRender;
  2436. tvm.printf(10, pos++, attr[attrIndex&1], " Submit wait: %3.4f [ms]", _render->m_waitSubmit*toMs);
  2437. tvm.printf(10, pos++, attr[(attrIndex+1)&1], " Render wait: %3.4f [ms]", _render->m_waitRender*toMs);
  2438. min = frameTime;
  2439. max = frameTime;
  2440. }
  2441. blit(this, _textVideoMemBlitter, tvm);
  2442. rce.popDebugGroup();
  2443. }
  2444. else if (_render->m_debug & BGFX_DEBUG_TEXT)
  2445. {
  2446. rce.pushDebugGroup("debugtext");
  2447. blit(this, _textVideoMemBlitter, _render->m_textVideoMem);
  2448. rce.popDebugGroup();
  2449. }
  2450. //TODO: REMOVE THIS - TEMPORARY HACK
  2451. release(zeroTexture);
  2452. rce.endEncoding();
  2453. m_renderCommandEncoder = 0;
  2454. }
  2455. } /* namespace mtl */ } // namespace bgfx
  2456. #else
  2457. namespace bgfx { namespace mtl
  2458. {
  2459. RendererContextI* rendererCreate()
  2460. {
  2461. return NULL;
  2462. }
  2463. void rendererDestroy()
  2464. {
  2465. }
  2466. } /* namespace mtl */ } // namespace bgfx
  2467. #endif // BGFX_CONFIG_RENDERER_METAL