  // bgfx_compute.sh (19 KB)

  /*
   * Copyright 2011-2019 Branimir Karadzic. All rights reserved.
   * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
   */
  // Include guard: the rest of this header is only processed while the guard
  // macro is still undefined.
  #if !defined(BGFX_COMPUTE_H_HEADER_GUARD)
  #define BGFX_COMPUTE_H_HEADER_GUARD

  // Pulled in before any of the macros below are defined.
  #include "bgfx_shader.sh"

  // Everything that follows is skipped when __cplusplus is defined.
  #if !defined(__cplusplus)
  // Read/write 2D images selected by slot index 0..3. Each slot is forwarded
  // to IMAGE2D_RW with a fixed binding: 4..7 when BGFX_SHADER_LANGUAGE_GLSL is
  // set, 16..19 otherwise.
  #if BGFX_SHADER_LANGUAGE_GLSL
  #define FRAMEBUFFER_IMAGE2D_RW_0(_name, _format) IMAGE2D_RW(_name, _format, 4)
  #define FRAMEBUFFER_IMAGE2D_RW_1(_name, _format) IMAGE2D_RW(_name, _format, 5)
  #define FRAMEBUFFER_IMAGE2D_RW_2(_name, _format) IMAGE2D_RW(_name, _format, 6)
  #define FRAMEBUFFER_IMAGE2D_RW_3(_name, _format) IMAGE2D_RW(_name, _format, 7)
  #else
  #define FRAMEBUFFER_IMAGE2D_RW_0(_name, _format) IMAGE2D_RW(_name, _format, 16)
  #define FRAMEBUFFER_IMAGE2D_RW_1(_name, _format) IMAGE2D_RW(_name, _format, 17)
  #define FRAMEBUFFER_IMAGE2D_RW_2(_name, _format) IMAGE2D_RW(_name, _format, 18)
  #define FRAMEBUFFER_IMAGE2D_RW_3(_name, _format) IMAGE2D_RW(_name, _format, 19)
  #endif // BGFX_SHADER_LANGUAGE_GLSL

  // Public entry point: pastes the slot token _reg onto FRAMEBUFFER_IMAGE2D_RW_
  // to pick one of the four per-slot macros above, so _reg must be a literal
  // 0, 1, 2 or 3.
  #define FRAMEBUFFER_IMAGE2D_RW(_name, _format, _reg) \
      FRAMEBUFFER_IMAGE2D_RW_ ## _reg(_name, _format)
  #if BGFX_SHADER_LANGUAGE_GLSL

  // Storage qualifier spelled `shared` on this path.
  #define SHARED shared

  // Declares one image uniform: format and binding go into the layout
  // qualifier, followed by the access qualifier, `uniform highp`, the image
  // type and the variable name.
  #define __IMAGE_XX(_name, _format, _reg, _image, _access) \
      layout(_format, binding=_reg) _access uniform highp _image _name

  // Defined as empty, so the *_RW macros that pass `readwrite` as _access
  // emit no access qualifier at all.
  #define readwrite
  // Image declaration macros. Naming scheme:
  //   [U]IMAGE<dim>_<access>(_name, _format, _reg)
  // The U prefix selects the unsigned-integer image type (uimage*); the
  // suffix RO / WR / RW selects readonly / writeonly / readwrite.

  // 2D images.
  #define IMAGE2D_RO(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image2D,       readonly)
  #define UIMAGE2D_RO(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage2D,      readonly)
  #define IMAGE2D_WR(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image2D,       writeonly)
  #define UIMAGE2D_WR(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage2D,      writeonly)
  #define IMAGE2D_RW(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image2D,       readwrite)
  #define UIMAGE2D_RW(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage2D,      readwrite)

  // 2D array images.
  #define IMAGE2D_ARRAY_RO(_name, _format, _reg)  __IMAGE_XX(_name, _format, _reg, image2DArray,  readonly)
  #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readonly)
  #define IMAGE2D_ARRAY_WR(_name, _format, _reg)  __IMAGE_XX(_name, _format, _reg, image2DArray,  writeonly)
  #define UIMAGE2D_ARRAY_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, writeonly)
  #define IMAGE2D_ARRAY_RW(_name, _format, _reg)  __IMAGE_XX(_name, _format, _reg, image2DArray,  readwrite)
  #define UIMAGE2D_ARRAY_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readwrite)

  // 3D images.
  #define IMAGE3D_RO(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image3D,       readonly)
  #define UIMAGE3D_RO(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage3D,      readonly)
  #define IMAGE3D_WR(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image3D,       writeonly)
  #define UIMAGE3D_WR(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage3D,      writeonly)
  #define IMAGE3D_RW(_name, _format, _reg)        __IMAGE_XX(_name, _format, _reg, image3D,       readwrite)
  #define UIMAGE3D_RW(_name, _format, _reg)       __IMAGE_XX(_name, _format, _reg, uimage3D,      readwrite)
  // Declares a std430 buffer block named <_name>Buffer at binding _reg whose
  // only member is an unsized array `_type _name[]`. No trailing semicolon is
  // emitted; the caller supplies it.
  #define __BUFFER_XX(_name, _type, _reg, _access) \
      layout(std430, binding=_reg) _access buffer _name ## Buffer \
      { \
          _type _name[]; \
      }

  // Access-qualified front ends. `readwrite` is defined as empty on this
  // path, so BUFFER_RW emits no access qualifier.
  #define BUFFER_RO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readonly)
  #define BUFFER_RW(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readwrite)
  #define BUFFER_WR(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly)
  // Workgroup size declaration. The expansion already ends in `;`.
  #define NUM_THREADS(_x, _y, _z) \
      layout (local_size_x = _x, local_size_y = _y, local_size_z = _z) in;

  // Three-argument "fetch" forms: perform the atomic and assign the value it
  // returns to _original.
  #define atomicFetchAndAdd(_mem, _data, _original)      _original = atomicAdd(_mem, _data)
  #define atomicFetchAndAnd(_mem, _data, _original)      _original = atomicAnd(_mem, _data)
  #define atomicFetchAndMax(_mem, _data, _original)      _original = atomicMax(_mem, _data)
  #define atomicFetchAndMin(_mem, _data, _original)      _original = atomicMin(_mem, _data)
  #define atomicFetchAndOr(_mem, _data, _original)       _original = atomicOr(_mem, _data)
  #define atomicFetchAndXor(_mem, _data, _original)      _original = atomicXor(_mem, _data)
  #define atomicFetchAndExchange(_mem, _data, _original) _original = atomicExchange(_mem, _data)

  // Compare-and-swap variant; maps onto atomicCompSwap.
  #define atomicFetchCompareExchange(_mem, _compare, _data, _original) \
      _original = atomicCompSwap(_mem,_compare, _data)
  #else

  // Storage qualifier spelled `groupshared` on this path.
  #define SHARED groupshared

  // Image format names become element types here. They are used both as the
  // texture template argument and, via token pasting, as the suffix of the
  // Bgfx*Image* wrapper struct names.
  #define r32ui    uint
  #define rg32ui   uint2
  #define rgba32ui uint4
  #define r32f     float
  #define r16f     float
  #define rg16f    float2
  #define rgba16f  float4

  // The 8-bit formats carry the `unorm` modifier only when
  // BGFX_SHADER_LANGUAGE_HLSL is set.
  #if BGFX_SHADER_LANGUAGE_HLSL
  #define rgba8 unorm float4
  #define rg8   unorm float2
  #define r8    unorm float
  #else
  #define rgba8 float4
  #define rg8   float2
  #define r8    float
  #endif // BGFX_SHADER_LANGUAGE_HLSL

  #define rgba32f float4
  // Image declaration macros for the non-GLSL path. Each primary macro emits
  // two declarations:
  //   1. the texture resource <_name>Texture bound through REGISTER (`t` for
  //      read-only, `u` for read/write), and
  //   2. a static wrapper struct instance called _name, initialised from that
  //      texture, whose type is Bgfx{RO,RW}Image*_<_format>.
  // The U* and *_WR spellings are plain aliases, so "write-only" images are
  // declared exactly like read/write ones here.

  // 2D images.
  #define IMAGE2D_RO(_name, _format, _reg) \
      Texture2D<_format> _name ## Texture : REGISTER(t, _reg); \
      static BgfxROImage2D_ ## _format _name = { _name ## Texture }
  #define UIMAGE2D_RO(_name, _format, _reg) IMAGE2D_RO(_name, _format, _reg)

  #define IMAGE2D_RW(_name, _format, _reg) \
      RWTexture2D<_format> _name ## Texture : REGISTER(u, _reg); \
      static BgfxRWImage2D_ ## _format _name = { _name ## Texture }
  #define IMAGE2D_WR(_name, _format, _reg)  IMAGE2D_RW(_name, _format, _reg)
  #define UIMAGE2D_WR(_name, _format, _reg) IMAGE2D_RW(_name, _format, _reg)
  #define UIMAGE2D_RW(_name, _format, _reg) IMAGE2D_RW(_name, _format, _reg)

  // 2D array images.
  #define IMAGE2D_ARRAY_RO(_name, _format, _reg) \
      Texture2DArray<_format> _name ## Texture : REGISTER(t, _reg); \
      static BgfxROImage2DArray_ ## _format _name = { _name ## Texture }
  #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) IMAGE2D_ARRAY_RO(_name, _format, _reg)

  #define IMAGE2D_ARRAY_RW(_name, _format, _reg) \
      RWTexture2DArray<_format> _name ## Texture : REGISTER(u, _reg); \
      static BgfxRWImage2DArray_ ## _format _name = { _name ## Texture }
  #define UIMAGE2D_ARRAY_RW(_name, _format, _reg) IMAGE2D_ARRAY_RW(_name, _format, _reg)
  #define IMAGE2D_ARRAY_WR(_name, _format, _reg)  IMAGE2D_ARRAY_RW(_name, _format, _reg)
  #define UIMAGE2D_ARRAY_WR(_name, _format, _reg) IMAGE2D_ARRAY_RW(_name, _format, _reg)

  // 3D images.
  #define IMAGE3D_RO(_name, _format, _reg) \
      Texture3D<_format> _name ## Texture : REGISTER(t, _reg); \
      static BgfxROImage3D_ ## _format _name = { _name ## Texture }
  #define UIMAGE3D_RO(_name, _format, _reg) IMAGE3D_RO(_name, _format, _reg)

  #define IMAGE3D_RW(_name, _format, _reg) \
      RWTexture3D<_format> _name ## Texture : REGISTER(u, _reg); \
      static BgfxRWImage3D_ ## _format _name = { _name ## Texture }
  #define UIMAGE3D_RW(_name, _format, _reg) IMAGE3D_RW(_name, _format, _reg)
  #define IMAGE3D_WR(_name, _format, _reg)  IMAGE3D_RW(_name, _format, _reg)
  #define UIMAGE3D_WR(_name, _format, _reg) IMAGE3D_RW(_name, _format, _reg)
  // Buffer declaration macros for the non-GLSL path. When targeting Metal or
  // SPIR-V the structured buffer types are used; every other target uses the
  // typed Buffer / RWBuffer. BUFFER_WR is an alias of BUFFER_RW in both cases.
  #if BGFX_SHADER_LANGUAGE_METAL || BGFX_SHADER_LANGUAGE_SPIRV
  #define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct> _name : REGISTER(t, _reg)
  #define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer <_struct> _name : REGISTER(u, _reg)
  #define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
  #else
  #define BUFFER_RO(_name, _struct, _reg) Buffer<_struct> _name : REGISTER(t, _reg)
  #define BUFFER_RW(_name, _struct, _reg) RWBuffer<_struct> _name : REGISTER(u, _reg)
  #define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
  #endif

  // Thread-group size, emitted as a [numthreads] attribute.
  #define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)]
  // Declares the six wrapper structs for one image format: read-only and
  // read/write variants of 2D, 2D-array and 3D images. Each struct holds a
  // single texture member, m_texture.
  //
  // Only _format is used here. _storeComponents, _type and _loadComponents
  // are accepted so this macro has the same parameter list as __IMAGE_IMPL_A.
  //
  // _format appears twice per struct: pasted onto the struct name with ##,
  // and as the texture template argument.
  //
  // The macro must end on the line without a trailing backslash. A backslash
  // there would splice the next source line (the following #define) into
  // this macro's body.
  #define __IMAGE_IMPL_S(_format, _storeComponents, _type, _loadComponents) \
      \
      struct BgfxROImage2D_ ## _format \
      { \
          Texture2D<_format> m_texture; \
      }; \
      \
      struct BgfxRWImage2D_ ## _format \
      { \
          RWTexture2D<_format> m_texture; \
      }; \
      \
      struct BgfxROImage2DArray_ ## _format \
      { \
          Texture2DArray<_format> m_texture; \
      }; \
      \
      struct BgfxRWImage2DArray_ ## _format \
      { \
          RWTexture2DArray<_format> m_texture; \
      }; \
      \
      struct BgfxROImage3D_ ## _format \
      { \
          Texture3D<_format> m_texture; \
      }; \
      \
      struct BgfxRWImage3D_ ## _format \
      { \
          RWTexture3D<_format> m_texture; \
      };
  // Full per-format image support: the wrapper structs from __IMAGE_IMPL_S
  // plus GLSL-style imageLoad / imageSize / imageStore overloads for them.
  //
  //   _format          format name; selects the wrapper struct types.
  //   _storeComponents swizzle applied to the value passed to imageStore
  //                    before it is written to the texture.
  //   _type            vector type returned by imageLoad and taken by
  //                    imageStore.
  //   _loadComponents  swizzle applied to the fetched texel before it is
  //                    returned from imageLoad.
  //
  // Read-only wrappers get imageLoad and imageSize; read/write wrappers also
  // get imageStore. imageSize queries GetDimensions into an unsigned vector
  // and returns it converted to a signed one.
  #define __IMAGE_IMPL_A(_format, _storeComponents, _type, _loadComponents) \
      __IMAGE_IMPL_S(_format, _storeComponents, _type, _loadComponents) \
      \
      _type imageLoad(BgfxROImage2D_ ## _format _image, ivec2 _uv) \
      { \
          return _image.m_texture[_uv]._loadComponents; \
      } \
      \
      ivec2 imageSize(BgfxROImage2D_ ## _format _image) \
      { \
          uvec2 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y); \
          return ivec2(dims); \
      } \
      \
      _type imageLoad(BgfxRWImage2D_ ## _format _image, ivec2 _uv) \
      { \
          return _image.m_texture[_uv]._loadComponents; \
      } \
      \
      ivec2 imageSize(BgfxRWImage2D_ ## _format _image) \
      { \
          uvec2 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y); \
          return ivec2(dims); \
      } \
      \
      void imageStore(BgfxRWImage2D_ ## _format _image, ivec2 _uv, _type _value) \
      { \
          _image.m_texture[_uv] = _value._storeComponents; \
      } \
      \
      _type imageLoad(BgfxROImage2DArray_ ## _format _image, ivec3 _uvw) \
      { \
          return _image.m_texture[_uvw]._loadComponents; \
      } \
      \
      ivec3 imageSize(BgfxROImage2DArray_ ## _format _image) \
      { \
          uvec3 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y, dims.z); \
          return ivec3(dims); \
      } \
      \
      _type imageLoad(BgfxRWImage2DArray_ ## _format _image, ivec3 _uvw) \
      { \
          return _image.m_texture[_uvw]._loadComponents; \
      } \
      \
      ivec3 imageSize(BgfxRWImage2DArray_ ## _format _image) \
      { \
          uvec3 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y, dims.z); \
          return ivec3(dims); \
      } \
      \
      void imageStore(BgfxRWImage2DArray_ ## _format _image, ivec3 _uvw, _type _value) \
      { \
          _image.m_texture[_uvw] = _value._storeComponents; \
      } \
      \
      _type imageLoad(BgfxROImage3D_ ## _format _image, ivec3 _uvw) \
      { \
          return _image.m_texture[_uvw]._loadComponents; \
      } \
      \
      ivec3 imageSize(BgfxROImage3D_ ## _format _image) \
      { \
          uvec3 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y, dims.z); \
          return ivec3(dims); \
      } \
      \
      _type imageLoad(BgfxRWImage3D_ ## _format _image, ivec3 _uvw) \
      { \
          return _image.m_texture[_uvw]._loadComponents; \
      } \
      \
      ivec3 imageSize(BgfxRWImage3D_ ## _format _image) \
      { \
          uvec3 dims; \
          _image.m_texture.GetDimensions(dims.x, dims.y, dims.z); \
          return ivec3(dims); \
      } \
      \
      void imageStore(BgfxRWImage3D_ ## _format _image, ivec3 _uvw, _type _value) \
      { \
          _image.m_texture[_uvw] = _value._storeComponents; \
      }
  // Adds an imageAtomicAdd overload for read/write 2D images of one format.
  // The value is swizzled with _storeComponents and handed to InterlockedAdd
  // on the addressed texel. This overload returns void, so the value held
  // before the add is not available to the caller.
  //
  // _loadComponents is unused; it is accepted so this macro has the same
  // parameter list as __IMAGE_IMPL_A.
  //
  // The macro must end on the line without a trailing backslash. A backslash
  // there would splice the next source line (the first format instantiation)
  // into this macro's body instead of leaving it as a top-level invocation.
  #define __IMAGE_IMPL_ATOMIC(_format, _storeComponents, _type, _loadComponents) \
      \
      void imageAtomicAdd(BgfxRWImage2D_ ## _format _image, ivec2 _uv, _type _value) \
      { \
          InterlockedAdd(_image.m_texture[_uv], _value._storeComponents); \
      }
  // Per-format instantiations. Arguments are
  //   (format, store swizzle, load/store vector type, load swizzle).
  // Formats with fewer than four channels replicate the last stored channel
  // on load (for example xyyy, xxxx) to fill the four-component result.

  // 8-bit and 16-bit float formats.
  __IMAGE_IMPL_A(rgba8,    xyzw, vec4,  xyzw)
  __IMAGE_IMPL_A(rg8,      xy,   vec4,  xyyy)
  __IMAGE_IMPL_A(r8,       x,    vec4,  xxxx)
  __IMAGE_IMPL_A(rg16f,    xy,   vec4,  xyyy)

  // With BGFX_SHADER_LANGUAGE_HLSL set, rgba16f and r16f only get the wrapper
  // structs and no load/size/store overloads.
  // NOTE(review): the reason for the reduced set is not visible in this file.
  #if BGFX_SHADER_LANGUAGE_HLSL
  __IMAGE_IMPL_S(rgba16f,  xyzw, vec4,  xyzw)
  __IMAGE_IMPL_S(r16f,     x,    vec4,  xxxx)
  #else
  __IMAGE_IMPL_A(rgba16f,  xyzw, vec4,  xyzw)
  __IMAGE_IMPL_A(r16f,     x,    vec4,  xxxx)
  #endif // BGFX_SHADER_LANGUAGE_HLSL

  // 32-bit float formats.
  __IMAGE_IMPL_A(r32f,     x,    vec4,  xxxx)
  __IMAGE_IMPL_A(rgba32f,  xyzw, vec4,  xyzw)

  // 32-bit unsigned integer formats.
  __IMAGE_IMPL_A(r32ui,    x,    uvec4, xxxx)
  __IMAGE_IMPL_A(rg32ui,   xy,   uvec4, xyyy)
  __IMAGE_IMPL_A(rgba32ui, xyzw, uvec4, xyzw)

  // imageAtomicAdd is provided for r32ui only.
  __IMAGE_IMPL_ATOMIC(r32ui, x, uvec4, xxxx)
  // GLSL-style atomic names mapped onto the Interlocked* intrinsics.

  // Two-argument forms: the previous value is not captured.
  #define atomicAdd(_mem, _data) InterlockedAdd(_mem, _data)
  #define atomicAnd(_mem, _data) InterlockedAnd(_mem, _data)
  #define atomicMax(_mem, _data) InterlockedMax(_mem, _data)
  #define atomicMin(_mem, _data) InterlockedMin(_mem, _data)
  #define atomicOr(_mem, _data)  InterlockedOr(_mem, _data)
  #define atomicXor(_mem, _data) InterlockedXor(_mem, _data)

  // Three-argument "fetch" forms: _original is forwarded as the intrinsic's
  // extra output argument.
  #define atomicFetchAndAdd(_mem, _data, _original)      InterlockedAdd(_mem, _data, _original)
  #define atomicFetchAndAnd(_mem, _data, _original)      InterlockedAnd(_mem, _data, _original)
  #define atomicFetchAndMax(_mem, _data, _original)      InterlockedMax(_mem, _data, _original)
  #define atomicFetchAndMin(_mem, _data, _original)      InterlockedMin(_mem, _data, _original)
  #define atomicFetchAndOr(_mem, _data, _original)       InterlockedOr(_mem, _data, _original)
  #define atomicFetchAndXor(_mem, _data, _original)      InterlockedXor(_mem, _data, _original)
  #define atomicFetchAndExchange(_mem, _data, _original) InterlockedExchange(_mem, _data, _original)

  // Compare-and-swap variant.
  #define atomicFetchCompareExchange(_mem, _compare, _data, _original) \
      InterlockedCompareExchange(_mem,_compare, _data, _original)

  // InterlockedCompareStore (no wrapper macro is defined for it here)
  // GLSL-style barrier names. All of them map to
  // GroupMemoryBarrierWithGroupSync() except memoryBarrierBuffer(), which
  // maps to AllMemoryBarrierWithGroupSync().
  #define barrier()                    GroupMemoryBarrierWithGroupSync()
  #define memoryBarrier()              GroupMemoryBarrierWithGroupSync()
  #define memoryBarrierAtomicCounter() GroupMemoryBarrierWithGroupSync()
  #define memoryBarrierBuffer()        AllMemoryBarrierWithGroupSync()
  #define memoryBarrierImage()         GroupMemoryBarrierWithGroupSync()
  #define memoryBarrierShared()        GroupMemoryBarrierWithGroupSync()
  #define groupMemoryBarrier()         GroupMemoryBarrierWithGroupSync()

  #endif // BGFX_SHADER_LANGUAGE_GLSL
  // Writes one indirect-dispatch record into _buffer.
  //
  //   _buffer            indexable buffer of uvec4 elements.
  //   _offset            record index; each record occupies two uvec4 slots,
  //                      so the write goes to element (_offset)*2.
  //   _numX/_numY/_numZ  values stored in .x/.y/.z; .w is written as 0u.
  //
  // _offset is parenthesised so that an expression argument such as `i + 1`
  // is scaled as a whole. Without the parentheses it would expand to
  // `i + 1*2+0`.
  #define dispatchIndirect( \
        _buffer \
      , _offset \
      , _numX \
      , _numY \
      , _numZ \
      ) \
      _buffer[(_offset)*2+0] = uvec4(_numX, _numY, _numZ, 0u)
  // Writes one indirect (non-indexed) draw record into _buffer.
  //
  //   _buffer         indexable buffer of uvec4 elements.
  //   _offset         record index; each record occupies two uvec4 slots, so
  //                   the write goes to element (_offset)*2. The second slot
  //                   of the record is not written by this macro.
  //   _numVertices, _numInstances, _startVertex, _startInstance
  //                   values stored in .x/.y/.z/.w, in that order.
  //
  // _offset is parenthesised so that an expression argument such as `i + 1`
  // is scaled as a whole. Without the parentheses it would expand to
  // `i + 1*2+0`.
  #define drawIndirect( \
        _buffer \
      , _offset \
      , _numVertices \
      , _numInstances \
      , _startVertex \
      , _startInstance \
      ) \
      _buffer[(_offset)*2+0] = uvec4(_numVertices, _numInstances, _startVertex, _startInstance)
  // Writes one indirect indexed-draw record into _buffer, using both uvec4
  // slots of the record.
  //
  //   _buffer   indexable buffer of uvec4 elements.
  //   _offset   record index; the record starts at element (_offset)*2.
  //   slot 0    (_numIndices, _numInstances, _startIndex, _startVertex)
  //   slot 1    (_startInstance, 0u, 0u, 0u)
  //
  // _offset is parenthesised so that an expression argument such as `i + 1`
  // is scaled as a whole. Without the parentheses it would expand to
  // `i + 1*2+0`.
  //
  // NOTE: the expansion is two statements separated by `;`. Wrap the call in
  // braces when it is the body of an unbraced if/else/for, otherwise only the
  // first store is controlled by the condition.
  #define drawIndexedIndirect( \
        _buffer \
      , _offset \
      , _numIndices \
      , _numInstances \
      , _startIndex \
      , _startVertex \
      , _startInstance \
      ) \
      _buffer[(_offset)*2+0] = uvec4(_numIndices, _numInstances, _startIndex, _startVertex); \
      _buffer[(_offset)*2+1] = uvec4(_startInstance, 0u, 0u, 0u)
  #endif // !defined(__cplusplus)

  #endif // !defined(BGFX_COMPUTE_H_HEADER_GUARD)