compute.lua 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  -- API documentation entry for the compute dispatch function (the notes below refer to it as
  -- `Pass:compute`).  The table describes two call variants:
  --   * direct dispatch, taking the x/y/z workgroup counts as numbers, and
  --   * indirect dispatch, reading the three counts from a Buffer at a byte offset.
  return {
    tag = 'compute',
    summary = 'Run a compute shader.',
    description = [[
      Runs a compute shader.  There must be an active compute shader set using `Pass:setShader`.

      All of the compute shader dispatches in a Pass will run **before** all of the draws in the Pass
      (if any).  They will also run at the same time in parallel, unless `Pass:barrier` is used to
      control the order.
    ]],
    -- Every argument used by any variant is declared once here and referenced by name below.
    arguments = {
      x = {
        type = 'number',
        default = '1',
        description = 'The number of workgroups to dispatch in the x dimension.'
      },
      y = {
        type = 'number',
        default = '1',
        description = 'The number of workgroups to dispatch in the y dimension.'
      },
      z = {
        type = 'number',
        default = '1',
        description = 'The number of workgroups to dispatch in the z dimension.'
      },
      buffer = {
        type = 'Buffer',
        description = [[
          A Buffer object containing the x, y, and z workgroup counts, stored as 4-byte unsigned
          integers.
        ]]
      },
      offset = {
        type = 'number',
        default = '0',
        description = 'The byte offset to read the workgroup counts from in the Buffer.'
      }
    },
    returns = {},
    variants = {
      -- Direct dispatch: workgroup counts come straight from Lua.
      {
        arguments = { 'x', 'y', 'z' },
        returns = {}
      },
      -- Indirect dispatch: workgroup counts are read from a Buffer.
      {
        description = [[
          Perform an "indirect" dispatch.  Instead of passing in the workgroup counts directly from
          Lua, the workgroup counts are read from a `Buffer` object at a particular byte offset.
          Each count should be a 4-byte unsigned integer, so in total 12 bytes will be read from the
          buffer.
        ]],
        arguments = { 'buffer', 'offset' },
        returns = {}
      }
    },
    notes = [[
      Compute shaders are usually run once for each pixel in an image, once per particle, once per
      object, etc.  The 3 arguments represent how many times to run, or "dispatch", the compute
      shader, in up to 3 dimensions.  Each element of this grid is called a **workgroup**.

      To make things even more complicated, each workgroup itself is made up of a set of "mini GPU
      threads", which are called **local workgroups**.  Like workgroups, the local workgroup size can
      also be 3D.  It's declared in the shader code, like this:

          layout(local_size_x = w, local_size_y = h, local_size_z = d) in;

      All these 3D grids can get confusing, but the basic idea is to make the local workgroup size a
      small block of e.g. 32 particles or 8x8 pixels or 4x4x4 voxels, and then dispatch however many
      workgroups are needed to cover a list of particles, image, voxel field, etc.

      The reason to do it this way is that the GPU runs its threads in little fixed-size bundles
      called subgroups.  Subgroups are usually 32 or 64 threads (the exact size is given by the
      `subgroupSize` property of `lovr.graphics.getDevice`) and all run together.  If the local
      workgroup size was `1x1x1`, then the GPU would only run 1 thread per subgroup and waste the
      other 31 or 63.  So for the best performance, be sure to set a local workgroup size bigger than
      1!

      Inside the compute shader, a few builtin variables can be used to figure out which workgroup is
      running:

      - `uvec3 WorkgroupCount` is the workgroup count per axis (the `Pass:compute` arguments).
      - `uvec3 WorkgroupSize` is the local workgroup size (declared in the shader).
      - `uvec3 WorkgroupID` is the index of the current (global) workgroup.
      - `uvec3 LocalThreadID` is the index of the local workgroup inside its workgroup.
      - `uint LocalThreadIndex` is a 1D version of `LocalThreadID`.
      - `uvec3 GlobalThreadID` is the unique identifier for a thread within all workgroups in a
        dispatch.  It's equivalent to `WorkgroupID * WorkgroupSize + LocalThreadID` (usually what you
        want!)

      Indirect compute dispatches are useful to "chain" compute shaders together, while keeping all of
      the data on the GPU.  The first dispatch can do some computation and write some results to
      buffers, then the second indirect dispatch can use the data in those buffers to know how many
      times it should run.  An example would be a compute shader that does some sort of object
      culling, writing the number of visible objects to a buffer along with the IDs of each one.
      Subsequent compute shaders can be indirectly dispatched to perform extra processing on the
      visible objects.  Finally, an indirect draw can be used to render them.
    ]],
    -- The grayscale conversion in the example uses the Rec. 709 luma weights
    -- (0.2126, 0.7152, 0.0722), which sum to 1 so overall brightness is preserved.
    example = {
      description = 'A compute shader that makes a texture grayscale.',
      code = [=[
        function lovr.load()
          shader = lovr.graphics.newShader([[
            layout(local_size_x = 8, local_size_y = 8) in;
            layout(set = 0, binding = 0, rgba8) uniform image2D image;

            void lovrmain() {
              ivec2 size = imageSize(image);
              ivec2 pixel = ivec2(GlobalThreadID.xy);

              if (pixel.x >= size.x || pixel.y >= size.y) {
                return;
              }

              vec4 color = imageLoad(image, pixel);
              color.rgb = vec3(color.r * .2126 + color.g * .7152 + color.b * .0722);
              imageStore(image, pixel, color);
            }
          ]])

          texture = lovr.graphics.newTexture('image.png', {
            usage = { 'storage', 'sample', 'transfer' },
            linear = true -- srgb textures don't always support storage usage
          })

          local tw, th = texture:getDimensions()
          local sx, sy = shader:getWorkgroupSize()
          local gx, gy = math.ceil(tw / sx), math.ceil(th / sy)

          local computer = lovr.graphics.newPass()

          computer:setShader(shader)
          computer:send('image', texture)
          computer:compute(gx, gy)
          lovr.graphics.submit(computer)

          texture:generateMipmaps()
        end

        function lovr.draw(pass)
          pass:draw(texture, 0, 1.7, -1)
        end
      ]=]
    },
    related = {
      'Pass:barrier',
      'Pass:setShader',
      'Pass:send'
    }
  }