shh.lua 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  -- Public module table; it is returned at the end of the file.
  local shh = {}

  -- Math helpers
  -- SH is the method table shared by every object created with shh.new.
  -- Instances use it as their metatable, so method lookups resolve via __index.
  local SH = {}
  SH.__index = SH
  --- Creates a new coefficient set: 9 rows of 3 components each.
  -- @param t Optional initial value, forwarded unchanged to SH:set
  --          (nil leaves every coefficient at zero).
  -- @return The new object, whose metatable is SH.
  function shh.new(t)
    local probe = {}
    for row = 1, 9 do
      probe[row] = { 0, 0, 0 }
    end
    setmetatable(probe, SH)
    probe:set(t)
    return probe
  end
  --- Resets all 9 rows to zero in place (the row tables are reused, not replaced).
  function SH:clear()
    for row = 1, 9 do
      local coefficient = self[row]
      for channel = 1, 3 do
        coefficient[channel] = 0
      end
    end
  end
  -- Scratch GPU objects used by the Texture branch of SH:set.
  -- They are created on first use and reused by every later call.
  local tempPass
  local tempBuffer

  --- Overwrites all 9 coefficients from `t`.
  -- Accepted inputs:
  --   * nil / false: every coefficient is reset to zero.
  --   * table of tables: t[i] = { c1, c2, c3 } for i = 1..9 (another SH object works).
  --   * flat table of numbers: 27 values, 3 per coefficient, coefficient-major.
  --   * Texture userdata: coefficients are computed on the GPU via shh.compute,
  --     then read back from a temporary buffer.
  -- @param t The source value, as described above.
  -- Raises an error for any other input.
  function SH:set(t)
    local kind = type(t)
    if not t then
      self:clear()
    elseif kind == 'table' and type(t[1]) == 'table' then
      for i = 1, 9 do
        local dst, src = self[i], t[i]
        dst[1] = src[1]
        dst[2] = src[2]
        dst[3] = src[3]
      end
    elseif kind == 'table' and type(t[1]) == 'number' then
      for i = 1, 9 do
        local base = (i - 1) * 3
        local dst = self[i]
        dst[1] = t[base + 1]
        dst[2] = t[base + 2]
        dst[3] = t[base + 3]
      end
    elseif kind == 'userdata' and t:type() == 'Texture' then
      tempPass = tempPass or lovr.graphics.newPass()
      tempBuffer = tempBuffer or lovr.graphics.newBuffer({ 'vec3', layout = 'std140' }, 9)
      tempPass:reset()
      shh.compute(tempPass, t, tempBuffer)
      lovr.graphics.submit(tempPass)
      -- The readback is fed through set() again, so it must arrive as one of the
      -- table forms above (presumably a table of 9 vec3 tables -- confirm against
      -- the Buffer:getData documentation).
      return self:set(tempBuffer:getData())
    else
      -- The message now lists Texture, which the branch above accepts.
      error('Expected nil, table of numbers, table of tables, or Texture')
    end
  end
  -- Evaluates one channel `c` of the 9-row coefficient table `t` in the
  -- direction (nx, ny, nz). Terms are accumulated in the same order as the
  -- rows so floating-point results stay reproducible.
  local function evaluate(t, c, nx, ny, nz)
    local sum = .88622692545276 * t[1][c]

    -- Linear terms
    sum = sum + 1.0233267079465 * t[2][c] * ny
    sum = sum + 1.0233267079465 * t[3][c] * nz
    sum = sum + 1.0233267079465 * t[4][c] * nx

    -- Quadratic terms
    sum = sum + .85808553080978 * t[5][c] * nx * ny
    sum = sum + .85808553080978 * t[6][c] * ny * nz
    sum = sum + .24770795610038 * t[7][c] * (3 * nz * nz - 1)
    sum = sum + .85808553080978 * t[8][c] * nx * nz
    sum = sum + .42904276540489 * t[9][c] * (nx * nx - ny * ny)

    return sum
  end
  --- Evaluates the coefficients in a direction.
  -- @param nx Direction x component, or a userdata exposing :unpack()
  --           (presumably a LÖVR vector -- confirm), in which case ny and nz
  --           are taken from it.
  -- @param ny Direction y component.
  -- @param nz Direction z component.
  -- @return Three numbers: the evaluated value of channels 1, 2 and 3 (r, g, b).
  function SH:evaluate(nx, ny, nz)
    local x, y, z = nx, ny, nz
    if type(x) == 'userdata' then
      x, y, z = x:unpack()
    end

    local red = evaluate(self, 1, x, y, z)
    local green = evaluate(self, 2, x, y, z)
    local blue = evaluate(self, 3, x, y, z)
    return red, green, blue
  end
  --- Adds a direction-independent light; only the first coefficient row changes.
  -- @param r Red intensity.
  -- @param g Green intensity.
  -- @param b Blue intensity.
  function SH:addAmbientLight(r, g, b)
    local scale = 3.544907701811 -- 2 * math.pi ^ .5
    local weight = scale * .28209479177388
    local constant = self[1]
    constant[1] = constant[1] + weight * r
    constant[2] = constant[2] + weight * g
    constant[3] = constant[3] + weight * b
  end
  -- Accumulates the contribution of a single sample of intensity `x`, arriving
  -- from direction (dx, dy, dz), into channel `c` of the 9-row table `t`.
  local function integrate(t, c, x, dx, dy, dz)
    -- Per-band factor times intensity. The multiplication order matches the
    -- inline form (k * x * ...), so results are bit-identical.
    local linear = .48860251190292 * x
    local quadratic = 1.0925484305921 * x

    t[1][c] = t[1][c] + .28209479177388 * x

    t[2][c] = t[2][c] + linear * dy
    t[3][c] = t[3][c] + linear * dz
    t[4][c] = t[4][c] + linear * dx

    t[5][c] = t[5][c] + quadratic * dx * dy
    t[6][c] = t[6][c] + quadratic * dy * dz
    t[7][c] = t[7][c] + .31539156525252 * x * (3 * dz * dz - 1)
    t[8][c] = t[8][c] + quadratic * dx * dz
    t[9][c] = t[9][c] + .54627421529604 * x * (dx * dx - dy * dy)
  end
  --- Adds a light arriving from direction (dx, dy, dz) with color (r, g, b).
  -- The direction is used as given; this function does not normalize it.
  function SH:addDirectionalLight(dx, dy, dz, r, g, b)
    local scale = 2.9567930857316 -- 16 * math.pi / 17
    -- Scale all three channels up front so a bad argument fails before any
    -- coefficient is modified.
    local red, green, blue = r * scale, g * scale, b * scale
    integrate(self, 1, red, dx, dy, dz)
    integrate(self, 2, green, dx, dy, dz)
    integrate(self, 3, blue, dx, dy, dz)
  end
  --- Adds the coefficients of `other` to this object, component by component.
  -- @param other Any table holding 9 rows of 3 numbers (e.g. another SH object).
  function SH:add(other)
    for row = 1, 9 do
      local mine, theirs = self[row], other[row]
      for channel = 1, 3 do
        mine[channel] = mine[channel] + theirs[channel]
      end
    end
  end
  --- Moves this object's coefficients toward `other` in place.
  -- @param other Any table holding 9 rows of 3 numbers (e.g. another SH object).
  -- @param t Interpolation factor: 0 keeps the current values; 1 moves them onto `other`.
  function SH:lerp(other, t)
    for row = 1, 9 do
      local from, to = self[row], other[row]
      for channel = 1, 3 do
        local current = from[channel]
        from[channel] = current + (to[channel] - current) * t
      end
    end
  end
  --- Multiplies every coefficient component by `s` in place.
  -- @param s Scale factor.
  function SH:scale(s)
    for row = 1, 9 do
      local coefficient = self[row]
      for channel = 1, 3 do
        coefficient[channel] = coefficient[channel] * s
      end
    end
  end
  -- Shaders

  -- GLSL compute shader source that projects a cubemap onto 9 coefficients.
  --  * FORMAT is a specialization constant choosing which of the four image
  --    bindings (TextureRGBA8 / RGBA16F / RGBA32F / RG11B10F) is read.
  --    RGBA8 texels are passed through gammaToLinear; the others are used as loaded.
  --  * The workgroup is 4 x 4 x 6 threads: z selects the cube face, and each
  --    (x, y) thread walks a square tile of size / 4 texels on that face.
  --  * Each thread accumulates solid-angle-weighted coefficients in shared
  --    memory. After the barrier, thread 0 sums all 96 partial results,
  --    rescales by 4 * PI / (total weight), and writes basis[0..8].
  -- The tile size is an integer division, so any remainder texels are never
  -- visited; shh.compute asserts that the width is a multiple of 4.
  local cubeShader = [[
  #define RGBA8 0
  #define RGBA16F 1
  #define RGBA32F 2
  #define RG11B10F 3
  layout(constant_id = 0) const uint FORMAT = RGBA8;
  layout(rgba8) uniform readonly imageCube TextureRGBA8;
  layout(rgba16f) uniform readonly imageCube TextureRGBA16F;
  layout(rgba32f) uniform readonly imageCube TextureRGBA32F;
  layout(r11f_g11f_b10f) uniform readonly imageCube TextureRG11B10F;
  layout(std140) buffer writeonly Basis { vec3 basis[9]; };
  #define THREADS 96
  layout(local_size_x = 4, local_size_y = 4, local_size_z = 6) in;
  shared vec3 coefficients[THREADS][9];
  shared float totalAngle[THREADS];
  void lovrmain() {
  uint id = LocalThreadIndex;
  uint face = LocalThreadID.z;
  totalAngle[id] = 0.;
  for (int i = 0; i < 9; i++) {
  coefficients[id][i] = vec3(0.);
  }
  int size;
  if (FORMAT == RGBA8) size = imageSize(TextureRGBA8).x;
  if (FORMAT == RGBA16F) size = imageSize(TextureRGBA16F).x;
  if (FORMAT == RGBA32F) size = imageSize(TextureRGBA32F).x;
  if (FORMAT == RG11B10F) size = imageSize(TextureRG11B10F).x;
  int tile = size / int(WorkgroupSize.x);
  ivec2 origin = ivec2(LocalThreadID.xy) * tile;
  for (int y = 0; y < tile; y++) {
  for (int x = 0; x < tile; x++) {
  ivec2 xy = origin + ivec2(x, y);
  vec2 uv = (xy + .5) / size * 2. - 1.;
  // Note: Z coordinate is flipped to convert to left-handed cubemap coordinate space
  vec3 dir;
  switch (face) {
  case 0: dir = vec3(+1., -uv.y, +uv.x); break;
  case 1: dir = vec3(-1., -uv.y, -uv.x); break;
  case 2: dir = vec3(+uv.x, +1., -uv.y); break;
  case 3: dir = vec3(+uv.x, -1., +uv.y); break;
  case 4: dir = vec3(+uv.x, -uv.y, -1.); break;
  case 5: dir = vec3(-uv.x, -uv.y, +1.); break;
  }
  float len2 = dot(dir, dir);
  float len = sqrt(len2);
  dir *= 1. / len;
  float solidAngle = 4. / (len2 * len); // (uv^2)^(3/2) == len(uv)^2 * len(uv)
  totalAngle[id] += solidAngle;
  vec3 color;
  ivec3 texel = ivec3(xy, face);
  if (FORMAT == RGBA8) color = gammaToLinear(imageLoad(TextureRGBA8, texel).rgb);
  if (FORMAT == RGBA16F) color = imageLoad(TextureRGBA16F, texel).rgb;
  if (FORMAT == RGBA32F) color = imageLoad(TextureRGBA32F, texel).rgb;
  if (FORMAT == RG11B10F) color = imageLoad(TextureRG11B10F, texel).rgb;
  color *= solidAngle;
  coefficients[id][0] += color * .28209479177388;
  coefficients[id][1] += color * .48860251190292 * dir.y;
  coefficients[id][2] += color * .48860251190292 * dir.z;
  coefficients[id][3] += color * .48860251190292 * dir.x;
  coefficients[id][4] += color * 1.0925484305921 * dir.x * dir.y;
  coefficients[id][5] += color * 1.0925484305921 * dir.y * dir.z;
  coefficients[id][6] += color * .31539156525252 * (3. * dir.z * dir.z - 1.);
  coefficients[id][7] += color * 1.0925484305921 * dir.x * dir.z;
  coefficients[id][8] += color * .54627421529604 * (dir.x * dir.x - dir.y * dir.y);
  }
  }
  barrier();
  if (id == 0) {
  for (int t = 1; t < THREADS; t++) {
  totalAngle[0] += totalAngle[t];
  for (int i = 0; i < 9; i++) {
  coefficients[0][i] += coefficients[t][i];
  }
  }
  float scale = 4. * PI / totalAngle[0];
  for (int i = 0; i < 9; i++) {
  basis[i] = coefficients[0][i] * scale;
  }
  }
  }
  ]]
  -- GLSL compute shader source that projects a 2D equirectangular image onto
  -- 9 coefficients.
  --  * FORMAT is a specialization constant choosing which of the four image
  --    bindings (TextureRGBA8 / RGBA16F / RGBA32F / RG11B10F) is read.
  --    RGBA8 texels are passed through gammaToLinear; the others are used as loaded.
  --  * The workgroup is 8 x 8 threads. Each thread walks a rectangular tile
  --    whose size is the image size divided by 8, rounded up; texels past the
  --    image edge are skipped.
  --  * Each thread accumulates solid-angle-weighted coefficients in shared
  --    memory. After the barrier, thread 0 sums all 64 partial results,
  --    rescales by 4 * PI / (total weight), and writes basis[0..8].
  -- phi and theta are evaluated at texel centers (+ .5), matching the cube
  -- shader. Sampling at the texel's top-left edge gives the first row zero
  -- weight (sin(0) == 0) and shifts every direction by half a texel.
  local equirectShader = [[
  #define RGBA8 0
  #define RGBA16F 1
  #define RGBA32F 2
  #define RG11B10F 3
  layout(constant_id = 0) const uint FORMAT = RGBA32F;
  layout(rgba8) uniform readonly image2D TextureRGBA8;
  layout(rgba16f) uniform readonly image2D TextureRGBA16F;
  layout(rgba32f) uniform readonly image2D TextureRGBA32F;
  layout(r11f_g11f_b10f) uniform readonly image2D TextureRG11B10F;
  layout(std140) buffer writeonly Basis { vec3 basis[9]; };
  #define THREADS 64
  layout(local_size_x = 8, local_size_y = 8) in;
  shared vec3 coefficients[THREADS][9];
  shared float totalAngle[THREADS];
  void lovrmain() {
  uint id = LocalThreadIndex;
  totalAngle[id] = 0.;
  for (int i = 0; i < 9; i++) {
  coefficients[id][i] = vec3(0.);
  }
  ivec2 size;
  if (FORMAT == RGBA8) size = imageSize(TextureRGBA8);
  if (FORMAT == RGBA16F) size = imageSize(TextureRGBA16F);
  if (FORMAT == RGBA32F) size = imageSize(TextureRGBA32F);
  if (FORMAT == RG11B10F) size = imageSize(TextureRG11B10F);
  ivec2 tile = (size + ivec2(7, 7)) / ivec2(WorkgroupSize.xy);
  ivec2 origin = ivec2(LocalThreadID.xy) * tile;
  float width = size.x;
  float height = size.y;
  for (int y = 0; y < tile.y; y++) {
  if (origin.y + y >= size.y) continue;
  // Sample at the texel center
  float phi = (origin.y + y + .5) / height * PI;
  float sinphi = sin(phi);
  float cosphi = cos(phi);
  for (int x = 0; x < tile.x; x++) {
  if (origin.x + x >= size.x) continue;
  // Sample at the texel center
  float theta = (.75 - (origin.x + x + .5) / width) * 2. * PI;
  float solidAngle = (2. * PI / width) * (PI / height) * abs(sinphi);
  totalAngle[id] += solidAngle;
  vec3 color;
  ivec2 texel = origin + ivec2(x, y);
  if (FORMAT == RGBA8) color = gammaToLinear(imageLoad(TextureRGBA8, texel).rgb);
  if (FORMAT == RGBA16F) color = imageLoad(TextureRGBA16F, texel).rgb;
  if (FORMAT == RGBA32F) color = imageLoad(TextureRGBA32F, texel).rgb;
  if (FORMAT == RG11B10F) color = imageLoad(TextureRG11B10F, texel).rgb;
  color *= solidAngle;
  vec3 dir = normalize(vec3(cos(theta) * sinphi, cosphi, -sin(theta) * sinphi));
  coefficients[id][0] += color * .28209479177388;
  coefficients[id][1] += color * .48860251190292 * dir.y;
  coefficients[id][2] += color * .48860251190292 * dir.z;
  coefficients[id][3] += color * .48860251190292 * dir.x;
  coefficients[id][4] += color * 1.0925484305921 * dir.x * dir.y;
  coefficients[id][5] += color * 1.0925484305921 * dir.y * dir.z;
  coefficients[id][6] += color * .31539156525252 * (3. * dir.z * dir.z - 1.);
  coefficients[id][7] += color * 1.0925484305921 * dir.x * dir.z;
  coefficients[id][8] += color * .54627421529604 * (dir.x * dir.x - dir.y * dir.y);
  }
  }
  barrier();
  if (id == 0) {
  for (int t = 1; t < THREADS; t++) {
  totalAngle[0] += totalAngle[t];
  for (int i = 0; i < 9; i++) {
  coefficients[0][i] += coefficients[t][i];
  }
  }
  float scale = 4. * PI / totalAngle[0];
  for (int i = 0; i < 9; i++) {
  basis[i] = coefficients[0][i] * scale;
  }
  }
  }
  ]]
  -- Texture format name -> value of the FORMAT specialization constant used by
  -- the compute shaders. A format missing from this table is unsupported.
  local formatCodes = {
    rgba8 = 0,
    rgba16f = 1,
    rgba32f = 2,
    rg11b10f = 3,
  }

  -- Cache of compiled compute shaders, indexed as shaders[kind][format].
  local shaders = {}
  -- Returns the compute shader for a texture kind ('cube', anything else means
  -- equirect) and texture format, creating and caching it on first request.
  -- The first shader of a kind is compiled from source; later formats of the
  -- same kind are cloned from an already-cached variant with a new FORMAT flag.
  local function getComputeShader(kind, format)
    local flags = { FORMAT = formatCodes[format] }
    local variants = shaders[kind]

    if not variants then
      variants = {}
      shaders[kind] = variants
      local source = equirectShader
      if kind == 'cube' then
        source = cubeShader
      end
      variants[format] = lovr.graphics.newShader(source, { flags = flags })
    elseif not variants[format] then
      -- Any existing variant works as the template; next() picks one.
      local template = variants[next(variants)]
      variants[format] = template:clone(flags)
    end

    return variants[format]
  end
  --- Records a compute dispatch that writes the 9 coefficients of `texture`
  -- into `buffer`.
  -- @param pass    Pass to record into; its state is pushed and popped around
  --                the work, so the caller's shader is left as it was.
  -- @param texture Cubemap (width must be a multiple of 4) or 2D
  --                equirectangular texture (width must be twice the height),
  --                in one of the formats listed in formatCodes.
  -- @param buffer  Destination bound to the shader's 'Basis' block.
  -- @param offset  Optional offset, forwarded unchanged to pass:send.
  -- @return `buffer`.
  function shh.compute(pass, texture, buffer, offset)
    local kind = texture:getType()
    local format = texture:getFormat()
    local width, height = texture:getDimensions()

    if kind == 'cube' then
      assert(width % 4 == 0, 'Currently, cubemap dimensions must be a multiple of 4 (please open issue)')
    elseif kind == '2d' then
      assert(width == 2 * height, '2D equirectangular textures should have a 2:1 aspect ratio')
    else
      error('Expected 2d or cubemap texture')
    end

    assert(formatCodes[format], ('Unsupported texture format %q'):format(format))

    -- The image binding is named after the upper-cased format (e.g. TextureRGBA8).
    pass:push('state')
    pass:setShader(getComputeShader(kind, format))
    pass:send('Basis', buffer, offset)
    local binding = 'Texture' .. texture:getFormat():upper()
    pass:send(binding, texture)
    pass:compute()
    pass:pop('state')

    return buffer
  end
  -- Convenience shader helper

  -- Created on the first shh.setShader call, then reused.
  local shader

  --- Activates a built-in shader on `pass` and sends it the coefficients.
  -- The shader evaluates the coefficients along the surface normal, clamps the
  -- result at zero, and divides it by PI.
  -- @param pass The pass to configure.
  -- @param ...  Either a table (e.g. an SH object), which is sent as the `sh`
  --             field of the 'SH' block, or any other arguments, which are
  --             forwarded unchanged to pass:send('SH', ...).
  function shh.setShader(pass, ...)
    if not shader then
      local fragment = [[
  uniform SH { vec3 sh[9]; };
  vec3 evaluateSH(vec3 sh[9], vec3 n) {
  return max(
  .88622692545276 * sh[0] +
  1.0233267079465 * sh[1] * n.y +
  1.0233267079465 * sh[2] * n.z +
  1.0233267079465 * sh[3] * n.x +
  .85808553080978 * sh[4] * n.x * n.y +
  .85808553080978 * sh[5] * n.y * n.z +
  .24770795610038 * sh[6] * (3 * n.z * n.z - 1) +
  .85808553080978 * sh[7] * n.x * n.z +
  .42904276540489 * sh[8] * (n.x * n.x - n.y * n.y),
  0
  );
  }
  vec4 lovrmain() {
  return vec4(evaluateSH(sh, normalize(Normal)) / PI, 1.);
  }
  ]]
      shader = lovr.graphics.newShader('unlit', fragment)
    end

    pass:setShader(shader)

    if type(...) ~= 'table' then
      pass:send('SH', ...)
      return
    end

    -- Only the first vararg is used when it is a table.
    pass:send('SH', { sh = (...) })
  end
  -- Export the module table.
  return shh