sdfgi_direct_light.glsl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. #[compute]
  2. #version 450
  3. VERSION_DEFINES
  4. layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  5. #define MAX_CASCADES 8
  6. layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES];
  7. layout(set = 0, binding = 2) uniform sampler linear_sampler;
  8. layout(set = 0, binding = 3, std430) restrict readonly buffer DispatchData {
  9. uint x;
  10. uint y;
  11. uint z;
  12. uint total_count;
  13. }
  14. dispatch_data;
  15. struct ProcessVoxel {
  16. uint position; //xyz 7 bit packed, extra 11 bits for neigbours
  17. uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours
  18. uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours
  19. uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours
  20. //total neighbours: 26
  21. };
  22. #ifdef MODE_PROCESS_STATIC
  23. layout(set = 0, binding = 4, std430) restrict buffer ProcessVoxels {
  24. #else
  25. layout(set = 0, binding = 4, std430) restrict buffer readonly ProcessVoxels {
  26. #endif
  27. ProcessVoxel data[];
  28. }
  29. process_voxels;
  30. layout(r32ui, set = 0, binding = 5) uniform restrict uimage3D dst_light;
  31. layout(rgba8, set = 0, binding = 6) uniform restrict image3D dst_aniso0;
  32. layout(rg8, set = 0, binding = 7) uniform restrict image3D dst_aniso1;
  33. struct CascadeData {
  34. vec3 offset; //offset of (0,0,0) in world coordinates
  35. float to_cell; // 1/bounds * grid_size
  36. ivec3 probe_world_offset;
  37. uint pad;
  38. };
  39. layout(set = 0, binding = 8, std140) uniform Cascades {
  40. CascadeData data[MAX_CASCADES];
  41. }
  42. cascades;
  43. #define LIGHT_TYPE_DIRECTIONAL 0
  44. #define LIGHT_TYPE_OMNI 1
  45. #define LIGHT_TYPE_SPOT 2
  46. struct Light {
  47. vec3 color;
  48. float energy;
  49. vec3 direction;
  50. bool has_shadow;
  51. vec3 position;
  52. float attenuation;
  53. uint type;
  54. float cos_spot_angle;
  55. float inv_spot_attenuation;
  56. float radius;
  57. vec4 shadow_color;
  58. };
  59. layout(set = 0, binding = 9, std140) buffer restrict readonly Lights {
  60. Light data[];
  61. }
  62. lights;
  63. layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture;
  64. layout(set = 0, binding = 11) uniform texture3D occlusion_texture;
  65. layout(push_constant, binding = 0, std430) uniform Params {
  66. vec3 grid_size;
  67. uint max_cascades;
  68. uint cascade;
  69. uint light_count;
  70. uint process_offset;
  71. uint process_increment;
  72. int probe_axis_size;
  73. float bounce_feedback;
  74. float y_mult;
  75. bool use_occlusion;
  76. }
  77. params;
  78. vec2 octahedron_wrap(vec2 v) {
  79. vec2 signVal;
  80. signVal.x = v.x >= 0.0 ? 1.0 : -1.0;
  81. signVal.y = v.y >= 0.0 ? 1.0 : -1.0;
  82. return (1.0 - abs(v.yx)) * signVal;
  83. }
  84. vec2 octahedron_encode(vec3 n) {
  85. // https://twitter.com/Stubbesaurus/status/937994790553227264
  86. n /= (abs(n.x) + abs(n.y) + abs(n.z));
  87. n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy);
  88. n.xy = n.xy * 0.5 + 0.5;
  89. return n.xy;
  90. }
  91. float get_omni_attenuation(float distance, float inv_range, float decay) {
  92. float nd = distance * inv_range;
  93. nd *= nd;
  94. nd *= nd; // nd^4
  95. nd = max(1.0 - nd, 0.0);
  96. nd *= nd; // nd^2
  97. return nd * pow(max(distance, 0.0001), -decay);
  98. }
  99. void main() {
  100. uint voxel_index = uint(gl_GlobalInvocationID.x);
  101. //used for skipping voxels every N frames
  102. if (params.process_increment > 1) {
  103. voxel_index *= params.process_increment;
  104. voxel_index += params.process_offset;
  105. }
  106. if (voxel_index >= dispatch_data.total_count) {
  107. return;
  108. }
  109. uint voxel_position = process_voxels.data[voxel_index].position;
  110. //keep for storing to texture
  111. ivec3 positioni = ivec3((uvec3(voxel_position, voxel_position, voxel_position) >> uvec3(0, 7, 14)) & uvec3(0x7F));
  112. vec3 position = vec3(positioni) + vec3(0.5);
  113. position /= cascades.data[params.cascade].to_cell;
  114. position += cascades.data[params.cascade].offset;
  115. uint voxel_albedo = process_voxels.data[voxel_index].albedo;
  116. vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F);
  117. vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0));
  118. uint valid_aniso = (voxel_albedo >> 15) & 0x3F;
  119. const vec3 aniso_dir[6] = vec3[](
  120. vec3(1, 0, 0),
  121. vec3(0, 1, 0),
  122. vec3(0, 0, 1),
  123. vec3(-1, 0, 0),
  124. vec3(0, -1, 0),
  125. vec3(0, 0, -1));
  126. // Add indirect light first, in order to save computation resources
  127. #ifdef MODE_PROCESS_DYNAMIC
  128. if (params.bounce_feedback > 0.001) {
  129. vec3 feedback = (params.bounce_feedback < 1.0) ? (albedo * params.bounce_feedback) : mix(albedo, vec3(1.0), params.bounce_feedback - 1.0);
  130. vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
  131. ivec3 probe_base_pos = ivec3(pos);
  132. float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
  133. ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
  134. tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
  135. tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
  136. vec3 base_tex_posf = vec3(tex_pos);
  137. vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
  138. vec3 probe_uv_offset = vec3(ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
  139. for (uint j = 0; j < 8; j++) {
  140. ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
  141. ivec3 probe_posi = probe_base_pos;
  142. probe_posi += offset;
  143. // Compute weight
  144. vec3 probe_pos = vec3(probe_posi);
  145. vec3 probe_to_pos = pos - probe_pos;
  146. vec3 probe_dir = normalize(-probe_to_pos);
  147. // Compute lightprobe texture position
  148. vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
  149. for (uint k = 0; k < 6; k++) {
  150. if (bool(valid_aniso & (1 << k))) {
  151. vec3 n = aniso_dir[k];
  152. float weight = trilinear.x * trilinear.y * trilinear.z * max(0, dot(n, probe_dir));
  153. if (weight > 0.0 && params.use_occlusion) {
  154. ivec3 occ_indexv = abs((cascades.data[params.cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4);
  155. vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3)));
  156. vec3 occ_pos = (vec3(positioni) + aniso_dir[k] + vec3(0.5)) / params.grid_size;
  157. occ_pos.z += float(params.cascade);
  158. if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures
  159. occ_pos.x += 1.0;
  160. }
  161. occ_pos *= vec3(0.5, 1.0, 1.0 / float(params.max_cascades)); //renormalize
  162. float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask);
  163. weight *= occlusion;
  164. }
  165. if (weight > 0.0) {
  166. vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
  167. tex_posf.xy *= tex_pixel_size;
  168. vec3 pos_uvw = tex_posf;
  169. pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
  170. pos_uvw.x += float(offset.z) * probe_uv_offset.z;
  171. vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb;
  172. light_accum[k] += indirect_light * weight;
  173. weight_accum[k] += weight;
  174. }
  175. }
  176. }
  177. }
  178. for (uint k = 0; k < 6; k++) {
  179. if (weight_accum[k] > 0.0) {
  180. light_accum[k] /= weight_accum[k];
  181. light_accum[k] *= feedback;
  182. }
  183. }
  184. }
  185. #endif
  186. {
  187. uint rgbe = process_voxels.data[voxel_index].light;
  188. //read rgbe8985
  189. float r = float((rgbe & 0xff) << 1);
  190. float g = float((rgbe >> 8) & 0x1ff);
  191. float b = float(((rgbe >> 17) & 0xff) << 1);
  192. float e = float((rgbe >> 25) & 0x1F);
  193. float m = pow(2.0, e - 15.0 - 9.0);
  194. vec3 l = vec3(r, g, b) * m;
  195. uint aniso = process_voxels.data[voxel_index].light_aniso;
  196. for (uint i = 0; i < 6; i++) {
  197. float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F);
  198. light_accum[i] += l * strength;
  199. }
  200. }
  201. // Raytrace light
  202. vec3 pos_to_uvw = 1.0 / params.grid_size;
  203. vec3 uvw_ofs = pos_to_uvw * 0.5;
  204. for (uint i = 0; i < params.light_count; i++) {
  205. float attenuation = 1.0;
  206. vec3 direction;
  207. float light_distance = 1e20;
  208. switch (lights.data[i].type) {
  209. case LIGHT_TYPE_DIRECTIONAL: {
  210. direction = -lights.data[i].direction;
  211. } break;
  212. case LIGHT_TYPE_OMNI: {
  213. vec3 rel_vec = lights.data[i].position - position;
  214. direction = normalize(rel_vec);
  215. light_distance = length(rel_vec);
  216. rel_vec.y /= params.y_mult;
  217. attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation);
  218. } break;
  219. case LIGHT_TYPE_SPOT: {
  220. vec3 rel_vec = lights.data[i].position - position;
  221. direction = normalize(rel_vec);
  222. light_distance = length(rel_vec);
  223. rel_vec.y /= params.y_mult;
  224. attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation);
  225. float cos_spot_angle = lights.data[i].cos_spot_angle;
  226. float cos_angle = dot(-direction, lights.data[i].direction);
  227. if (cos_angle < cos_spot_angle) {
  228. continue;
  229. }
  230. float scos = max(cos_angle, cos_spot_angle);
  231. float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle));
  232. attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation);
  233. } break;
  234. }
  235. if (attenuation < 0.001) {
  236. continue;
  237. }
  238. bool hit = false;
  239. vec3 ray_pos = position;
  240. vec3 ray_dir = direction;
  241. vec3 inv_dir = 1.0 / ray_dir;
  242. //this is how to properly bias outgoing rays
  243. float cell_size = 1.0 / cascades.data[params.cascade].to_cell;
  244. ray_pos += sign(direction) * cell_size * 0.48; // go almost to the box edge but remain inside
  245. ray_pos += ray_dir * 0.4 * cell_size; //apply a small bias from there
  246. for (uint j = params.cascade; j < params.max_cascades; j++) {
  247. //convert to local bounds
  248. vec3 pos = ray_pos - cascades.data[j].offset;
  249. pos *= cascades.data[j].to_cell;
  250. float local_distance = light_distance * cascades.data[j].to_cell;
  251. if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) {
  252. continue; //already past bounds for this cascade, goto next
  253. }
  254. //find maximum advance distance (until reaching bounds)
  255. vec3 t0 = -pos * inv_dir;
  256. vec3 t1 = (params.grid_size - pos) * inv_dir;
  257. vec3 tmax = max(t0, t1);
  258. float max_advance = min(tmax.x, min(tmax.y, tmax.z));
  259. max_advance = min(local_distance, max_advance);
  260. float advance = 0.0;
  261. float occlusion = 1.0;
  262. while (advance < max_advance) {
  263. //read how much to advance from SDF
  264. vec3 uvw = (pos + ray_dir * advance) * pos_to_uvw;
  265. float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0;
  266. if (distance < 0.001) {
  267. //consider hit
  268. hit = true;
  269. break;
  270. }
  271. occlusion = min(occlusion, distance);
  272. advance += distance;
  273. }
  274. if (hit) {
  275. attenuation *= occlusion;
  276. break;
  277. }
  278. if (advance >= local_distance) {
  279. break; //past light distance, abandon search
  280. }
  281. //change ray origin to collision with bounds
  282. pos += ray_dir * max_advance;
  283. pos /= cascades.data[j].to_cell;
  284. pos += cascades.data[j].offset;
  285. light_distance -= max_advance / cascades.data[j].to_cell;
  286. ray_pos = pos;
  287. }
  288. if (!hit) {
  289. vec3 light = albedo * lights.data[i].color.rgb * lights.data[i].energy * attenuation;
  290. for (int j = 0; j < 6; j++) {
  291. if (bool(valid_aniso & (1 << j))) {
  292. light_accum[j] += max(0.0, dot(aniso_dir[j], direction)) * light;
  293. }
  294. }
  295. }
  296. }
  297. // Store the light in the light texture
  298. float lumas[6];
  299. vec3 light_total = vec3(0);
  300. for (int i = 0; i < 6; i++) {
  301. light_total += light_accum[i];
  302. lumas[i] = max(light_accum[i].r, max(light_accum[i].g, light_accum[i].b));
  303. }
  304. float luma_total = max(light_total.r, max(light_total.g, light_total.b));
  305. uint light_total_rgbe;
  306. {
  307. //compress to RGBE9995 to save space
  308. const float pow2to9 = 512.0f;
  309. const float B = 15.0f;
  310. const float N = 9.0f;
  311. const float LN2 = 0.6931471805599453094172321215;
  312. float cRed = clamp(light_total.r, 0.0, 65408.0);
  313. float cGreen = clamp(light_total.g, 0.0, 65408.0);
  314. float cBlue = clamp(light_total.b, 0.0, 65408.0);
  315. float cMax = max(cRed, max(cGreen, cBlue));
  316. float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B;
  317. float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f);
  318. float exps = expp + 1.0f;
  319. if (0.0 <= sMax && sMax < pow2to9) {
  320. exps = expp;
  321. }
  322. float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
  323. float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
  324. float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
  325. #ifdef MODE_PROCESS_STATIC
  326. //since its self-save, use RGBE8985
  327. light_total_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25);
  328. #else
  329. light_total_rgbe = (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27);
  330. #endif
  331. }
  332. #ifdef MODE_PROCESS_DYNAMIC
  333. vec4 aniso0;
  334. aniso0.r = lumas[0] / luma_total;
  335. aniso0.g = lumas[1] / luma_total;
  336. aniso0.b = lumas[2] / luma_total;
  337. aniso0.a = lumas[3] / luma_total;
  338. vec2 aniso1;
  339. aniso1.r = lumas[4] / luma_total;
  340. aniso1.g = lumas[5] / luma_total;
  341. //save to 3D textures
  342. imageStore(dst_aniso0, positioni, aniso0);
  343. imageStore(dst_aniso1, positioni, vec4(aniso1, 0.0, 0.0));
  344. imageStore(dst_light, positioni, uvec4(light_total_rgbe));
  345. //also fill neighbours, so light interpolation during the indirect pass works
  346. //recover the neighbour list from the leftover bits
  347. uint neighbours = (voxel_albedo >> 21) | ((voxel_position >> 21) << 11) | ((process_voxels.data[voxel_index].light >> 30) << 22) | ((process_voxels.data[voxel_index].light_aniso >> 30) << 24);
  348. const uint max_neighbours = 26;
  349. const ivec3 neighbour_positions[max_neighbours] = ivec3[](
  350. ivec3(-1, -1, -1),
  351. ivec3(-1, -1, 0),
  352. ivec3(-1, -1, 1),
  353. ivec3(-1, 0, -1),
  354. ivec3(-1, 0, 0),
  355. ivec3(-1, 0, 1),
  356. ivec3(-1, 1, -1),
  357. ivec3(-1, 1, 0),
  358. ivec3(-1, 1, 1),
  359. ivec3(0, -1, -1),
  360. ivec3(0, -1, 0),
  361. ivec3(0, -1, 1),
  362. ivec3(0, 0, -1),
  363. ivec3(0, 0, 1),
  364. ivec3(0, 1, -1),
  365. ivec3(0, 1, 0),
  366. ivec3(0, 1, 1),
  367. ivec3(1, -1, -1),
  368. ivec3(1, -1, 0),
  369. ivec3(1, -1, 1),
  370. ivec3(1, 0, -1),
  371. ivec3(1, 0, 0),
  372. ivec3(1, 0, 1),
  373. ivec3(1, 1, -1),
  374. ivec3(1, 1, 0),
  375. ivec3(1, 1, 1));
  376. for (uint i = 0; i < max_neighbours; i++) {
  377. if (bool(neighbours & (1 << i))) {
  378. ivec3 neighbour_pos = positioni + neighbour_positions[i];
  379. imageStore(dst_light, neighbour_pos, uvec4(light_total_rgbe));
  380. imageStore(dst_aniso0, neighbour_pos, aniso0);
  381. imageStore(dst_aniso1, neighbour_pos, vec4(aniso1, 0.0, 0.0));
  382. }
  383. }
  384. #endif
  385. #ifdef MODE_PROCESS_STATIC
  386. //save back the anisotropic
  387. uint light = process_voxels.data[voxel_index].light & (3 << 30);
  388. light |= light_total_rgbe;
  389. process_voxels.data[voxel_index].light = light; //replace
  390. uint light_aniso = process_voxels.data[voxel_index].light_aniso & (3 << 30);
  391. for (int i = 0; i < 6; i++) {
  392. light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5);
  393. }
  394. process_voxels.data[voxel_index].light_aniso = light_aniso;
  395. #endif
  396. }