IndirectDiffuseClipmaps.ankiprog 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma anki mutator GPU_WAVE_SIZE 16 32 64
  6. #pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 8 10 12 14 16 18 20
  7. #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
  8. #pragma anki mutator RT_MATERIAL_FETCH_CLIPMAP 0 1
  9. #pragma anki mutator SPATIAL_RECONSTRUCT_TYPE 0 1
  10. #pragma anki mutator IRRADIANCE_USE_SH_L2 0 1
  11. #pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP SPATIAL_RECONSTRUCT_TYPE
  12. #pragma anki technique RtMaterialFetchInlineRt comp mutators
  13. #pragma anki technique PopulateCaches comp mutators RADIANCE_OCTAHEDRON_MAP_SIZE
  14. #pragma anki technique ComputeIrradiance comp mutators GPU_WAVE_SIZE RADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_USE_SH_L2
  15. #pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
  16. #pragma anki technique ApplyInlineRt comp mutators SPATIAL_RECONSTRUCT_TYPE
  17. #pragma anki technique SpatialReconstruct comp mutators SPATIAL_RECONSTRUCT_TYPE
  18. #pragma anki technique TemporalDenoise comp mutators
  19. #pragma anki technique BilateralDenoise comp mutators
  20. #pragma anki technique VisualizeProbes vert pixel mutators
  21. #include <AnKi/Shaders/Include/GpuSceneTypes.h>
  22. #include <AnKi/Shaders/Functions.hlsl>
  23. #include <AnKi/Shaders/Include/MiscRendererTypes.h>
  24. #include <AnKi/Shaders/ImportanceSampling.hlsl>
  25. #include <AnKi/Shaders/PackFunctions.hlsl>
  26. #include <AnKi/Shaders/FastMathFunctions.hlsl>
  27. #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
  28. #include <AnKi/Shaders/BilateralFilter.hlsl>
  29. #include <AnKi/Shaders/TemporalAA.hlsl>
  30. #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
  31. #include <ThirdParty/SHforHLSL/SH.hlsli>
  32. constexpr F32 kGaussianSigma = 0.55;
  33. constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
  34. constexpr Bool kLocalLightShadow = false;
  // The box of probes, inside a clipmap, that a probe update pass works on.
  struct ClipmapRegion
  {
      // Offset added to a region-local 3D probe ID to get the clipmap-wide 3D probe ID
      UVec3 m_probesBegin;

      // When non-zero the update passes only visit a strided subset of the region's probes. The subset is
      // rotated using the frame index
      U32 m_partialUpdate;

      // Size of the region in probes, per axis. Used to unflatten the region-local probe index
      UVec3 m_probeCounts;

      // Probe count of the region. Used to derive the stride of partial updates and to wrap the probe index
      // (presumably equal to the product of m_probeCounts, confirm against the CPU side)
      U32 m_probeCount;
  };
  // Fast constants shared by the probe update passes (ray tracing and cache population).
  struct ProbeUpdateConsts
  {
      // Index of the clipmap that is being updated
      U32 m_clipmapIdx;

      // Size of the radiance octahedron map. It exists here as well as in a mutator because using the mutator
      // would create too many raygen variants
      U32 m_radianceOctMapSize;

      // Ray count per octahedron map texel
      U32 m_rayCountPerTexel;

      // Upper bound of the probes that are updated by a single pass
      U32 m_maxProbesToUpdate;

      // The probes of the clipmap that will be updated
      ClipmapRegion m_clipmapRegion;
  };
  50. // ===========================================================================
  51. // RtMaterialFetch and RtMaterialFetchInlineRt =
  52. // ===========================================================================
  53. #if(NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && NOT_ZERO(RT_MATERIAL_FETCH_CLIPMAP)) || NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetchInlineRt)
  54. # define CLIPMAP_VOLUME
  55. # define INCLUDE_ALL
  56. # include <AnKi/Shaders/RtMaterialFetch.hlsl>
  57. ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
  // Probe update ray tracing, either as a raygen shader or as an inline RT compute shader. Every thread handles
  // a single (octahedron texel, probe, sub-ray) tuple: it traces a ray from the center of the probe, lights
  // whatever it finds and stores the radiance together with the ray distance in g_lightResultTex.
  # if ANKI_COMPUTE_SHADER
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  # else
  [Shader("raygeneration")] void main()
  # endif
  {
  # if ANKI_COMPUTE_SHADER
      const U32 flatThreadIdx = svDispatchThreadId.x;
  # else
      const U32 flatThreadIdx = DispatchRaysIndex().x;
  # endif

      const IndirectDiffuseClipmapConstants clipmapConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
      const ClipmapRegion region = g_consts.m_clipmapRegion;
      const U32 clipmapIdx = g_consts.m_clipmapIdx;
      const U32 octMapSize = g_consts.m_radianceOctMapSize;
      const U32 raysPerTexel = g_consts.m_rayCountPerTexel;
      const U32 probeBudget = g_consts.m_maxProbesToUpdate;
      const U32 texelsPerProbe = square(octMapSize);

      // Break the flat thread index into its 3 parts. The intent is to shoot rays that are as coherent as
      // possible by using the same direction inside a wave
      U32 texelIdx;
      U32 probeIdx;
      U32 subRayIdx;
      unflatten3dArrayIndex(texelsPerProbe, probeBudget, raysPerTexel, flatThreadIdx, texelIdx, probeIdx, subRayIdx);

  # if ANKI_COMPUTE_SHADER
      // Threads that fall outside the work of this pass have nothing to do
      const Bool insideWork = texelIdx < texelsPerProbe && probeIdx < probeBudget && subRayIdx < raysPerTexel;
      if(!insideWork)
      {
          return;
      }
  # endif

      if(region.m_partialUpdate)
      {
          // Visit one probe every "stride" probes and let the frame index rotate the starting probe
          const U32 stride = region.m_probeCount / probeBudget;
          probeIdx = g_globalRendererConstants.m_frame + stride * probeIdx;
          probeIdx = probeIdx % region.m_probeCount;
      }

      // Region-local flat index -> region-local 3D ID -> clipmap-wide 3D ID -> clipmap-wide flat index
      UVec3 probeId;
      unflatten3dArrayIndex(region.m_probeCounts.z, region.m_probeCounts.y, region.m_probeCounts.x, probeIdx, probeId.z, probeId.y,
                            probeId.x);
      probeId += region.m_probesBegin;
      probeIdx = (probeId.z * clipmapConsts.m_probeCounts.y + probeId.y) * clipmapConsts.m_probeCounts.x + probeId.x;

      // Sanity check. Only the Y component is validated
      ANKI_ASSERT(probeId.y >= region.m_probesBegin.y && probeId.y < region.m_probesBegin.y + region.m_probeCounts.y);

      // The ray starts from the center of the probe
      const Vec3 probeSize = clipmapConsts.m_sizes[clipmapIdx] / clipmapConsts.m_probeCounts;
      const Vec3 probeWorldPos = probeId * probeSize + probeSize * 0.5 + clipmapConsts.m_aabbMins[clipmapIdx];

      // Pick a direction inside the octahedron map texel. The position inside the texel changes with the frame and
      // the sub-ray
      const UVec2 octCoord = UVec2(texelIdx % octMapSize, texelIdx / octMapSize);
      ANKI_ASSERT(all(octCoord < octMapSize));
      const U32 jitterIdx = (g_globalRendererConstants.m_frame * raysPerTexel + subRayIdx) % 16;
      const Vec2 jitteredCoord = octCoord + 0.5 + generateMsaa16x(jitterIdx) / (16.0 * 2.0);
      const HVec3 dir = octahedronDecode(jitteredCoord / octMapSize);

      // Trace
      const F32 tMax = 1000.0; // TODO
      constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
      GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
      F32 rayT = 0.0;
      Bool backfacing = false;
  # if ANKI_COMPUTE_SHADER
      const Bool hit = materialRayTraceInlineRt<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing);
  # else
      const Bool hit = materialRayTrace<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
  # endif

      // Rays that are reported as backfacing keep the magenta marker. PopulateCaches compares against that exact value
      HVec3 radiance = HVec3(1.0, 0.0, 1.0);
      if(!backfacing)
      {
          const Vec3 hitPos = probeWorldPos + dir * (rayT - 0.01);

          // Direct
          radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, tMax, kLocalLightShadow, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);

          // Indirect, using the clipmaps of the previous frame
          constexpr SampleClipmapFlag sampleFlags =
              kSampleClipmapFlagBackfacingProbeRejection | kSampleClipmapFlagBiasSamplePointSurfaceNormal | kSampleClipmapFlagUsePreviousFrame;
          const Vec3 irradiance = sampleClipmapIrradiance(hitPos, gbuffer.m_worldNormal, g_globalRendererConstants.m_cameraPosition,
                                                          clipmapConsts, g_linearAnyRepeatSampler, sampleFlags);
          radiance += irradiance * gbuffer.m_diffuse / kPi;
      }

      // Store the result. The distance is clamped before it's written.
      // NOTE(review): 1000^2 doesn't fit in F16 (max 65504), whoever squares the stored distance should do it in F32
      const F32 kMaxDist = 1000.0;
      const UVec2 resultCoord = UVec2(probeIdx, texelIdx * raysPerTexel + subRayIdx);
      TEX(g_lightResultTex, resultCoord) = HVec4(radiance, min(rayT, kMaxDist));
  }
  141. #endif
  142. // ===========================================================================
  143. // PopulateCaches =
  144. // ===========================================================================
  145. #if NOT_ZERO(ANKI_TECHNIQUE_PopulateCaches)
  // Result of the probe ray tracing. X is the clipmap-wide flat probe index, Y is texel * rayCountPerTexel + subray
  Texture2D<Vec4> g_rtResultTex : register(t0);

  RWTexture3D<Vec4> g_radianceVolume : register(u0);
  RWTexture3D<Vec4> g_distanceMomentsVolume : register(u1);
  RWTexture3D<Vec4> g_probeValidiryVolume : register(u2);

  ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);

  ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)

  // Consume the ray tracing results and update the caches of a clipmap. Every thread handles one radiance octahedron
  // map texel of one probe: it averages the sub-rays of the texel, writes (or blends with the history) the radiance
  // and the distance moments and fills the octahedron borders. The 1st texel of every probe also updates the probe
  // validity.
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  {
      const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;

      const U32 clipmapIdx = g_consts.m_clipmapIdx;
      const Vec3 clipmapSize = idConsts.m_sizes[clipmapIdx].xyz;
      const Vec3 prevClipmapAabbMin = idConsts.m_previousFrameAabbMins[clipmapIdx].xyz;

      const U32 octMapTexelCount = square(RADIANCE_OCTAHEDRON_MAP_SIZE);
      U32 probeIdx = svDispatchThreadId.x / octMapTexelCount;
      const U32 octMapTexelIdx = svDispatchThreadId.x % octMapTexelCount;

      if(octMapTexelIdx >= octMapTexelCount || probeIdx >= g_consts.m_maxProbesToUpdate)
      {
          return;
      }

      if(g_consts.m_clipmapRegion.m_partialUpdate)
      {
          // Choose every other probe depending on the budget. The frame index rotates the starting probe
          const U32 div = g_consts.m_clipmapRegion.m_probeCount / g_consts.m_maxProbesToUpdate;
          probeIdx = g_globalRendererConstants.m_frame + div * probeIdx;
          probeIdx = probeIdx % g_consts.m_clipmapRegion.m_probeCount;
      }

      // Region-local flat index -> region-local 3D ID -> clipmap-wide 3D ID -> clipmap-wide flat index
      UVec3 probeId;
      unflatten3dArrayIndex(g_consts.m_clipmapRegion.m_probeCounts.z, g_consts.m_clipmapRegion.m_probeCounts.y,
                            g_consts.m_clipmapRegion.m_probeCounts.x, probeIdx, probeId.z, probeId.y, probeId.x);
      probeId += g_consts.m_clipmapRegion.m_probesBegin;
      probeIdx = probeId.z * idConsts.m_probeCounts.x * idConsts.m_probeCounts.y + probeId.y * idConsts.m_probeCounts.x + probeId.x;

      // Check
      {
          const UVec3 probeIdBegin = g_consts.m_clipmapRegion.m_probesBegin;
          [unroll] for(U32 i = 1; i < 2; ++i)
          {
              ANKI_ASSERT(probeId[i] >= probeIdBegin[i] && probeId[i] < probeIdBegin[i] + g_consts.m_clipmapRegion.m_probeCounts[i]);
          }
      }

      // Read the result of RT and average the sub-rays. Sub-rays that hold the magenta marker (1, 0, 1) are skipped
      HVec3 radiance = 0.0;
      Vec2 moments = 0.0;
      F32 weightSum = 0.0;
      for(U32 subray = 0; subray < g_consts.m_rayCountPerTexel; ++subray)
      {
          const HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, octMapTexelIdx * g_consts.m_rayCountPerTexel + subray));

          const F32 weight = 1.0 / g_consts.m_rayCountPerTexel;
          if(any(comp.xyz != HVec3(1.0, 0.0, 1.0)))
          {
              // Square the distance in F32. The stored distance can be as high as 1000 and its square doesn't fit in
              // F16 (max 65504), squaring the F16 value would produce an infinite 2nd moment
              const F32 dist = comp.w;

              radiance += comp.xyz * weight;
              moments += Vec2(dist, square(dist)) * weight;
              weightSum += weight;
          }
      }

      if(weightSum > 0.0)
      {
          radiance /= weightSum;
          moments /= weightSum;
      }

      // Compute probe info
      const Vec3 probeSize = clipmapSize / idConsts.m_probeCounts;
      const Vec3 probeWorldPos = probeId * probeSize + probeSize * 0.5 + idConsts.m_aabbMins[clipmapIdx].xyz;

      // Only blend with the history on partial updates and only if the probe was inside the clipmap of the previous frame
      const Bool blendWithHistory =
          g_consts.m_clipmapRegion.m_partialUpdate && all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMin + clipmapSize);

      // Texel of the probe in a volume that doesn't contain octahedron maps. The volume wraps around (frac) and Y/Z are
      // swapped
      UVec3 noOctTexCoord = frac(probeWorldPos / clipmapSize) * idConsts.m_probeCounts;
      noOctTexCoord = min(noOctTexCoord, idConsts.m_probeCounts - 1u);
      noOctTexCoord = noOctTexCoord.xzy;

      // Update the radiance and distance moments volumes
      {
          // Compute oct coord
          const UVec2 radianceOctCoord = UVec2(octMapTexelIdx % RADIANCE_OCTAHEDRON_MAP_SIZE, octMapTexelIdx / RADIANCE_OCTAHEDRON_MAP_SIZE);
          ANKI_ASSERT(all(radianceOctCoord < RADIANCE_OCTAHEDRON_MAP_SIZE));

          // Every probe owns an octahedron map surrounded by a border of one texel, thus the +2 and the +1
          UVec3 actualVolumeTexCoord;
          actualVolumeTexCoord.xy = radianceOctCoord + noOctTexCoord.xy * (RADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1;
          actualVolumeTexCoord.z = noOctTexCoord.z;

          HVec3 avgRadiance = 0.0;
          Vec2 avgMoments = 0.0;
          if(blendWithHistory)
          {
              const F16 blendFactor = 0.1 / 4.0;

              const HVec3 prevValue = TEX(g_radianceVolume, actualVolumeTexCoord).xyz;
              avgRadiance = lerp(prevValue, radiance, blendFactor);

              const Vec2 prevValue2 = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
              avgMoments = lerp(prevValue2, moments, blendFactor);
          }
          else
          {
              avgRadiance = radiance;
              avgMoments = moments;
          }

          TEX(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
          TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy = avgMoments;

          // Set oct borders. The texel is replicated to the border texels that octahedronBorder() returns
          IVec2 borders[3];
          const U32 borderCount = octahedronBorder(RADIANCE_OCTAHEDRON_MAP_SIZE, radianceOctCoord, borders);
          for(U32 i = 0; i < borderCount; ++i)
          {
              const IVec3 borderTexCoord = IVec3(actualVolumeTexCoord) + IVec3(borders[i], 0);

              TEX(g_radianceVolume, borderTexCoord).xyz = avgRadiance;
              TEX(g_distanceMomentsVolume, borderTexCoord).xy = avgMoments;
          }
      }

      // Update probe validity. Done by a single thread per probe
      if(octMapTexelIdx == 0)
      {
          // Loop the directions again. Only the 1st sub-ray of every texel is checked
          F32 invalidRayCount = 0.0;
          for(U32 i = 0; i < octMapTexelCount; ++i)
          {
              const U32 subray = 0;
              const HVec3 rayRadiance = TEX(g_rtResultTex, UVec2(probeIdx, i * g_consts.m_rayCountPerTexel + subray)).xyz;

              if(all(rayRadiance == HVec3(1.0, 0.0, 1.0)))
              {
                  invalidRayCount += 1.0;
              }
          }

          // The probe is fully invalid when a quarter (or more) of its rays are invalid
          F32 valid = 1.0 - min(1.0, invalidRayCount / F32(octMapTexelCount / 4));
          if(blendWithHistory)
          {
              const F32 prev = TEX(g_probeValidiryVolume, noOctTexCoord).x;
              valid = lerp(prev, valid, 0.05);
          }

          TEX(g_probeValidiryVolume, noOctTexCoord).x = valid;
      }
  }
  274. #endif
  275. // ===========================================================================
  276. // ComputeIrradiance =
  277. // ===========================================================================
  278. #if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
  // Each thread is touching a radiance texel. The thread count is the texel count of the radiance octahedron map
  // rounded up to a multiple of 32
  constexpr U32 kThreadCount = (RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE + 32 - 1) / 32 * 32;

  // One volume per clipmap
  Texture3D<Vec4> g_radianceVolumes[kIndirectDiffuseClipmapCount] : register(t0);

  RWTexture3D<Vec4> g_irradianceVolumes[kIndirectDiffuseClipmapCount] : register(u0);
  RWTexture3D<Vec4> g_avgIrradianceVolumes[kIndirectDiffuseClipmapCount] : register(u3);

  ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);

  // Choose the SH order that the radiance is projected to. SH_TO_L1 converts whatever was chosen to L1
  # if IRRADIANCE_USE_SH_L2
  #     define SH_TYPE SH::L2_F16_RGB
  #     define SH_PROJECT_ONTO_LX SH::ProjectOntoL2
  #     define SH_TO_L1(x) SH::L2toL1(x)
  # else
  #     define SH_TYPE SH::L1_F16_RGB
  #     define SH_PROJECT_ONTO_LX SH::ProjectOntoL1
  #     define SH_TO_L1(x) (x)
  # endif

  // One SH per thread. Used for the reduction
  groupshared SH_TYPE g_sh[kThreadCount];
  // Functor passed to storeOctahedronBorder(). It writes the same irradiance value to a texel that is relative to
  // the start of a probe's octahedron map.
  struct StoreBorderFunc
  {
      IVec3 m_startOfOctCoord; // Texel where the octahedron map of the probe begins
      Vec3 m_value; // The value to write
      U32 m_clipmapIdx; // Chooses the irradiance volume

      // offset: 2D texel offset from m_startOfOctCoord
      void operator()(IVec2 offset)
      {
          TEX(g_irradianceVolumes[m_clipmapIdx], m_startOfOctCoord + IVec3(offset, 0)) = Vec4(m_value, 0.0);
      }
  };
  // The group services a single probe.
  // - Every thread reads a radiance value, converts it to SH and stores it in groupshared
  // - Then we do a reduction of all SH
  // - Then we use the SH to populate the irradiance and the average irradiance
  [numthreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
  {
      const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;

      // The X of the group ID encodes the clipmap as well as the X of the probe
      const U32 clipmapIdx = svGroupId.x / idConsts.m_probeCounts.x;
      const UVec3 probeId = UVec3(svGroupId.x % idConsts.m_probeCounts.x, svGroupId.y, svGroupId.z);

      // Read the radiance and put it in SH
      if(svGroupIndex < square(RADIANCE_OCTAHEDRON_MAP_SIZE))
      {
          UVec2 radianceOctCoordLocal;
          radianceOctCoordLocal.y = svGroupIndex / RADIANCE_OCTAHEDRON_MAP_SIZE;
          radianceOctCoordLocal.x = svGroupIndex % RADIANCE_OCTAHEDRON_MAP_SIZE;

          // Every probe owns an octahedron map surrounded by a border of one texel, thus the +2 and the +1
          UVec3 radianceTexelCoordStart = probeId.xzy;
          radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
          radianceTexelCoordStart.xy += 1;

          const Vec2 uv = (radianceOctCoordLocal + 0.5) / RADIANCE_OCTAHEDRON_MAP_SIZE;
          const Vec3 sampleDir = octahedronDecode(uv);

          const UVec3 coord = radianceTexelCoordStart + UVec3(radianceOctCoordLocal, 0);
          const Vec3 radiance = TEX(g_radianceVolumes[clipmapIdx], coord).xyz;

          const F16 sampleCountf = square(RADIANCE_OCTAHEDRON_MAP_SIZE);
          const F16 normalization = 1.0 / (sampleCountf * sampleDirectionSpherePdf());

          g_sh[svGroupIndex] = SH_PROJECT_ONTO_LX(HVec3(sampleDir), HVec3(radiance)) * normalization;
      }
      else
      {
          // Padding threads shouldn't contribute to the sum
          g_sh[svGroupIndex] = SH_TYPE::Zero();
      }

      GroupMemoryBarrierWithGroupSync();

      // Sum all the SH with a tree reduction. kThreadCount is a multiple of 32 but not always a power of two (160 for
      // a 12x12 map for example) so the active range is halved by rounding up. Plain "s >>= 1" halving drops elements
      // every time the range is odd
      [loop] for(U32 count = kThreadCount; count > 1u;)
      {
          const U32 halfCount = (count + 1u) / 2u;

          if(svGroupIndex + halfCount < count)
          {
              g_sh[svGroupIndex] = g_sh[svGroupIndex] + g_sh[svGroupIndex + halfCount];
          }

  # if ANKI_PLATFORM_MOBILE
          // Skip the group sync when what remains fits in a single wave
          if(halfCount > WaveGetLaneCount())
          {
              GroupMemoryBarrierWithGroupSync();
          }
  # else
          GroupMemoryBarrierWithGroupSync();
  # endif

          count = halfCount;
      }

  # if ANKI_PLATFORM_MOBILE
      // The last steps of the reduction skipped the group sync but all the waves read the sum right after
      GroupMemoryBarrierWithGroupSync();
  # endif

      const SH_TYPE sh = g_sh[0];

      // Store the irradiance. Only the first threads are needed since the irradiance map is smaller
      if(svGroupIndex < square(IRRADIANCE_OCTAHEDRON_MAP_SIZE))
      {
          UVec2 octCoordLocal;
          octCoordLocal.y = svGroupIndex / IRRADIANCE_OCTAHEDRON_MAP_SIZE;
          octCoordLocal.x = svGroupIndex % IRRADIANCE_OCTAHEDRON_MAP_SIZE;

          UVec3 texelCoordStart = probeId.xzy;
          texelCoordStart.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
          texelCoordStart.xy += 1;

          const Vec2 uv = (octCoordLocal + 0.5) / IRRADIANCE_OCTAHEDRON_MAP_SIZE;
          const Vec3 sampleDir = octahedronDecode(uv);
          const Vec3 irradiance = SH::CalculateIrradiance<F16>(sh, sampleDir);

          const UVec3 coord = texelCoordStart + UVec3(octCoordLocal, 0);
          TEX(g_irradianceVolumes[clipmapIdx], coord) = Vec4(irradiance, 0.0);

          // Write the borders
          StoreBorderFunc func;
          func.m_clipmapIdx = clipmapIdx;
          func.m_startOfOctCoord = texelCoordStart;
          func.m_value = irradiance;
          storeOctahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoordLocal, func);
      }

      // Store the average irradiance. It's one texel per probe
      HVec3 dir;
      HVec3 color;
      SH::ApproximateDirectionalLight(SH_TO_L1(sh), dir, color);
      if(isInfOrNan(Vec3(color)))
      {
          color = 0.0;
      }

      TEX(g_avgIrradianceVolumes[clipmapIdx], probeId.xzy) = Vec4(color, 0.0);
  }
  385. #endif
  386. // ===========================================================================
  387. // Apply =
  388. // ===========================================================================
  389. #if NOT_ZERO(ANKI_TECHNIQUE_Apply)
  Texture2D<Vec4> g_depthTex : register(t0);
  Texture2D<Vec4> g_gbufferRt2 : register(t1);
  Texture2D<Vec4> g_blueNoiseTex : register(t2);

  RWTexture2D<Vec4> g_outTex : register(u0);

  ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);

  SamplerState g_linearAnyRepeatSampler : register(s0);

  // Sample the clipmaps for a subset of the screen pixels and write the irradiance to a lower resolution texture.
  // Which full resolution pixel a thread shades depends on SPATIAL_RECONSTRUCT_TYPE.
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  {
      Vec2 lowResSize;
      g_outTex.GetDimensions(lowResSize.x, lowResSize.y);

      const UVec2 lowResCoord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);

  # if SPATIAL_RECONSTRUCT_TYPE == 0
      // Half the width. The chosen column alternates with the row
      const Vec2 fullResSize = lowResSize * Vec2(2.0, 1.0);
      const Vec2 fullResCoord = Vec2(lowResCoord.x * 2u + (lowResCoord.y & 1u), lowResCoord.y);
  # else
      // Half the width and half the height. One pixel every 2x2
      const Vec2 fullResSize = lowResSize * 2.0;
      const Vec2 fullResCoord = Vec2(lowResCoord * 2u);
  # endif

      if(any(fullResCoord >= fullResSize))
      {
          return;
      }

      // Unproject the depth to find the world position
      const F32 depth = TEX(g_depthTex, fullResCoord).r;
      const Vec2 ndc = uvToNdc((fullResCoord + 0.5) / fullResSize);
      const Vec4 worldPos4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
      const Vec3 worldPos = worldPos4.xyz / worldPos4.w;

      const Vec3 normal = unpackNormalFromGBuffer(TEX(g_gbufferRt2, fullResCoord));

      // Random value out of animated blue noise
      UVec2 noiseTexSize;
      g_blueNoiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
      Vec3 noise3 = TEX(g_blueNoiseTex, lowResCoord % noiseTexSize);
      noise3 = animateBlueNoise(noise3, g_globalRendererConstants.m_frame);
      const F32 noise = noise3.x;

      // Both methods sample with the same flags. Method 0 reads the irradiance, anything else the average irradiance
      const U32 method = 0;
      const SampleClipmapFlag flags = kSampleClipmapFlagFullQuality | kSampleClipmapFlagBiasSamplePointTowardsCamera;

      Vec3 irradiance;
      if(method != 0)
      {
          irradiance = sampleClipmapAvgIrradiance(worldPos, normal, g_globalRendererConstants.m_cameraPosition,
                                                  g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, flags, noise);
      }
      else
      {
          irradiance = sampleClipmapIrradiance(worldPos, normal, g_globalRendererConstants.m_cameraPosition,
                                               g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, flags, noise);
      }

      TEX(g_outTex, lowResCoord) = Vec4(irradiance, 0.0);
  }
  440. #endif
  441. // ===========================================================================
  442. // RtMaterialFetch (Apply) =
  443. // ===========================================================================
  444. #if(ANKI_TECHNIQUE_RtMaterialFetch && !RT_MATERIAL_FETCH_CLIPMAP) || ANKI_TECHNIQUE_ApplyInlineRt
  445. # define INCLUDE_ALL
  446. # include <AnKi/Shaders/RtMaterialFetch.hlsl>
  447. # define INLINE_RT ANKI_TECHNIQUE_ApplyInlineRt
  // Fast constants of the per-pixel ray tracing. Only m_rayMax is read by this shader, everything else is padding.
  struct Consts
  {
      F32 m_rayMax; // Max distance of the 1st bounce rays
      F32 m_padding1;
      F32 m_padding2;
      F32 m_padding3;

      Vec4 m_padding[2];
  };

  ANKI_FAST_CONSTANTS(Consts, g_consts)
  // Per-pixel indirect diffuse, either as a raygen shader or as an inline RT compute shader. Every thread shades one
  // low resolution pixel: it shoots a cosine weighted ray from the surface, lights the hit and adds the irradiance
  // of the clipmaps (sampled at the hit, or at the surface if nothing was hit).
  # if INLINE_RT
  [numthreads(8, 8, 1)] void main(COMPUTE_ARGS)
  # else
  [Shader("raygeneration")] void main()
  # endif
  {
  # if INLINE_RT
      const UVec2 lowResCoord = svDispatchThreadId.xy;

      UVec2 lowResSize;
      g_colorAndPdfTex.GetDimensions(lowResSize.x, lowResSize.y);
      if(any(lowResCoord >= lowResSize))
      {
          return;
      }
  # else
      const UVec2 lowResCoord = DispatchRaysIndex().xy;
      const UVec2 lowResSize = DispatchRaysDimensions().xy;
  # endif

      // Find the full resolution pixel that this thread shades
  # if SPATIAL_RECONSTRUCT_TYPE == 0
      const UVec2 fullCoord = UVec2(lowResCoord.x * 2u + (lowResCoord.y & 1u), lowResCoord.y);
      const Vec2 uv = (fullCoord + 0.5) / (lowResSize * UVec2(2, 1));
  # else
      const UVec2 fullCoord = lowResCoord * 2u;
      const Vec2 uv = (fullCoord + 0.5) / (lowResSize * 2);
  # endif

      // Read the surface
      const F32 depth = TEX(g_depthTex, fullCoord).x;
      const Vec3 worldNormal = unpackNormalFromGBuffer(TEX(g_gbufferRt2, fullCoord));

      const Vec4 unprojected = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(uvToNdc(uv), depth, 1.0));
      const Vec3 worldPos = unprojected.xyz / unprojected.w;

      // Push the ray origin a bit towards the camera
      const Vec3 toCamera = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
      const Vec3 biasedWorldPos = worldPos + toCamera * 0.1;

      // Random direction around the normal
      const UVec3 seed = rand3DPCG16(UVec3(fullCoord, g_globalRendererConstants.m_frame % 8u));
      const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
      const Mat3 tbn = rotationFromDirection(worldNormal);
      const Vec3 rayDir = normalize(mul(tbn, hemisphereSampleCos(randFactors)));

      // Trace
      const F32 tMax = g_consts.m_rayMax;
      constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
      GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
      F32 rayT = 0.0;
      Bool backfacing = false;
  # if INLINE_RT
      const Bool hit = materialRayTraceInlineRt<F16>(biasedWorldPos, rayDir, 0.01, tMax, 1000.0, gbuffer, rayT, backfacing);
  # else
      const Bool hit = materialRayTrace<F16>(biasedWorldPos, rayDir, 0.01, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
  # endif

      // If nothing was hit the clipmaps are sampled at the surface itself
      HVec3 radiance = 0.0;
      Vec3 hitPos = 0.0;
      Vec3 samplePoint = biasedWorldPos;
      Vec3 sampleNormal = worldNormal;
      if(hit)
      {
          hitPos = biasedWorldPos + rayDir * (rayT - 0.01);
          radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, 1000.0, kLocalLightShadow, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);

          // 2nd bounce
          samplePoint = hitPos;
          sampleNormal = gbuffer.m_worldNormal;
      }

      const SampleClipmapFlag flags = kSampleClipmapFlagFullQuality | kSampleClipmapFlagBiasSamplePointSurfaceNormal;
      const Vec3 irradiance = sampleClipmapIrradiance(samplePoint, sampleNormal, g_globalRendererConstants.m_cameraPosition,
                                                      g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, flags,
                                                      randFactors.x);

      // On a hit the irradiance is modulated by the diffuse color of the hit surface
      Vec3 final = irradiance;
      if(hit)
      {
          final = radiance + irradiance * gbuffer.m_diffuse;
      }

      TEX(g_colorAndPdfTex, lowResCoord) = Vec4(final, 0.0);
      // Disabled alternative that accumulates over time:
      // TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = lerp(TEX(g_colorAndPdfTex, DispatchRaysIndex().xy), Vec4(final, 0.0), 0.05);
  }
  540. #endif
  541. // ===========================================================================
  542. // SpatialReconstruct =
  543. // ===========================================================================
  544. #if NOT_ZERO(ANKI_TECHNIQUE_SpatialReconstruct)
  Texture2D<Vec3> g_inTex : register(t0); // The low resolution input
  Texture2D<F32> g_depthTex : register(t1);

  RWTexture2D<Vec4> g_outTex : register(u0); // The reconstructed output

  // Accumulate a sample into a weighted sum. The weight comes out of the depth difference between the sample and the
  // reference pixel and it's scaled by extraWeight.
  void appendSample(F32 refDepth, F32 sampleDepth, Vec3 sample, inout Vec3 sampleSum, inout F32 weightSum, F32 extraWeight = 1.0)
  {
      const F32 bilateralWeight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
      const F32 w = bilateralWeight * extraWeight;

      weightSum += w;
      sampleSum += sample * w;
  }
  // Turn a weighted sum into a weighted average. If the total weight is too small the result is zeroed instead.
  void normalizeSum(F32 weightSum, inout Vec3 sampleSum)
  {
      const Bool weightIsUsable = weightSum > kEpsilonF32 * 10.0;
      if(!weightIsUsable)
      {
          sampleSum = 0.0;
          return;
      }

      sampleSum /= weightSum;
  }
  // Reconstruct a 2x2 block of full resolution pixels out of an input that holds one sample per 2x2 block.
  // The top-left pixel of the block gets the sample as is, the other 3 are depth-aware blends of the neighbouring
  // samples.
  // svDispatchThreadId: the coordinate of the block, in other words the coordinate in the low resolution input
  void oneIn4Reconstruct(IVec2 svDispatchThreadId)
  {
      IVec2 viewportSize;
      g_outTex.GetDimensions(viewportSize.x, viewportSize.y);
      const IVec2 quarterViewportSize = viewportSize / 2;

      const IVec2 quarterCoord = svDispatchThreadId; // Coord in quarter rez

      // This is the pattern we are trying to fill
      // +---+---+
      // | 0 | 1 |
      // +---+---+
      // | 3 | 2 |
      // +---+---+

      // Gather the color of the neighbours and their depth
      Vec3 samples[2][2];
      F32 sampleDepths[2][2];
      F32 maxLuma = 0.0;
      IVec2 maxLumaPixel = 0;
      [unroll] for(U32 x = 0; x < 2; ++x)
      {
          [unroll] for(U32 y = 0; y < 2; ++y)
          {
              IVec2 coord = quarterCoord + IVec2(x, y);
              coord = min(coord, quarterViewportSize - 1);

              samples[x][y] = TEX(g_inTex, coord);

              const F32 luma = computeLuminance(samples[x][y]);
              if(luma > maxLuma)
              {
                  maxLuma = luma;
                  maxLumaPixel = IVec2(x, y);
              }

              // The depth should belong to the full resolution pixel that the sample represents, which is the top-left
              // pixel of the sample's 2x2 block. Reading it at quarterCoord * 2 + (x, y) would fetch the depth of the
              // pixels that are being reconstructed and their bilateral weight would always be the max
              sampleDepths[x][y] = TEX(g_depthTex, coord * 2);
          }
      }

      // Remove fireflies. Compare the brightest sample against the average of the other 3
      F32 avgLumaOf3 = 0.0;
      [unroll] for(U32 x = 0; x < 2; ++x)
      {
          [unroll] for(U32 y = 0; y < 2; ++y)
          {
              if(any(maxLumaPixel != IVec2(x, y)))
              {
                  const F32 luma = computeLuminance(samples[x][y]);
                  avgLumaOf3 += luma / 3.0;
              }
          }
      }

      if(maxLuma > avgLumaOf3 * 5.0)
      {
          // Firefly, tone it down
          samples[maxLumaPixel.x][maxLumaPixel.y] *= avgLumaOf3 / maxLuma;
      }

      // 0 is already filled, just write it
      IVec2 coord = quarterCoord * 2;
      TEX(g_outTex, coord) = Vec4(samples[0][0], 0.0);

      // For 2 use 4 samples
      coord = quarterCoord * 2 + 1;
      F32 refDepth = TEX(g_depthTex, coord);
      Vec3 sampleSum = 0.0;
      F32 weightSum = 0.0;
      appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum);
      appendSample(refDepth, sampleDepths[1][0], samples[1][0], sampleSum, weightSum);
      appendSample(refDepth, sampleDepths[1][1], samples[1][1], sampleSum, weightSum);
      appendSample(refDepth, sampleDepths[0][1], samples[0][1], sampleSum, weightSum);
      normalizeSum(weightSum, sampleSum);
      TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);

      // Keep it around, 1 and 3 use it as an extra sample
      const Vec3 sample2 = sampleSum;
      const F32 depth2 = refDepth;

      // For 1 use 3 samples
      coord = quarterCoord * 2 + IVec2(1, 0);
      refDepth = TEX(g_depthTex, coord);
      sampleSum = 0.0;
      weightSum = 0.0;
      appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum, 1.0);
      appendSample(refDepth, sampleDepths[1][0], samples[1][0], sampleSum, weightSum, 1.0);
      appendSample(refDepth, depth2, sample2, sampleSum, weightSum, 0.5); // Less weight on that since it's reconstructed
      normalizeSum(weightSum, sampleSum);
      TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);

      // For 3 use 3 samples
      coord = quarterCoord * 2 + IVec2(0, 1);
      refDepth = TEX(g_depthTex, coord);
      sampleSum = 0.0;
      weightSum = 0.0;
      appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum, 1.0);
      appendSample(refDepth, sampleDepths[0][1], samples[0][1], sampleSum, weightSum, 1.0);
      appendSample(refDepth, depth2, sample2, sampleSum, weightSum, 0.5); // Less weight on that since it's reconstructed
      normalizeSum(weightSum, sampleSum);
      TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);
  }
  // Reconstructs a pair of horizontally adjacent pixels of g_outTex out of g_inTex, which is addressed at half the
  // width of g_outTex. One pixel of the pair is copied from g_inTex as is, the other one is interpolated from its 4
  // direct neighbours using depth-aware (bilateral) weights. Which pixel of the pair is which alternates per row.
  // svDispatchThreadId: x selects the pixel pair and y the row.
  void checkerboardReconstruct(IVec2 svDispatchThreadId)
  {
      IVec2 outSize;
      g_outTex.GetDimensions(outSize.x, outSize.y);

      const I32 row = svDispatchThreadId.y;
      const I32 pairStartX = svDispatchThreadId.x * 2;

      // Even rows: the 1st pixel of the pair is the filled one. Odd rows: it's the 2nd
      const IVec2 filledCoord = IVec2(pairStartX + (row & 1), row);
      const IVec2 toBeFilledCoord = IVec2(pairStartX + ((row + 1) & 1), row);

      const F32 refDepth = TEX(g_depthTex, toBeFilledCoord);

      Vec3 toBeFilledColor = 0.0;
      F32 weightSum = 0.0;

      // Left, right, up and down
      const IVec2 neighbourOffsets[4] = {IVec2(-1, 0), IVec2(1, 0), IVec2(0, -1), IVec2(0, 1)};
      [unroll] for(U32 n = 0; n < 4; ++n)
      {
          const IVec2 neighbourCoord = toBeFilledCoord + neighbourOffsets[n];

          const Bool insideViewport = all(neighbourCoord >= 0) && all(neighbourCoord < outSize);
          if(!insideViewport)
          {
              continue;
          }

          const F32 neighbourDepth = TEX(g_depthTex, neighbourCoord);

          // The input packs 2 output pixels per texel in X, thus the division
          const Vec3 neighbourColor = TEX(g_inTex, IVec2(neighbourCoord.x / 2, neighbourCoord.y));

          appendSample(refDepth, neighbourDepth, neighbourColor, toBeFilledColor, weightSum);

          // One of the horizontal neighbours is the filled pixel of this pair. Write it out since we already have it
          if(all(neighbourCoord == filledCoord))
          {
              TEX(g_outTex, filledCoord) = Vec4(neighbourColor, 0.0);
          }
      }

      normalizeSum(weightSum, toBeFilledColor);
      TEX(g_outTex, toBeFilledCoord) = Vec4(toBeFilledColor, 0.0);
  }
  // Entry point of the spatial reconstruction. SPATIAL_RECONSTRUCT_TYPE selects the method: 0 is the checkerboard
  // reconstruction and everything else is the one-in-four reconstruction.
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  {
      // The group is flat (64x1x1), remap it to a 2D coordinate
      const IVec2 threadCoord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);

  #   if SPATIAL_RECONSTRUCT_TYPE != 0
      oneIn4Reconstruct(threadCoord);
  #   else
      checkerboardReconstruct(threadCoord);
  #   endif
  }
  690. #endif
  691. // ===========================================================================
  692. // TemporalDenoise =
  693. // ===========================================================================
  694. #if NOT_ZERO(ANKI_TECHNIQUE_TemporalDenoise)
  // Inputs
  Texture2D<F32> g_historyLengthTex : register(t0); // main() scales what it reads by kMaxHistoryLength
  Texture2D<Vec2> g_motionVectorsTex : register(t1); // Added to the current UV to find the UV in the history
  Texture2D<Vec3> g_historyTex : register(t2); // The result of the previous frame
  Texture2D<Vec4> g_currentTex : register(t3); // The result of the current frame. Only the RGB is used

  // Output. RGB is the denoised color and A is the history length
  RWTexture2D<Vec4> g_outTex : register(u0);

  SamplerState g_linearAnyClampSampler : register(s0); // Used to sample the history

  ConstantBuffer<GlobalRendererConstants> g_globalRendererConsts : register(b0);
  // Entry point of the temporal denoise. Blends the current frame with the reprojected history. The longer the
  // history of a pixel the more the history contributes. Pixels with (almost) no history just pass the current
  // color through. The history length is forwarded in the alpha channel of the output.
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  {
      const Vec2 texel = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);

      // Blend factor is the weight of the current frame
      const F32 kMinBlend = 0.025;
      const F32 kMaxBlend = 0.9;
      const F32 kHistoryLenForMinBlend = 1.0; // History length at which the blend factor bottoms out

      const F32 historyLen = TEX(g_historyLengthTex, texel) * kMaxHistoryLength;

      const F32 historyConfidence = min(1.0, historyLen / kHistoryLenForMinBlend);
      const F32 blendFactor = lerp(kMaxBlend, kMinBlend, historyConfidence);

      Vec3 outColor = TEX(g_currentTex, texel).xyz;

      // Very high blend factor means that there is no history worth accumulating
      const Bool skipAccumulation = blendFactor > kMaxBlend * 0.9;
      if(!skipAccumulation)
      {
          Vec2 viewport;
          g_historyLengthTex.GetDimensions(viewport.x, viewport.y);

          // UV of the texel center
          const Vec2 uv = (texel + 0.5) / viewport;

          // Difference of the jitter between the 2 frames, converted from NDC to UV
          const Vec2 jitterDelta =
              (g_globalRendererConsts.m_previousMatrices.m_jitterOffsetNdc - g_globalRendererConsts.m_matrices.m_jitterOffsetNdc)
              / Vec2(2.0, -2.0);

          const Vec2 historyUv = uv + TEX(g_motionVectorsTex, texel) + jitterDelta;

          const Vec3 history = g_historyTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0);
          outColor = lerp(history, outColor, blendFactor);
      }

      TEX(g_outTex, texel) = Vec4(outColor, historyLen);
  }
  728. #endif
  729. // ===========================================================================
  730. // BilateralDenoise =
  731. // ===========================================================================
  732. #if NOT_ZERO(ANKI_TECHNIQUE_BilateralDenoise)
  // Input color. main() reads RGB as the color and A as the history length
  Texture2D<Vec4> g_inTex : register(t0);

  // Depth used to compute the bilateral weights
  Texture2D<F32> g_depthTex : register(t1);

  // Output. main() writes the blurred color in RGB and zero in A
  RWTexture2D<Vec4> g_outTex : register(u0);
  // Entry point of the bilateral denoise. Blurs the input with a square kernel whose samples are weighted by how
  // close their depth is to the depth of the center. The shorter the history of a pixel the wider the kernel.
  [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
  {
      const IVec2 center = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);

      IVec2 viewport;
      g_outTex.GetDimensions(viewport.x, viewport.y);

      if(any(center >= viewport))
      {
          return;
      }

      const F32 refDepth = TEX(g_depthTex, center);

      const Vec4 centerTexel = TEX(g_inTex, center);
      const F32 historyLen = centerTexel.w;

      // Start the accumulation with the center sample
      F32 weightSum = calculateBilateralWeightDepth<F32>(0.0, 0.0, 1.0); // Highest weight that this function can give
      Vec3 colorSum = centerTexel.xyz * weightSum;

      // 1 when there is no history and 0 when the history is 12 or longer
      const F32 blurFactor = 1.0 - min(1.0, historyLen / 12.0);

      // Half size of the kernel, at least 1
      const I32 kernelRadius = max(1.0, kMaxBilateralSamplesPerDirection * blurFactor);

      for(I32 x = -kernelRadius; x <= kernelRadius; ++x)
      {
          for(I32 y = -kernelRadius; y <= kernelRadius; ++y)
          {
              // The center is already in the sums
              if(x == 0 && y == 0)
              {
                  continue;
              }

              // Samples that fall outside the viewport reuse the closest texel of the border
              const IVec2 tapCoord = clamp(center + IVec2(x, y), 0, viewport - 1);

              const Vec3 tapColor = TEX(g_inTex, tapCoord).xyz;
              const F32 tapDepth = TEX(g_depthTex, tapCoord);

              const F32 tapWeight = calculateBilateralWeightDepth<F32>(refDepth, tapDepth, 1.0);

              colorSum += tapColor * tapWeight;
              weightSum += tapWeight;
          }
      }

      colorSum /= weightSum;

      TEX(g_outTex, center) = Vec4(colorSum, 0.0);
  }
  773. #endif
  774. // ===========================================================================
  775. // VisualizeProbes =
  776. // ===========================================================================
  777. #if NOT_ZERO(ANKI_TECHNIQUE_VisualizeProbes)
  // Input of the vertex shader. No vertex buffers, everything is derived from the 2 system values
  struct VertIn
  {
      U32 m_svVertexId : SV_VERTEXID; // Used to index cubeIndices
      U32 m_svInstanceId : SV_INSTANCEID; // The flat index of the probe, one instance per probe
  };
  // Output of the vertex shader and input of the pixel shader
  struct VertOut
  {
      Vec4 m_svPosition : SV_POSITION;
      Vec3 m_probeCenter : PROBE_CENTER; // World space position of the center of the probe
  };
  // Output of the pixel shader
  struct FragOut
  {
      Vec4 m_color : SV_TARGET0;
      F32 m_svDepth : SV_Depth; // The pixel shader overrides the depth with that of the sphere it ray-traces
  };
  // Per-draw constants
  struct Consts
  {
      U32 m_clipmapIdx; // The clipmap whose probes will be visualized

      // Not read by the shaders in this technique. Presumably there to pad the struct to 16 bytes, TODO confirm
      U32 m_padding1;
      U32 m_padding2;
      U32 m_padding3;
  };
  ANKI_FAST_CONSTANTS(Consts, g_consts)

  ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);

  // The volume to visualize. The pixel shader samples it to color the probes
  Texture3D<Vec4> g_volume : register(t0);

  // One texel per probe. The pixel shader treats a probe as valid if the X component is above 0.9
  Texture3D<Vec4> g_probeValidityVolume : register(t1);

  SamplerState g_linearAnyRepeatSampler : register(s0); // Used to sample g_volume

  // Radius of the sphere drawn for every probe. It's also the half size of the cube that bounds the sphere
  constexpr F32 kSphereRadius = 0.05;
  806. # if ANKI_VERTEX_SHADER
  // The 8 corners of a cube that spans [-1, 1]. They are looked up through cubeIndices
  constexpr Vec3 cubeVertices[8] = {
      // Z = -1
      Vec3(-1, -1, -1), Vec3(1, -1, -1), Vec3(1, 1, -1), Vec3(-1, 1, -1),
      // Z = +1
      Vec3(-1, -1, 1), Vec3(1, -1, 1), Vec3(1, 1, 1), Vec3(-1, 1, 1)};

  // Triangle list of the cube, indexed by SV_VERTEXID: 6 faces, 2 triangles per face, 36 indices in total
  constexpr U32 cubeIndices[36] = {
      0, 1, 2, 2, 3, 0, // Z = -1
      1, 5, 6, 6, 2, 1, // X = +1
      5, 4, 7, 7, 6, 5, // Z = +1
      4, 0, 3, 3, 7, 4, // X = -1
      3, 2, 6, 6, 7, 3, // Y = +1
      4, 5, 1, 1, 0, 4}; // Y = -1
  // Vertex shader. Every instance is a probe of the clipmap selected by g_consts.m_clipmapIdx. It emits a small
  // cube around the center of the probe. The pixel shader will later carve a sphere out of that cube.
  VertOut main(VertIn input)
  {
      const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
      const U32 clipmapIdx = g_consts.m_clipmapIdx;
      const UVec3 probeCounts = idConsts.m_probeCounts;

      // Instance index -> 3D probe coordinate
      UVec3 probeCoord;
      unflatten3dArrayIndex(probeCounts.z, probeCounts.y, probeCounts.x, input.m_svInstanceId, probeCoord.z, probeCoord.y, probeCoord.x);

      // The probe sits in the middle of its cell
      const Vec3 probeSize = idConsts.m_sizes[clipmapIdx] / probeCounts;
      const Vec3 cellOffset = probeCoord * probeSize;
      const Vec3 probeWorldPos = cellOffset + probeSize * 0.5 + idConsts.m_aabbMins[clipmapIdx];

      // Scale the unit cube to the size of the sphere and move it to the probe
      const Vec3 unitCubePos = cubeVertices[cubeIndices[input.m_svVertexId]];
      const Vec3 worldPos = unitCubePos * kSphereRadius + probeWorldPos;

      VertOut output;
      output.m_probeCenter = probeWorldPos;
      output.m_svPosition = mul(g_globalRendererConstants.m_matrices.m_viewProjectionJitter, Vec4(worldPos, 1.0));
      return output;
  }
  832. # endif // ANKI_VERTEX_SHADER
  833. # if ANKI_PIXEL_SHADER
  // Pixel shader. Intersects the view ray of the pixel with the sphere of the probe. Pixels that miss the sphere
  // are discarded. The rest get the depth of the hit point and are colored with what g_volume holds for the probe.
  // Probes that are not valid are painted magenta.
  FragOut main(VertOut input)
  {
      const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
      const U32 clipmapIdx = g_consts.m_clipmapIdx;
      const UVec3 probeCounts = idConsts.m_probeCounts;

      // Unproject the pixel to the far plane to build the view ray
      const Vec2 ndc = uvToNdc(input.m_svPosition.xy / g_globalRendererConstants.m_renderingSize);
      const Vec4 farPoint4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, 1.0, 1.0));
      const Vec3 farPoint = farPoint4.xyz / farPoint4.w;
      const Vec3 rayDir = normalize(farPoint - g_globalRendererConstants.m_cameraPosition);

      // Ray vs sphere
      F32 t0, t1;
      const Bool hit = testRaySphere(g_globalRendererConstants.m_cameraPosition, rayDir, input.m_probeCenter, kSphereRadius, t0, t1);
      if(!hit)
      {
          discard;
      }

      // The smaller of the 2 distances
      const Vec3 hitPoint = g_globalRendererConstants.m_cameraPosition + rayDir * min(t0, t1);

      FragOut output;

      // Replace the depth of the cube with the depth of the sphere
      const Vec4 hitPointClip = mul(g_globalRendererConstants.m_matrices.m_viewProjectionJitter, Vec4(hitPoint, 1.0));
      output.m_svDepth = hitPointClip.z / hitPointClip.w;

      UVec3 volumeSize;
      g_volume.GetDimensions(volumeSize.x, volumeSize.y, volumeSize.z);

      // Find the texel of the probe. The volumes swap Y and Z and wrap around
      Vec3 uvw = frac(input.m_probeCenter.xzy / idConsts.m_sizes[clipmapIdx].xzy);
      const UVec3 probeTexel = uvw * probeCounts.xzy;

      // If the volume is wider than the number of probes then every probe is a 2D tile instead of a single texel
      const Bool hasOctMap = volumeSize.x != probeCounts.x;
      if(!hasOctMap)
      {
          // Center of the texel of the probe
          uvw = probeTexel + 0.5;
          uvw /= volumeSize;
      }
      else
      {
          // The tile has a border of 1 texel on each side
          const U32 octProbeSize = volumeSize.x / probeCounts.x - 2;
          const U32 octProbeSizeWithBorder = octProbeSize + 2;

          // Pick the texel of the tile using the direction from the center of the probe to the hit point
          const Vec3 normal = normalize(hitPoint - input.m_probeCenter);

          uvw.xy = probeTexel.xy * octProbeSizeWithBorder;
          uvw.xy += octahedronEncode(normal) * octProbeSize + 1.0;
          uvw.xy /= probeCounts.xz * octProbeSizeWithBorder;

          uvw.z = (probeTexel.z + 0.5) / probeCounts.y;
      }

      // Magenta marks the invalid probes
      Vec3 radiance = Vec3(1.0, 0.0, 1.0);

      const Bool valid = TEX(g_probeValidityVolume, probeTexel).x > 0.9;
      if(valid)
      {
          radiance = g_volume.SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0).xyz;
      }

      output.m_color = Vec4(radiance, 0.0);
      return output;
  }
  888. # endif // ANKI_PIXEL_SHADER
  889. #endif