LightFunctions.hlsl 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. // Contains functions for light calculations
  6. #pragma once
  7. #include <AnKi/Shaders/Functions.hlsl>
  8. #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
  9. #include <AnKi/Shaders/PackFunctions.hlsl>
  10. #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
  11. #include <AnKi/Shaders/Include/MiscRendererTypes.h>
  // 4-tap Poisson disk. Used as the sample kernel by the PCSS blocker search and by the spot/point light PCF loops.
  constexpr Vec2 kPoissonDisk4[4u] = {
      Vec2(-0.15, 0.06),
      Vec2(0.14, -0.48),
      Vec2(-0.05, 0.97),
      Vec2(0.58, -0.18)};

  // 8-tap Poisson disk. Used as the sample kernel by the directional light PCF loop.
  constexpr Vec2 kPoissonDisk8[8u] = {
      Vec2(-0.16, 0.66),
      Vec2(0.15, -0.02),
      Vec2(-0.76, 0.49),
      Vec2(-0.57, -0.48),
      Vec2(0.82, -0.43),
      Vec2(0.91, 0.15),
      Vec2(-0.39, -0.86),
      Vec2(0.24, -0.76)};
  // Fresnel term unreal: F0 + (1 - F0) * 2^((-5.55473 * VoH - 6.98316) * VoH)
  // specular: The specular color aka F0
  // VoH: presumably dot(view, half) - it is only used as the scalar input of the exponent
  Vec3 F_Unreal(Vec3 specular, F32 VoH)
  {
      // exp2() is the dedicated intrinsic for 2^x, there is no need to go through the general pow()
      const F32 fresnelWeight = exp2((-5.55473 * VoH - 6.98316) * VoH);
      return specular + (1.0 - specular) * fresnelWeight;
  }
  // Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
  // It has lower VGRPs than F_Unreal
  // f0: The specular color at normal incidence
  // Returns w + f0 * (1 - w) where w = max(0, 1 - VoH)^5
  template<typename T>
  vector<T, 3> F_Schlick(vector<T, 3> f0, T VoH)
  {
      // Clamp the base so that pow() never sees a negative value
      const T oneMinusVoH = max(T(0), T(1) - VoH);
      const T weight = pow(oneMinusVoH, T(5.0));
      return weight + f0 * (T(1) - weight);
  }
  // D(n,h) aka NDF: GGX Trowbridge-Reitz
  // h and worldNormal are only consumed by the (currently compiled-out) cross product variant
  // The result is saturated to [0, 1]
  template<typename T>
  T D_GGX(T roughness, T NoH, vector<T, 3> h, vector<T, 3> worldNormal)
  {
  #if 0 && ANKI_PLATFORM_MOBILE
      // 1 - NoH^2 computed as |n x h|^2
      const vector<T, 3> NxH = cross(worldNormal, h);
      const T oneMinusNoHSquared = dot(NxH, NxH);
  #else
      ANKI_MAYBE_UNUSED(h);
      ANKI_MAYBE_UNUSED(worldNormal);
      const T oneMinusNoHSquared = T(1) - NoH * NoH;
  #endif
      const T alpha = roughness * roughness;
      const T scaledNoH = NoH * alpha;
      const T ratio = alpha / (oneMinusNoHSquared + scaledNoH * scaledNoH);
      return saturate(ratio * ratio * T(1.0 / kPi));
  }
  // Visibility term: Geometric shadowing divided by BRDF denominator
  // Computes 0.25 / ((NoV * (1 - k) + k) * (NoL * (1 - k) + k)) with k = roughness^2 / 2
  template<typename T>
  T V_Schlick(T roughness, T NoV, T NoL)
  {
      const T k = (roughness * roughness) * T(0.5);
      const T oneMinusK = T(1) - k;
      const T termV = NoV * oneMinusK + k;
      const T termL = NoL * oneMinusK + k;
      return T(0.25) / (termV * termL);
  }
  // Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
  // Computes 0.5 / lerp(2 * NoL * NoV, NoL + NoV, roughness^2) and saturates the result to [0, 1]
  template<typename T>
  T V_SmithGGXCorrelatedFast(T roughness, T NoV, T NoL)
  {
      const T alpha = roughness * roughness;
      const T denominator = lerp(T(2) * NoL * NoV, NoL + NoV, alpha);
      return saturate(T(0.5) / denominator);
  }
  // Lambertian diffuse term: the constant 1 / pi
  template<typename T>
  T Fd_Lambert()
  {
      const T invPi = T(1.0 / kPi);
      return invPi;
  }
  // Diffuse lobe: the diffuse color scaled by the Lambert term
  template<typename T>
  vector<T, 3> diffuseLobe(vector<T, 3> diffuse)
  {
      const T lambert = Fd_Lambert<T>();
      return diffuse * lambert;
  }
  // Performs BRDF specular lighting
  // Returns F * (V * D) with F = F_Schlick, D = D_GGX and V = V_SmithGGXCorrelatedFast
  // The half vector is built by normalizing frag2Light + viewDir
  template<typename T>
  vector<T, 3> specularIsotropicLobe(vector<T, 3> normal, vector<T, 3> f0, T roughness, vector<T, 3> viewDir, vector<T, 3> frag2Light)
  {
      const vector<T, 3> halfVec = normalize(frag2Light + viewDir);

      // Clamped cosines. NoV has a 0.05 floor instead of 0.0
      const T NoL = max(0.0, dot(normal, frag2Light));
      const T NoV = max(0.05, dot(normal, viewDir));
      const T NoH = max(0.0, dot(normal, halfVec));
      const T VoH = max(0.0, dot(viewDir, halfVec));

      const vector<T, 3> fresnel = F_Schlick(f0, VoH);
      const T distribution = D_GGX(roughness, NoH, halfVec, normal);
      const T visibility = V_SmithGGXCorrelatedFast(roughness, NoV, NoL);

      return fresnel * (visibility * distribution);
  }
  // Reads the integration LUT at UV = (roughness, NoV), mip 0, and blends its two channels with F0:
  // lerp(lut.x, lut.y, F0)
  template<typename T>
  vector<T, 3> specularDFG(vector<T, 3> F0, T roughness, Texture2D<Vec4> integrationLut, SamplerState integrationLutSampler, T NoV)
  {
      const vector<T, 2> lutUv = vector<T, 2>(roughness, NoV);
      const vector<T, 2> envBRDF = integrationLut.SampleLevel(integrationLutSampler, lutUv, 0.0).xy;
      return lerp(envBRDF.xxx, envBRDF.yyy, F0);
  }
  // Spot light cone factor: smoothstep between outerCos and innerCos of the cosine of the angle between the spot direction and
  // the light-to-fragment direction (the negated normalizedFrag2Light)
  template<typename T>
  T computeSpotFactor(vector<T, 3> normalizedFrag2Light, T outerCos, T innerCos, vector<T, 3> spotDir)
  {
      const T cosAngle = -dot(normalizedFrag2Light, spotDir);
      return smoothstep(outerCos, innerCos, cosAngle);
  }
  // PCSS calculation. Can be visualized here for spot lights: https://www.desmos.com/calculator/l0viaopwbi
  // and here for directional: https://www.desmos.com/calculator/0dh0ybqvv1
  template<typename T>
  struct Pcss
  {
      // Sampler used to read raw depth from the shadowmap during the blocker search
      SamplerState m_linearClampSampler;

      // Search for occluders around projCoords.xy using the kPoissonDisk4 kernel rotated by (cosTheta, sinTheta) and scaled by searchDist.
      // Returns (penumbra factor, number of taps that found an occluder). The penumbra factor is zero when no tap or all the taps found an
      // occluder.
      vector<T, 2> computePenumbra(Texture2D<Vec4> shadowmap, Vec2 searchDist, Vec3 projCoords, T cosTheta, T sinTheta, F32 lightSize, Bool dirLight)
      {
          T occludedTaps = 0.0;
          F32 occluderZSum = 0.0;

          [unroll] for(U32 tap = 0u; tap < ARRAY_SIZE(kPoissonDisk4); ++tap)
          {
              const vector<T, 2> diskPoint = kPoissonDisk4[tap];

              // Rotate the disk point
              const vector<T, 2> rotatedDiskPoint =
                  vector<T, 2>(diskPoint.x * cosTheta - diskPoint.y * sinTheta, diskPoint.y * cosTheta + diskPoint.x * sinTheta);

              // Read the depth stored at the offsetted position
              const Vec2 tapUv = projCoords.xy + rotatedDiskPoint * searchDist;
              const F32 occluderZ = shadowmap.SampleLevel(m_linearClampSampler, tapUv, 0.0).x;

              if(projCoords.z >= occluderZ)
              {
                  occludedTaps += 1.0;
                  occluderZSum += occluderZ;
              }
          }

          // A penumbra exists only when some, but not all, of the taps are occluded
          T penumbra = 0.0;
          if(occludedTaps != 0.0 && occludedTaps != ARRAY_SIZE(kPoissonDisk4))
          {
              const F32 avgOccluderZ = occluderZSum / occludedTaps;

              if(dirLight)
              {
                  // Dir light's depth is linear
                  penumbra = (projCoords.z - avgOccluderZ) * lightSize / kPcssDirLightMaxPenumbraMeters;
                  penumbra *= penumbra;
              }
              else
              {
                  penumbra = (projCoords.z - avgOccluderZ) * lightSize / avgOccluderZ;
              }
          }

          return vector<T, 2>(penumbra, occludedTaps);
      }
  };
  // Drop-in replacement of Pcss that does no work. It reports -1.0 in both components, the shadow functions in this file test the X
  // component against -1.0 to detect that PCSS is off.
  template<typename T>
  struct PcssDisabled
  {
      vector<T, 2> computePenumbra(Texture2D<Vec4> shadowmap, Vec2 searchDist, Vec3 projCoords, T cosTheta, T sinTheta, F32 lightSize, Bool dirLight)
      {
          const vector<T, 2> disabledMarker = -1.0;
          return disabledMarker;
      }
  };
  // Compute the shadow factor of a spot light.
  // pcf: When false a single comparison sample is taken. When true the kPoissonDisk4 kernel, rotated by randFactor * 2pi, is used
  // pcss: Either a Pcss or a PcssDisabled. When enabled it widens the PCF radius, and the function returns 0.0 right away if all the PCSS
  //       taps were occluded
  template<typename T, typename TPcss>
  T computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture2D<Vec4> shadowTex, SamplerComparisonState shadowMapSampler, Bool pcf,
                                        T randFactor, TPcss pcss)
  {
      // Project the position to the shadowmap
      const Vec4 texCoords4 = mul(light.m_textureMatrix, Vec4(worldPos, 1.0));
      const Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;

      if(!pcf)
      {
          // Single tap
          return shadowTex.SampleCmpLevelZero(shadowMapSampler, texCoords3.xy, texCoords3.z);
      }

      Vec2 texSize;
      F32 mipCount;
      shadowTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
      const Vec2 smTexelSize = 1.0 / texSize;

      // Rotation of the sample kernel
      const T sinTheta = sin(randFactor * 2.0 * kPi);
      const T cosTheta = cos(randFactor * 2.0 * kPi);

      // PCSS
      const vector<T, 2> pcssRes =
          pcss.computePenumbra(shadowTex, smTexelSize * kPcssSearchTexelRadius, texCoords3, cosTheta, sinTheta, light.m_radius, false);

      // Default radius is the one used when PCSS is disabled
      T pcfPixels = kPcfTexelRadius;
      if(pcssRes.x != -1.0)
      {
          if(pcssRes.y == ARRAY_SIZE(kPoissonDisk4))
          {
              // Every PCSS tap was occluded
              return 0.0;
          }

          pcfPixels = kPcfTexelRadius + pcssRes.x * kPcssTexelRadius;
      }

      T shadowSum = 0.0;
      [unroll] for(U32 tap = 0u; tap < ARRAY_SIZE(kPoissonDisk4); ++tap)
      {
          const vector<T, 2> diskPoint = kPoissonDisk4[tap];

          // Rotate the disk point
          const vector<T, 2> rotatedDiskPoint =
              vector<T, 2>(diskPoint.x * cosTheta - diskPoint.y * sinTheta, diskPoint.y * cosTheta + diskPoint.x * sinTheta);

          // Offset calculation
          const Vec2 tapUv = texCoords3.xy + rotatedDiskPoint * smTexelSize * pcfPixels;
          shadowSum += shadowTex.SampleCmpLevelZero(shadowMapSampler, tapUv, texCoords3.z);
      }

      shadowSum /= T(ARRAY_SIZE(kPoissonDisk4));
      return shadowSum;
  }
  // Spot light shadow factor using a single comparison sample (no PCF, no PCSS)
  template<typename T>
  T computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler)
  {
      const Bool pcf = false;
      PcssDisabled<T> pcssOff = (PcssDisabled<T>)0;
      // The random factor is not read when PCF is off
      return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, pcf, 0.0, pcssOff);
  }
  // Spot light shadow factor using PCF with a fixed radius (PCSS disabled)
  // randFactor: Rotates the PCF kernel by randFactor * 2pi
  template<typename T>
  T computeShadowFactorSpotLightPcf(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, T randFactor)
  {
      const Bool pcf = true;
      PcssDisabled<T> pcssOff = (PcssDisabled<T>)0;
      return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, pcf, randFactor, pcssOff);
  }
  // Spot light shadow factor using PCF with a PCSS driven radius
  // randFactor: Rotates the sample kernels by randFactor * 2pi
  // linearClampAnySampler: Sampler used by the PCSS blocker search to read the shadowmap depth
  template<typename T>
  T computeShadowFactorSpotLightPcss(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, T randFactor,
                                     SamplerState linearClampAnySampler)
  {
      Pcss<T> penumbraEstimator;
      penumbraEstimator.m_linearClampSampler = linearClampAnySampler;

      const Bool pcf = true;
      return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, pcf, randFactor, penumbraEstimator);
  }
  // Compute the shadow factor of point (omni) lights.
  // frag2Light: Vector from the fragment to the light
  // pcf: When false a single comparison sample is taken. When true the kPoissonDisk4 kernel, rotated by randFactor * 2pi, is used
  template<typename T>
  T computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler, T randFactor,
                                         Bool pcf)
  {
      const Vec3 dir = -frag2Light;
      const Vec3 dirabs = abs(dir);
      const F32 dist = max(dirabs.x, max(dirabs.y, dirabs.z)) - 0.01; // Push it out to avoid artifacts

      // 1) Project the dist to light's proj mat
      //
      const F32 near = kClusterObjectFrustumNearPlane;
      const F32 far = light.m_radius;
      const F32 zVSpace = -dist;
      const F32 w = -zVSpace;
      const F32 zClip = (far * zVSpace + far * near) / (near - far);
      const F32 z = zClip / w;

      // 2) Read shadow tex
      //

      // Convert cube coords. The face index selects the atlas tile
      U32 faceIdxu;
      Vec2 uv = convertCubeUvs(dir * Vec3(1.0, 1.0, -1.0), faceIdxu);

      // Move the UV inside the atlas tile of the face
      const Vec2 atlasOffset = light.m_shadowAtlasTileOffsets[faceIdxu].xy;
      uv = uv * Vec2(light.m_shadowAtlasTileScale, light.m_shadowAtlasTileScale);
      uv = uv + atlasOffset;

      // Sample
      if(!pcf)
      {
          return shadowMap.SampleCmpLevelZero(shadowMapSampler, uv, z);
      }

      F32 mipCount;
      Vec2 smTexelSize;
      shadowMap.GetDimensions(0, smTexelSize.x, smTexelSize.y, mipCount);
      smTexelSize = 1.0 / smTexelSize;

      // Rotation of the sample kernel
      const T sinTheta = sin(randFactor * 2.0 * kPi);
      const T cosTheta = cos(randFactor * 2.0 * kPi);

      T shadowSum = 0.0;
      [unroll] for(U32 tap = 0u; tap < ARRAY_SIZE(kPoissonDisk4); ++tap)
      {
          const vector<T, 2> diskPoint = kPoissonDisk4[tap];

          // Rotate the disk point
          const vector<T, 2> rotatedDiskPoint =
              vector<T, 2>(diskPoint.x * cosTheta - diskPoint.y * sinTheta, diskPoint.y * cosTheta + diskPoint.x * sinTheta);

          // Offset calculation
          const Vec2 tapUv = uv + rotatedDiskPoint * smTexelSize * kPcfTexelRadius;
          shadowSum += shadowMap.SampleCmpLevelZero(shadowMapSampler, tapUv, z);
      }

      shadowSum /= T(ARRAY_SIZE(kPoissonDisk4));
      return shadowSum;
  }
  // Point light shadow factor using a single comparison sample (no PCF)
  template<typename T>
  T computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
  {
      const Bool pcf = false;
      // The random factor is not read when PCF is off, -1.0 is just a placeholder
      return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, -1.0, pcf);
  }
  // Point light shadow factor using PCF
  // randFactor: Rotates the PCF kernel by randFactor * 2pi
  template<typename T>
  T computeShadowFactorPointLightPcf(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler, T randFactor)
  {
      const Bool pcf = true;
      return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, randFactor, pcf);
  }
  // Compute the shadow factor of a directional light
  // cascadeIdx: The cascade to sample. With the workaround below any value above 2 selects cascade 3
  // randFactor: Rotates the sample kernels by randFactor * 2pi
  // pcf: Request the multi-tap (kPoissonDisk8) path. See the note on the condition further down
  // pcss: Either a Pcss or a PcssDisabled. When enabled it widens the PCF radius, and the function returns 0.0 right away if all the PCSS
  //       taps were occluded
  template<typename T, typename TPcss>
  T computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
                                       SamplerComparisonState shadowMapSampler, T randFactor, Bool pcf, TPcss pcss)
  {
  #define ANKI_FAST_CASCADES_WORKAROUND 1 // light might be in a constant buffer and dynamic indexing in constant buffers is too slow on nvidia
  #if ANKI_FAST_CASCADES_WORKAROUND
      // Assumes kMaxShadowCascades is 4
      Mat4 lightProjectionMat;
      F32 far;
      F32 pcfDistUvSpace;
      switch(cascadeIdx)
      {
      case 0:
          lightProjectionMat = light.m_textureMatrices[0];
          far = light.m_cascadeFarPlanes[0];
          pcfDistUvSpace = light.m_cascadePcfTexelRadius[0];
          break;
      case 1:
          lightProjectionMat = light.m_textureMatrices[1];
          far = light.m_cascadeFarPlanes[1];
          pcfDistUvSpace = light.m_cascadePcfTexelRadius[1];
          break;
      case 2:
          lightProjectionMat = light.m_textureMatrices[2];
          far = light.m_cascadeFarPlanes[2];
          pcfDistUvSpace = light.m_cascadePcfTexelRadius[2];
          break;
      default:
          lightProjectionMat = light.m_textureMatrices[3];
          far = light.m_cascadeFarPlanes[3];
          pcfDistUvSpace = light.m_cascadePcfTexelRadius[3];
      }
  #else
      // Dynamic indexing path. It has to provide the same 3 values as the switch above, otherwise the rest of the function doesn't compile
      const Mat4 lightProjectionMat = light.m_textureMatrices[cascadeIdx];
      const F32 far = light.m_cascadeFarPlanes[cascadeIdx];
      F32 pcfDistUvSpace = light.m_cascadePcfTexelRadius[cascadeIdx]; // Not const, PCSS may widen it
  #endif

      // Project the position to the cascade's shadowmap
      const Vec4 texCoords4 = mul(lightProjectionMat, Vec4(worldPos, 1.0));
      Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;

      T shadow;
      // NOTE(review): the multi-tap path is also taken when the cascade's PCF radius is exactly zero, even if pcf is false. In that case
      // all the taps land on the same UV. Confirm that this is intended and that it shouldn't be "pcf && pcfDistUvSpace != 0.0f"
      if(pcf || pcfDistUvSpace == 0.0f)
      {
          // Rotation of the sample kernels
          const T sinTheta = sin(randFactor * 2.0 * kPi);
          const T cosTheta = cos(randFactor * 2.0 * kPi);

          // PCSS. The search distance is derived from the cascade's PCF distance
          const Vec2 pcssRes =
              pcss.computePenumbra(shadowMap, pcfDistUvSpace * (kPcssSearchTexelRadius / kPcfTexelRadius), texCoords3, cosTheta, sinTheta, far, true);

          if(pcssRes.x == -1.0)
          {
              // PCSS disabled, do nothing
          }
          else
          {
              if(pcssRes.y == ARRAY_SIZE(kPoissonDisk4))
              {
                  // Every PCSS tap was occluded
                  return 0.0;
              }

              // Widen the PCF distance by the penumbra factor
              pcfDistUvSpace = pcfDistUvSpace + pcssRes.x * pcfDistUvSpace * (kPcssTexelRadius / kPcfTexelRadius);
          }

          shadow = 0.0;
          [unroll] for(U32 i = 0u; i < ARRAY_SIZE(kPoissonDisk8); ++i)
          {
              const vector<T, 2> diskPoint = kPoissonDisk8[i];

              // Rotate the disk point
              vector<T, 2> rotatedDiskPoint;
              rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
              rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;

              // Offset calculation
              Vec2 newUv = texCoords3.xy + rotatedDiskPoint * pcfDistUvSpace;

              shadow += shadowMap.SampleCmpLevelZero(shadowMapSampler, newUv, texCoords3.z);
          }

          shadow /= T(ARRAY_SIZE(kPoissonDisk8));
      }
      else
      {
          // Single tap
          shadow = shadowMap.SampleCmpLevelZero(shadowMapSampler, texCoords3.xy, texCoords3.z);
      }

      return shadow;
  }
  // Directional light shadow factor for a cascade with PCF not requested and PCSS disabled
  template<typename T>
  T computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
  {
      const Bool pcf = false;
      PcssDisabled<T> pcssOff = (PcssDisabled<T>)0;
      // -1.0 is the random factor handed to the generic function
      return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, -1.0, pcf, pcssOff);
  }
  // Directional light shadow factor for a cascade using PCF with the cascade's own radius (PCSS disabled)
  // randFactor: Rotates the PCF kernel by randFactor * 2pi
  template<typename T>
  T computeShadowFactorDirLightPcf(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
                                   T randFactor)
  {
      const Bool pcf = true;
      PcssDisabled<T> pcssOff = (PcssDisabled<T>)0;
      return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, randFactor, pcf, pcssOff);
  }
  // Directional light shadow factor for a cascade using PCF with a PCSS driven radius
  // randFactor: Rotates the sample kernels by randFactor * 2pi
  // linearClampAnySampler: Sampler used by the PCSS blocker search to read the shadowmap depth
  template<typename T>
  T computeShadowFactorDirLightPcss(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
                                    T randFactor, SamplerState linearClampAnySampler)
  {
      Pcss<T> penumbraEstimator;
      penumbraEstimator.m_linearClampSampler = linearClampAnySampler;

      const Bool pcf = true;
      return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, randFactor, pcf, penumbraEstimator);
  }
  // Compute the shadow factor of a directional light given the light's texture matrix directly. Takes a single comparison sample
  template<typename T>
  T computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, Texture2D<Vec4> shadowMap, SamplerComparisonState shadowMapSampler)
  {
      // Project and do the perspective divide. XY is the UV and Z the reference depth
      const Vec4 projected = mul(lightProjectionMat, Vec4(worldPos, 1.0));
      const Vec3 uvAndDepth = projected.xyz / projected.w;

      return shadowMap.SampleCmpLevelZero(shadowMapSampler, uvAndDepth.xy, uvAndDepth.z);
  }
  // Compute the cubemap texture lookup vector given the reflection vector (r) the radius squared of the probe (R2) and
  // the frag pos in sphere space (f)
  Vec3 computeCubemapVecAccurate(Vec3 r, F32 R2, Vec3 f)
  {
      // Everything happens in the sphere's space so the center of the sphere is the origin.
      // p is f with its component along r removed. For a unit length r that is the point of the ray "f,r" that is closest to the center
      const Vec3 p = f - r * dot(f, r);

      // The collision with the sphere is the point x = p + t * r.
      // p is perpendicular to r so from the pythagorean theorem: R^2 = dot(p, p) + t^2 which gives t = sqrt(R^2 - dot(p, p)).
      // Clamp dot(p, p) to R^2 to keep the sqrt argument non-negative
      const F32 pp = min(dot(p, p), R2);
      return p + sqrt(R2 - pp) * r;
  }
  // Cheap version of computeCubemapVecAccurate. It ignores the probe and returns the reflection vector as is
  Vec3 computeCubemapVecCheap(Vec3 r, F32 R2, Vec3 f)
  {
      ANKI_MAYBE_UNUSED(f);
      ANKI_MAYBE_UNUSED(R2);
      return r;
  }
  // Light attenuation: max(0, 1 - |frag2Light|^2 / lightRadius^2)^2
  template<typename T>
  T computeAttenuationFactor(T lightRadius, Vec3 frag2Light)
  {
      // Squared distance, no sqrt needed
      const F32 distSquared = dot(frag2Light, frag2Light);
      const T normalizedDistSquared = distSquared / (lightRadius * lightRadius);
      const T falloff = max(0.0, T(1) - normalizedDistSquared);
      return falloff * falloff;
  }
  // Given the probe properties trace a ray inside the probe and find the cube tex coordinates to sample
  // fragPos: Ray origin
  // rayDir: Ray direction
  // probeOrigin: Cubemap origin
  Vec3 intersectProbe(Vec3 fragPos, Vec3 rayDir, Vec3 probeAabbMin, Vec3 probeAabbMax, Vec3 probeOrigin)
  {
      // Find where the ray hits the probe's AABB
      const F32 hitDistance = testRayAabbInside(fragPos, rayDir, probeAabbMin, probeAabbMax);
      const Vec3 hitPoint = fragPos + hitDistance * rayDir;

      // The cubemap vector points from the cubemap's origin to the hit point
      return hitPoint - probeOrigin;
  }
  // Compute a weight (factor) of fragPos against some probe's bounds. The weight is zero when fragPos is on or outside the AABB bounds
  // and it grows linearly to 1.0 when fragPos is fadeDistance or more inside the AABB.
  // fragPos: Doesn't need to be inside the AABB
  F32 computeProbeBlendWeight(Vec3 fragPos, Vec3 probeAabbMin, Vec3 probeAabbMax, F32 fadeDistance)
  {
      // Per axis distance to the closest of the two faces. Negative when outside
      const Vec3 distToFaces = min(fragPos - probeAabbMin, probeAabbMax - fragPos);
      const F32 closest = min(distToFaces.x, min(distToFaces.y, distToFaces.z));

      // Use saturate because the distance might be negative.
      return saturate(closest / fadeDistance);
  }
  // Given the value of the 6 faces of the dice and a normal, sample the correct weighted value.
  // https://www.shadertoy.com/view/XtcBDB
  template<typename T>
  vector<T, 3> sampleAmbientDice(vector<T, 3> posx, vector<T, 3> negx, vector<T, 3> posy, vector<T, 3> negy, vector<T, 3> posz, vector<T, 3> negz,
                                 vector<T, 3> normal)
  {
      // The Z axis is flipped before the lookup
      normal.z *= -1.0;

      // Each axis contributes proportionally to the square of the normal's component
      const vector<T, 3> weights = normal * normal;

      // Remap the normal from [-1, 1] to [0, 1] to blend between the negative and the positive face of each axis
      const vector<T, 3> blend = normal * 0.5 + 0.5;

      const vector<T, 3> alongX = lerp(negx, posx, blend.x) * weights.x;
      const vector<T, 3> alongY = lerp(negy, posy, blend.y) * weights.y;
      const vector<T, 3> alongZ = lerp(negz, posz, blend.z) * weights.z;

      // Divide by weight. The small bias avoids a division by zero
      return (alongX + alongY + alongZ) / (weights.x + weights.y + weights.z + 0.0001);
  }
  // Sample the irradiance term from the clipmap
  // The 3D texture packs the 6 directions of the ambient dice side by side along U
  template<typename T>
  vector<T, 3> sampleGlobalIllumination(const Vec3 worldPos, const vector<T, 3> normal, const GlobalIlluminationProbe probe, Texture3D<Vec4> tex,
                                        SamplerState linearAnyClampSampler)
  {
      // Find the UVW: the position relative to the probe's AABB, clamped to [0, 1] and with V flipped
      Vec3 uvw = saturate((worldPos - probe.m_aabbMin) / (probe.m_aabbMax - probe.m_aabbMin));
      uvw.y = 1.0f - uvw.y;

      // The U contains the 6 directions so divide
      uvw.x /= 6.0;

      // Clamp it to avoid direction leaking
      uvw.x = clamp(uvw.x, probe.m_halfTexelSizeU, (1.0 / 6.0) - probe.m_halfTexelSizeU);

      // Read the irradiance of every direction
      vector<T, 3> faces[6u];
      [unroll] for(U32 face = 0u; face < 6u; ++face)
      {
          // Point to the slice of this direction
          const Vec3 faceUvw = Vec3(uvw.x + (1.0 / 6.0) * F32(face), uvw.y, uvw.z);
          faces[face] = tex.SampleLevel(linearAnyClampSampler, faceUvw, 0.0).rgb;
      }

      // Blend the directions using the normal
      return sampleAmbientDice<T>(faces[0], faces[1], faces[2], faces[3], faces[4], faces[5], normal);
  }
  /// To play with it use https://www.shadertoy.com/view/sttSDf
  /// http://jcgt.org/published/0007/04/01/paper.pdf by Eric Heitz
  /// Input v: view direction (camPos - pos)
  /// Input alphaX, alphaY: roughness parameters
  /// Input u1, u2: uniform random numbers
  /// Output: normal sampled with PDF D_Ve(nE) = G1(v) * max(0, dot(v, nE)) * D(nE) / v.z
  Vec3 sampleGgxVndf(Vec3 v, F32 alphaX, F32 alphaY, F32 u1, F32 u2)
  {
      // Section 3.2: transforming the view direction to the hemisphere configuration
      const Vec3 vH = normalize(Vec3(alphaX * v.x, alphaY * v.y, v.z));

      // Section 4.1: orthonormal basis (with special case if cross product is zero)
      const F32 lensq = vH.x * vH.x + vH.y * vH.y;
      Vec3 tangent1;
      if(lensq > 0.0)
      {
          tangent1 = Vec3(-vH.y, vH.x, 0) * rsqrt(lensq);
      }
      else
      {
          tangent1 = Vec3(1.0, 0.0, 0.0);
      }
      const Vec3 tangent2 = cross(vH, tangent1);

      // Section 4.2: parameterization of the projected area
      const F32 r = sqrt(u1);
      const F32 phi = 2.0 * kPi * u2;
      const F32 t1 = r * cos(phi);
      const F32 t2OnDisk = r * sin(phi);
      const F32 s = 0.5 * (1.0 + vH.z);
      const F32 t2 = (1.0 - s) * sqrt(1.0 - t1 * t1) + s * t2OnDisk;

      // Section 4.3: reprojection onto hemisphere
      const Vec3 nH = t1 * tangent1 + t2 * tangent2 + sqrt(max(0.0, 1.0 - t1 * t1 - t2 * t2)) * vH;

      // Section 3.4: transforming the normal back to the ellipsoid configuration
      return normalize(Vec3(alphaX * nH.x, alphaY * nH.y, max(0.0, nH.z)));
  }
  // The PDF for the sampleGgxVndf. It's D_Ve(nE) = G1(v) * max(0, dot(v, nE)) * D(nE) / v.z
  // nE: The sampled normal
  // v: The view direction that was given to sampleGgxVndf
  F32 pdfGgxVndf(Vec3 nE, Vec3 v, F32 alphaX, F32 alphaY)
  {
      // Equation (2) in the paper
      const F32 stretchedTanSquared = (square(alphaX * v.x) + square(alphaY * v.y)) / square(v.z);
      const F32 lambdaV = (-1.0 + sqrt(1.0 + stretchedTanSquared)) / 2.0;
      const F32 G1 = 1.0 / (1.0 + lambdaV);

      // Equation (1) in the paper
      const F32 ellipsoidTerm = square(nE.x / alphaX) + square(nE.y / alphaY) + square(nE.z);
      const F32 DnE = 1.0 / (kPi * alphaX * alphaY * square(ellipsoidTerm));

      return G1 * max(0.0, dot(v, nE)) * DnE / v.z;
  }
  // Same thing as sampleGgxVndf but it works in world space and not in TBN and it's also isotropic
  // https://auzaiffe.wordpress.com/2024/04/15/vndf-importance-sampling-an-isotropic-distribution/
  // viewDir is camPos-worldPos
  Vec3 sampleVndfIsotropic(Vec2 randFactors, Vec3 viewDir, F32 alpha, Vec3 normal)
  {
      // decompose the vector in parallel and perpendicular components (the parallel one is stored negated)
      const Vec3 viewParallel = -normal * dot(viewDir, normal);
      const Vec3 viewPerp = viewDir + viewParallel;

      // warp to the hemisphere configuration
      const Vec3 wiStd = -normalize(alpha * viewPerp + viewParallel);

      // sample a spherical cap in (-wiStd.z, 1]
      const F32 wiStdZ = dot(wiStd, normal);
      const F32 capZ = 1.0 - randFactors.y * (1.0 + wiStdZ);
      const F32 capSinTheta = sqrt(saturate(1.0 - capZ * capZ));
      const F32 capPhi = 2.0 * kPi * randFactors.x - kPi;
      const Vec3 cStd = Vec3(capSinTheta * cos(capPhi), capSinTheta * sin(capPhi), capZ);

      // reflect sample to align with normal
      const Vec3 up = Vec3(0, 0, 1.000001); // Used for the singularity
      const Vec3 wr = normal + up;
      const Vec3 c = dot(wr, cStd) * wr / wr.z - cStd;

      // compute halfway direction as standard normal
      const Vec3 wmStd = c + wiStd;
      const Vec3 wmStdParallel = normal * dot(normal, wmStd);
      const Vec3 wmStdPerp = wmStdParallel - wmStd;

      // return final normal
      return normalize(alpha * wmStdPerp + wmStdParallel);
  }
  // The PDF of sampleVndfIsotropic
  // reflectedDir and viewDir are combined to rebuild the halfway vector
  F32 pdfVndfIsotropic(Vec3 reflectedDir, Vec3 viewDir, F32 alpha, Vec3 normal)
  {
      const F32 alpha2 = alpha * alpha;

      // Cosines of the halfway vector and of the view direction against the normal
      const Vec3 halfway = normalize(reflectedDir + viewDir);
      const F32 zm = dot(halfway, normal);
      const F32 zi = dot(viewDir, normal);

      const F32 nrm = rsqrt((zi * zi) * (1.0f - alpha2) + alpha2);
      const F32 sigmaStd = (zi * nrm) * 0.5f + 0.5f;
      const F32 sigmaI = sigmaStd / nrm;
      const F32 nrmN = (zm * zm) * (alpha2 - 1.0f) + 1.0f;

      // The epsilon guards against a zero denominator
      return alpha2 / (kPi * 4.0f * nrmN * nrmN * sigmaI + kEpsilonF32);
  }
  /// Calculate the reflection vector based on roughness. Sometimes the refl vector is below the normal so this func will try again to get a new one.
  /// viewDir is camPos-worldPos
  /// tryCount: Max number of attempts. At least one attempt is always made, even if tryCount is zero
  /// pdf: The PDF of the accepted sample. It stays zero if no attempt was accepted, in that case the last rejected vector is returned
  Vec3 sampleReflectionVectorAnisotropic(Vec3 viewDir, Vec3 normal, F32 roughnessX, F32 roughnessY, Vec2 randFactors, U32 tryCount, out F32 pdf)
  {
      pdf = 0.0;

      // Work in the space where the normal is the Z axis
      const Mat3 tbn = rotationFromDirection(normal);
      const Mat3 tbnT = transpose(tbn);
      const Vec3 viewDirTbn = mul(tbnT, viewDir);

      const F32 alphaX = roughnessX * roughnessX;
      const F32 alphaY = roughnessY * roughnessY;

      // Accept only vectors that are within 90% of 90 degrees from the normal
      const F32 minCosAngle = cos(kPi / 2.0 * 0.9);

      // Bounded loop. The older "do {} while(--tryCount)" form wrapped around when tryCount was zero and could spin for ~2^32 iterations
      const U32 attemptCount = max(tryCount, 1u);

      // Always overwritten by the 1st attempt, the initial value only keeps the compiler from seeing an uninitialized read
      Vec3 reflectedDirTbn = Vec3(0.0, 0.0, 1.0);

      for(U32 attempt = 0u; attempt < attemptCount; ++attempt)
      {
          const Vec3 sampledNormalTbn = sampleGgxVndf(viewDirTbn, alphaX, alphaY, randFactors.x, randFactors.y);
          reflectedDirTbn = reflect(-viewDirTbn, sampledNormalTbn);

          if(dot(reflectedDirTbn, Vec3(0.0, 0.0, 1.0)) > minCosAngle)
          {
              // The refl vec is far enough above the surface. We are good to go
              pdf = pdfGgxVndf(sampledNormalTbn, viewDirTbn, alphaX, alphaY);
              break;
          }

          // Try again with different random numbers
          randFactors.x = frac(randFactors.x + 0.7324);
          randFactors.y = frac(randFactors.y + 0.6523);
      }

      // Transform reflectedDirTbn back to the initial space.
      return mul(tbn, reflectedDirTbn);
  }
  // Another version of sampleReflectionVector. Possibly faster
  // viewDir is handed as is to sampleVndfIsotropic, which expects camPos-worldPos
  // tryCount: Max number of attempts. At least one attempt is always made, even if tryCount is zero
  // pdf: The PDF of the returned vector. It's computed even if every attempt was rejected
  Vec3 sampleReflectionVectorIsotropic(Vec3 viewDir, Vec3 normal, F32 roughness, Vec2 randFactors, U32 tryCount, out F32 pdf)
  {
      const F32 alpha = roughness * roughness;

      // Accept only vectors that are within 90% of 90 degrees from the normal
      const F32 minCosAngle = cos(kPi / 2.0 * 0.9);

      // Bounded loop. The older "do {} while(--tryCount)" form wrapped around when tryCount was zero and could spin for ~2^32 iterations
      const U32 attemptCount = max(tryCount, 1u);

      Vec3 reflDir = normal;
      for(U32 attempt = 0u; attempt < attemptCount; ++attempt)
      {
          const Vec3 nE = sampleVndfIsotropic(randFactors, viewDir, alpha, normal);
          reflDir = reflect(-viewDir, nE);

          if(dot(reflDir, normal) > minCosAngle)
          {
              // The refl vec is far enough above the surface. We are good to go
              break;
          }

          // Try again with different random numbers
          randFactors.x = frac(randFactors.x + 0.7324);
          randFactors.y = frac(randFactors.y + 0.6523);
      }

      pdf = pdfVndfIsotropic(reflDir, viewDir, alpha, normal);
      return reflDir;
  }
  /// Get the index of the cascade given the distance from zero.
  /// It's the 1st cascade whose distance is greater than the given one (the 4th if there is none), clamped to shadowCascadeCount - 1
  U32 computeShadowCascadeIndex(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount)
  {
      // Start from the last cascade and override if a closer one contains the distance
      U32 cascade = 3u;
      if(distance < cascadeDistances[0u])
      {
          cascade = 0u;
      }
      else if(distance < cascadeDistances[1u])
      {
          cascade = 1u;
      }
      else if(distance < cascadeDistances[2u])
      {
          cascade = 2u;
      }

      return min(shadowCascadeCount - 1u, cascade);
  }
  /// Bring the indices of the closest cascades and a factor to blend them. To visualize what's going on go to:
  /// https://www.desmos.com/calculator/g1ibye6ebg
  /// Returns (cascade, next cascade). The next cascade is clamped to the last one
  /// factor: The relative position of the distance inside the cascade's range raised to the power of 16
  template<typename T>
  UVec2 computeShadowCascadeIndex2(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount, out T factor)
  {
      const U32 cascade = computeShadowCascadeIndex(distance, cascadeDistances, shadowCascadeCount);
      const U32 nextCascade = min(cascade + 1u, shadowCascadeCount - 1u);

      // The range of the cascade. The 1st cascade starts at zero
      F32 minDist = 0.0;
      if(cascade != 0u)
      {
          minDist = cascadeDistances[cascade - 1u];
      }
      const F32 maxDist = cascadeDistances[cascade];

      // The epsilon protects against a degenerate (empty) range
      const T linearFactor = (distance - minDist) / max(kEpsilonF32, maxDist - minDist);
      factor = pow(linearFactor, T(16.0)); // WARNING: Need to change the C++ code if you change this

      return UVec2(cascade, nextCascade);
  }