Functions.glsl 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Shaders/Common.glsl>
  7. #if defined(ANKI_FRAGMENT_SHADER)
  // Quantize a colour to 8-bit steps after adding a per-pixel pseudo-random offset in [0, 1) derived from
  // gl_FragCoord. col is scaled by 255/C before flooring and scaled back by C afterwards.
  // NOTE(review): C is presumably the maximum value col can take - confirm against callers.
  Vec3 dither(Vec3 col, F32 C)
  {
      // One scalar seed per pixel, decorrelated per channel by the three different divisors.
      const F32 seed = dot(Vec2(171.0, 231.0), gl_FragCoord.xy);
      const Vec3 noise = fract(Vec3(seed) / Vec3(103.0, 71.0, 97.0));

      const Vec3 quantized = floor(col * (255.0 / C) + noise) / 255.0;
      return quantized * C;
  }
  // Scalar version of dither(): quantize a value to 8-bit steps after adding a per-pixel pseudo-random offset in
  // [0, 1) derived from gl_FragCoord.
  // NOTE(review): C is presumably the maximum value col can take - confirm against callers.
  F32 dither(F32 col, F32 C)
  {
      const F32 seed = dot(Vec2(171.0, 231.0), gl_FragCoord.xy);
      const F32 noise = fract(seed / 103.0);

      const F32 quantized = floor(col * (255.0 / C) + noise) / 255.0;
      return quantized * C;
  }
  26. #endif
  // Convert a depth-buffer value to linear depth using the near and far plane distances.
  F32 linearizeDepth(F32 depth, F32 zNear, F32 zFar)
  {
      const F32 denominator = (zNear - zFar) + zFar / depth;
      return zNear / denominator;
  }
  // Cheaper form of linearizeDepth() that takes precomputed coefficients a=(n-f)/n and b=f/n.
  F32 linearizeDepthOptimal(F32 depth, F32 a, F32 b)
  {
      const F32 denominator = a + b / depth;
      return 1.0 / denominator;
  }
  // Four-wide version of linearizeDepthOptimal(): linearizes each component of depths with a=(n-f)/n and b=f/n.
  Vec4 linearizeDepthOptimal(Vec4 depths, F32 a, F32 b)
  {
      const Vec4 denominators = a + b / depths;
      return 1.0 / denominators;
  }
  // Multiply a vector with a perspective projection matrix given only the matrix's non-zero elements
  // (m00, m11, m22, m23). The output w is the negated input z.
  Vec4 projectPerspective(Vec4 vec, F32 m00, F32 m11, F32 m22, F32 m23)
  {
      return Vec4(vec.x * m00, vec.y * m11, vec.z * m22 + vec.w * m23, -vec.z);
  }
  52. #if defined(ANKI_FRAGMENT_SHADER)
  // Catmull-Rom style filtering that needs only 4 texture fetches. Adapted from shadertoy.com/view/4tyGDD
  // texSize is the size of tex in texels.
  // NOTE(review): presumably sampl must be a bilinear sampler for the 4 taps to blend correctly - confirm.
  Vec4 textureCatmullRom4Samples(texture2D tex, sampler sampl, Vec2 uv, Vec2 texSize)
  {
      const Vec2 signedHalf = 2.0 * fract(0.5 * uv * texSize - 0.25) - 1.0;
      const Vec2 t = fract(signedHalf);

      // Polynomial terms shared by the weights and the sample positions
      const Vec2 polyA = (2.0 * t - 3.5) * t + 0.5;
      const Vec2 polyB = (2.0 * t - 2.5) * t - 0.5;
      Vec4 weights = Vec4(t * polyA + 1.0, t * polyB);

      // Two sample positions per axis; the 4 taps are their combinations
      const Vec2 uvA = (((-2.0 * t + 3.0) * t + 0.5) * t - 1.5) * t / (weights.xy * texSize) + uv;
      const Vec2 uvB = (((-2.0 * t + 5.0) * t - 2.5) * t - 0.5) / (polyB * texSize) + uv;

      // Flip the sign of the horizontal weights depending on the quadrant
      const F32 flip = (signedHalf.x * signedHalf.y > 0.0) ? 1.0 : -1.0;
      weights.xz *= flip;

      const Vec4 rowA = texture(tex, sampl, uvA) * weights.x + texture(tex, sampl, Vec2(uvB.x, uvA.y)) * weights.z;
      const Vec4 rowB = texture(tex, sampl, Vec2(uvA.x, uvB.y)) * weights.x + texture(tex, sampl, uvB) * weights.z;
      return rowA * weights.y + rowB * weights.w;
  }
  67. #endif
  // Hash a 2D coordinate into a pseudo-random value in [0.5, 1.0).
  F32 rand(Vec2 n)
  {
      const F32 phase = dot(n, Vec2(12.9898, 78.233));
      const F32 noise = fract(sin(phase) * 43758.5453);
      return 0.5 + 0.5 * noise;
  }
  // Depth-aware upscale of colorTex. The depth at uv in depthFull is compared against the 4 depths gathered from
  // depthHalf. If all of them are within depthThreshold the color is read with linearAnyClampSampler, otherwise the
  // gathered texel whose depth is closest to the full resolution depth is returned.
  // linearDepthCf holds the (a, b) coefficients of linearizeDepthOptimal().
  Vec4 nearestDepthUpscale(Vec2 uv, texture2D depthFull, texture2D depthHalf, texture2D colorTex,
                           sampler linearAnyClampSampler, Vec2 linearDepthCf, F32 depthThreshold)
  {
      // The sampler is not important for the depth reads
      const F32 refDepth = linearizeDepthOptimal(textureLod(depthFull, linearAnyClampSampler, uv, 0.0).r,
                                                 linearDepthCf.x, linearDepthCf.y);
      const Vec4 lowDepths = linearizeDepthOptimal(textureGather(sampler2D(depthHalf, linearAnyClampSampler), uv, 0),
                                                   linearDepthCf.x, linearDepthCf.y);
      const Vec4 depthDeltas = abs(Vec4(refDepth) - lowDepths);

      if(all(lessThan(depthDeltas, Vec4(depthThreshold))))
      {
          // No major discontinuities, a plain filtered read is good enough
          return textureLod(colorTex, linearAnyClampSampler, uv, 0.0);
      }

      // Some discontinuities, pick the gathered texel with the closest depth. Ties keep the earlier component
      U32 nearest = 0u;
      F32 smallestDelta = depthDeltas.x;
      for(U32 i = 1u; i < 4u; ++i)
      {
          if(depthDeltas[i] < smallestDelta)
          {
              smallestDelta = depthDeltas[i];
              nearest = i;
          }
      }

      const Vec4 reds = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 0);
      const Vec4 greens = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 1);
      const Vec4 blues = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 2);
      const Vec4 alphas = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 3);
      return Vec4(reds[nearest], greens[nearest], blues[nearest], alphas[nearest]);
  }
  // Weight of a low resolution tap: the inverse of the linear depth difference between the tap at uv and the
  // reference depth ref. EPSILON keeps the result finite when the depths match.
  F32 _calcDepthWeight(texture2D depthLow, sampler nearestAnyClamp, Vec2 uv, F32 ref, Vec2 linearDepthCf)
  {
      const F32 rawDepth = textureLod(depthLow, nearestAnyClamp, uv, 0.0).r;
      const F32 depthDelta = abs(ref - linearizeDepthOptimal(rawDepth, linearDepthCf.x, linearDepthCf.y));
      return 1.0 / (EPSILON + depthDelta);
  }
  // Read one tap of bilateralUpsample(). The tap sits at uv displaced by offset low resolution texels. Returns the
  // color scaled by the tap's combined weight (kernel weight times depth weight) and adds that combined weight to
  // normalize.
  Vec4 _sampleAndWeight(texture2D depthLow, texture2D colorLow, sampler linearAnyClamp, sampler nearestAnyClamp,
                        const Vec2 lowInvSize, Vec2 uv, const Vec2 offset, const F32 ref, const F32 weight,
                        const Vec2 linearDepthCf, inout F32 normalize)
  {
      const Vec2 tapUv = uv + offset * lowInvSize;

      const F32 depthWeight = _calcDepthWeight(depthLow, nearestAnyClamp, tapUv, ref, linearDepthCf);
      const Vec4 tapColor = textureLod(colorLow, linearAnyClamp, tapUv, 0.0);

      normalize += weight * depthWeight;
      return tapColor * depthWeight * weight;
  }
  // Depth-aware upsample of colorLow using a 3x3 kernel. Every tap is weighted by a fixed kernel weight (0.25 for the
  // center, 0.125 for the edges, 0.0625 for the corners) and by how close its depth in depthLow is to the depth read
  // from depthHigh at uv. The result is divided by the sum of the weights.
  // lowInvSize is used as the size of one low resolution texel in UV space; linearDepthCf holds the (a, b)
  // coefficients of linearizeDepthOptimal().
  Vec4 bilateralUpsample(texture2D depthHigh, texture2D depthLow, texture2D colorLow, sampler linearAnyClamp,
                         sampler nearestAnyClamp, const Vec2 lowInvSize, const Vec2 uv, const Vec2 linearDepthCf)
  {
      // Center first, then the 4 edge taps, then the 4 corner taps
      const Vec2 TAP_OFFSETS[9] = Vec2[9](Vec2(0.0, 0.0), Vec2(-1.0, 0.0), Vec2(0.0, -1.0), Vec2(1.0, 0.0),
                                          Vec2(0.0, 1.0), Vec2(1.0, 1.0), Vec2(1.0, -1.0), Vec2(-1.0, 1.0),
                                          Vec2(-1.0, -1.0));
      const F32 TAP_WEIGHTS[9] = F32[9](0.25, 0.125, 0.125, 0.125, 0.125, 0.0625, 0.0625, 0.0625, 0.0625);

      const F32 rawDepthRef = textureLod(depthHigh, nearestAnyClamp, uv, 0.0).r;
      const F32 depthRef = linearizeDepthOptimal(rawDepthRef, linearDepthCf.x, linearDepthCf.y);

      F32 weightSum = 0.0;
      Vec4 sum = _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv, TAP_OFFSETS[0],
                                  depthRef, TAP_WEIGHTS[0], linearDepthCf, weightSum);
      for(U32 i = 1u; i < 9u; ++i)
      {
          sum += _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv, TAP_OFFSETS[i],
                                  depthRef, TAP_WEIGHTS[i], linearDepthCf, weightSum);
      }

      return sum / weightSum;
  }
  // Compute the normalized direction that corresponds to a position on a cubemap face. faceIdx selects the face
  // (0: +X, 1: -X, 2: +Y, 3: -Y, 4: +Z, 5: -Z) and norm is the position on that face.
  // NOTE(review): norm is presumably in [-1, 1] - confirm against callers.
  Vec3 getCubemapDirection(const Vec2 norm, const U32 faceIdx)
  {
      // Major axis of the face. Odd faces point to the negative direction
      const F32 axisX = (faceIdx <= 1u) ? 1.0 : 0.0;
      const F32 axisY = F32((faceIdx & 2u) >> 1u);
      const F32 axisZ = F32((faceIdx & 4u) >> 2u);
      const F32 faceSign = ((faceIdx & 1u) == 1u) ? -1.0 : 1.0;
      const Vec3 zDir = Vec3(axisX, axisY, axisZ) * faceSign;

      Vec3 yDir;
      if(faceIdx == 2u)
      {
          yDir = Vec3(0.0, 0.0, 1.0);
      }
      else if(faceIdx == 3u)
      {
          yDir = Vec3(0.0, 0.0, -1.0);
      }
      else
      {
          yDir = Vec3(0.0, -1.0, 0.0);
      }

      const Vec3 xDir = cross(zDir, yDir);
      return normalize(norm.x * xDir + norm.y * yDir + zDir);
  }
  // Convert a 3D cubemap direction to 2D face coordinates in [0, 1] plus the index of the face it hits
  // (0: +X, 1: -X, 2: +Y, 3: -Y, 4: +Z, 5: -Z). v doesn't need to be normalized.
  Vec2 convertCubeUvs(const Vec3 v, out F32 faceIndex)
  {
      const Vec3 absV = abs(v);

      Vec2 faceUv;
      F32 majorAxis;
      if(absV.z >= absV.x && absV.z >= absV.y)
      {
          // Z is the major axis
          const Bool negative = v.z < 0.0;
          faceIndex = negative ? 5.0 : 4.0;
          faceUv = Vec2(negative ? -v.x : v.x, -v.y);
          majorAxis = absV.z;
      }
      else if(absV.y >= absV.x)
      {
          // Y is the major axis
          const Bool negative = v.y < 0.0;
          faceIndex = negative ? 3.0 : 2.0;
          faceUv = Vec2(v.x, negative ? -v.z : v.z);
          majorAxis = absV.y;
      }
      else
      {
          // X is the major axis
          const Bool negative = v.x < 0.0;
          faceIndex = negative ? 1.0 : 0.0;
          faceUv = Vec2(negative ? v.z : -v.z, -v.y);
          majorAxis = absV.x;
      }

      // Project onto the face and remap from [-1, 1] to [0, 1]
      return 0.5 / majorAxis * faceUv + 0.5;
  }
  // Same as convertCubeUvs but it returns the faceIndex as an unsigned integer.
  Vec2 convertCubeUvsu(const Vec3 v, out U32 faceIndex)
  {
      // The float face index is always a whole number in [0, 5] so the conversion is exact
      F32 faceIndexf;
      const Vec2 faceUv = convertCubeUvs(v, faceIndexf);
      faceIndex = U32(faceIndexf);
      return faceUv;
  }
  // Convert a color to gray using the unweighted average of its 3 channels.
  Vec3 grayScale(const Vec3 col)
  {
      const F32 channelSum = col.r + col.g + col.b;
      return Vec3(channelSum * (1.0 / 3.0));
  }
  // Change the saturation of a color by blending between its luminance and the color itself. A factor of 0.0 gives
  // the gray luminance, 1.0 gives col unchanged.
  Vec3 saturateColor(const Vec3 col, const F32 factor)
  {
      const Vec3 LUMINANCE_WEIGHTS = Vec3(0.2125, 0.7154, 0.0721);
      const F32 luminance = dot(col, LUMINANCE_WEIGHTS);
      return mix(Vec3(luminance), col, factor);
  }
  // Apply gamma correction by raising every channel of col to the reciprocal of the matching channel of gamma.
  Vec3 gammaCorrection(Vec3 gamma, Vec3 col)
  {
      const Vec3 exponent = 1.0 / gamma;
      return pow(col, exponent);
  }
  // Read a texel and sharpen it by subtracting its neighbours. Uses the 4 diagonal neighbours, or all 8 neighbours
  // when detailed is true. The result is clamped so that it's never negative. Can use 0.15 for sharpenFactor
  Vec3 readSharpen(texture2D tex, sampler sampl, Vec2 uv, F32 sharpenFactor, Bool detailed)
  {
  #define ANKI_SHARPEN_TAP(x, y) textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(x, y)).rgb

      const Vec3 center = textureLod(tex, sampl, uv, 0.0).rgb;

      Vec3 neighbours = ANKI_SHARPEN_TAP(1, 1);
      neighbours += ANKI_SHARPEN_TAP(-1, -1);
      neighbours += ANKI_SHARPEN_TAP(1, -1);
      neighbours += ANKI_SHARPEN_TAP(-1, 1);
      F32 tapCount = 4.0;

      if(detailed)
      {
          neighbours += ANKI_SHARPEN_TAP(0, 1);
          neighbours += ANKI_SHARPEN_TAP(1, 0);
          neighbours += ANKI_SHARPEN_TAP(-1, 0);
          neighbours += ANKI_SHARPEN_TAP(0, -1);
          tapCount = 8.0;
      }

  #undef ANKI_SHARPEN_TAP

      // The center is boosted by as much as is removed by the neighbours
      const Vec3 sharpened = center * (tapCount * sharpenFactor + 1.0) - sharpenFactor * neighbours;
      return max(Vec3(0.0), sharpened);
  }
  // Erosion filter: returns the per-channel minimum of the texel at uv and its 8 neighbours.
  Vec3 readErosion(texture2D tex, sampler sampl, const Vec2 uv)
  {
      Vec3 minValue = textureLod(tex, sampl, uv, 0.0).rgb;

      // Diagonal neighbours
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 1)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, -1)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, -1)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 1)).rgb, minValue);

      // Horizontal and vertical neighbours
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, 1)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 0)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 0)).rgb, minValue);
      minValue = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, -1)).rgb, minValue);

      return minValue;
  }
  // 5 color heatmap from a factor. The colors are black, blue, green, yellow and red and they are evenly spread over
  // the [0, 1] range of the factor. Factors outside of that range are clamped, so anything <= 0.0 is black and
  // anything >= 1.0 is red.
  Vec3 heatmap(const F32 factor)
  {
      const Vec3 BLACK = Vec3(0.0, 0.0, 0.0);
      const Vec3 BLUE = Vec3(0.0, 0.0, 1.0);
      const Vec3 GREEN = Vec3(0.0, 1.0, 0.0);
      const Vec3 YELLOW = Vec3(1.0, 1.0, 0.0);
      const Vec3 RED = Vec3(1.0, 0.0, 0.0);

      // Clamp so that out of range factors don't extrapolate the gradients
      F32 intPart;
      const F32 fractional = modf(clamp(factor, 0.0, 1.0) * 4.0, intPart);

      if(intPart < 1.0)
      {
          return mix(BLACK, BLUE, fractional);
      }
      else if(intPart < 2.0)
      {
          return mix(BLUE, GREEN, fractional);
      }
      else if(intPart < 3.0)
      {
          return mix(GREEN, YELLOW, fractional);
      }
      else if(intPart < 4.0)
      {
          return mix(YELLOW, RED, fractional);
      }
      else
      {
          // factor is 1.0: intPart is 4.0 and fractional is 0.0. Blending the last segment would give yellow
          return RED;
      }
  }
  // Return a color per cubemap face. The +X is red, -X dark red, +Y green, -Y dark green, +Z blue, -Z dark blue.
  // Any dir greater than 4 gives the -Z color.
  Vec3 colorPerCubeFace(const U32 dir)
  {
      const Vec3 FACE_COLORS[6] = Vec3[6](Vec3(1.0, 0.0, 0.0), Vec3(0.25, 0.0, 0.0), Vec3(0.0, 1.0, 0.0),
                                          Vec3(0.0, 0.25, 0.0), Vec3(0.0, 0.0, 1.0), Vec3(0.0, 0.0, 0.25));
      return FACE_COLORS[min(dir, 5u)];
  }
  // Returns true if any of the color's channels is NaN or infinite.
  Bool incorrectColor(const Vec3 c)
  {
      return any(isnan(c)) || any(isinf(c));
  }
  // Helper of cubeCoordSolidAngle(): evaluates atan(x * y, sqrt(x^2 + y^2 + 1)) for a point (x, y).
  F32 areaElement(const F32 x, const F32 y)
  {
      const F32 numerator = x * y;
      const F32 denominator = sqrt(x * x + y * y + 1.0);
      return atan(numerator, denominator);
  }
  // Compute the solid angle of a cubemap texel. Solid angle is the area of a sphere when projected into a cubemap.
  // It's also the delta omega (dω) in the irradiance integral and other integrals that operate in a sphere.
  // http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/
  // norm is the position of the texel's center on the face and cubeFaceSize the size of the face in texels.
  // NOTE(review): norm is presumably in [-1, 1], which makes 1/cubeFaceSize half a texel - confirm against callers.
  F32 cubeCoordSolidAngle(Vec2 norm, F32 cubeFaceSize)
  {
      const F32 halfTexel = 1.0 / cubeFaceSize;
      const Vec2 lo = norm - Vec2(halfTexel);
      const Vec2 hi = norm + Vec2(halfTexel);

      // Combine the area elements of the 4 corners of the texel
      return areaElement(lo.x, lo.y) - areaElement(lo.x, hi.y) - areaElement(hi.x, lo.y) + areaElement(hi.x, hi.y);
  }
  336. // A convenience function to skip out of bounds invocations on post-process compute shaders. Both the arguments should
  337. // be constexpr.
  338. #if defined(ANKI_COMPUTE_SHADER)
  // Returns true if the current invocation falls outside of globalInvocationCount and should do no work. The bounds
  // check is only evaluated when globalInvocationCount is not a multiple of workgroupSize.
  Bool skipOutOfBoundsInvocations(UVec2 workgroupSize, UVec2 globalInvocationCount)
  {
      const Bool dividesEvenly = (globalInvocationCount.x % workgroupSize.x) == 0u
                                 && (globalInvocationCount.y % workgroupSize.y) == 0u;
      if(dividesEvenly)
      {
          // The workgroups cover the invocation count exactly, nothing to skip
          return false;
      }

      return any(greaterThanEqual(gl_GlobalInvocationID.xy, globalInvocationCount));
  }
  350. #endif
  // Create a rotation matrix from some direction. The direction becomes the 3rd column (the Z axis) of the matrix and
  // the other 2 columns are built to be perpendicular to it.
  // NOTE(review): the math assumes that zAxis is normalized - confirm against callers.
  Mat3 rotationFromDirection(Vec3 zAxis)
  {
  #if 0
      const Vec3 z = zAxis;
      const Bool alignsWithXBasis = abs(z.x - 1.0) <= EPSILON; // aka z == Vec3(1.0, 0.0, 0.0)
      Vec3 x = (alignsWithXBasis) ? Vec3(0.0, 0.0, 1.0) : Vec3(1.0, 0.0, 0.0);
      const Vec3 y = normalize(cross(x, z));
      x = normalize(cross(z, y));
      return Mat3(x, y, z);
  #else
      // http://jcgt.org/published/0006/01/01/
      const Vec3 z = zAxis;
      const F32 zSign = (z.z >= 0.0) ? 1.0 : -1.0;
      const F32 a = -1.0 / (zSign + z.z);
      const F32 b = z.x * z.y * a;
      // Square with a multiplication. pow() is undefined for negative bases and z.x/z.y are often negative
      const Vec3 x = Vec3(1.0 + zSign * a * (z.x * z.x), zSign * b, -zSign * z.x);
      const Vec3 y = Vec3(b, zSign + a * (z.y * z.y), -z.y);
      return Mat3(x, y, z);
  #endif
  }
  372. #if defined(ANKI_COMPUTE_SHADER)
  // Bitfield insert: returns src with its lowest "bits" bits replaced by the lowest "bits" bits of ins.
  // See getOptimalGlobalInvocationId8x8Amd
  U32 ABfiM(U32 src, U32 ins, U32 bits)
  {
      const U32 lowMask = (1u << bits) - 1u;
      const U32 inserted = ins & lowMask;
      const U32 kept = src & (~lowMask);
      return inserted | kept;
  }
  // Bitfield extract: returns "bits" bits of src starting at bit "off".
  // See getOptimalGlobalInvocationId8x8Amd
  U32 ABfe(U32 src, U32 off, U32 bits)
  {
      const U32 lowMask = (1u << bits) - 1u;
      const U32 shifted = src >> off;
      return shifted & lowMask;
  }
  // Remap a flat index to a 2D coordinate by rearranging its bits.
  // See getOptimalGlobalInvocationId8x8Amd
  UVec2 ARmpRed8x8(U32 a)
  {
      const U32 x = ABfiM(ABfe(a, 2u, 3u), a, 1u);
      const U32 y = ABfiM(ABfe(a, 3u, 3u), ABfe(a, 1u, 2u), 2u);
      return UVec2(x, y);
  }
  // Compute a global invocation ID for 8x8 workgroups where the position inside the workgroup comes from remapping
  // gl_LocalInvocationIndex with ARmpRed8x8().
  // https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_a.h
  UVec2 getOptimalGlobalInvocationId8x8Amd()
  {
      const UVec2 workgroupOrigin = gl_WorkGroupID.xy * UVec2(8u);
      return workgroupOrigin + ARmpRed8x8(gl_LocalInvocationIndex);
  }
  // Compute a global invocation ID for 8x8 workgroups where the workgroups are re-ordered into vertical tiles that
  // are up to 8 workgroups wide. The position inside the workgroup is left as is.
  // https://github.com/LouisBavoil/ThreadGroupIDSwizzling/blob/master/ThreadGroupTilingX.hlsl
  UVec2 getOptimalGlobalInvocationId8x8Nvidia()
  {
      const U32 MAX_TILE_WIDTH = 8u;
      const UVec2 WORKGROUP_SIZE = UVec2(8u);
      const U32 groupCountX = gl_NumWorkGroups.x;
      const U32 groupCountY = gl_NumWorkGroups.y;

      // A "perfect" tile is a tile that is exactly MAX_TILE_WIDTH workgroups wide
      const U32 groupsPerPerfectTile = MAX_TILE_WIDTH * groupCountY;
      const U32 groupsInAllPerfectTiles = (groupCountX / MAX_TILE_WIDTH) * MAX_TILE_WIDTH * groupCountY;

      const U32 flatGroupId = groupCountX * gl_WorkGroupID.y + gl_WorkGroupID.x;
      const U32 tileId = flatGroupId / groupsPerPerfectTile;
      const U32 idInsideTile = flatGroupId % groupsPerPerfectTile;

      // The last tile is narrower when the dispatch width is not a multiple of MAX_TILE_WIDTH
      const Bool inLastPartialTile = groupsInAllPerfectTiles <= flatGroupId;
      const U32 tileWidth = (inLastPartialTile) ? (groupCountX % MAX_TILE_WIDTH) : MAX_TILE_WIDTH;
      const U32 tileLocalY = idInsideTile / tileWidth;
      const U32 tileLocalX = idInsideTile % tileWidth;

      const U32 swizzledFlatGroupId = tileId * MAX_TILE_WIDTH + tileLocalY * groupCountX + tileLocalX;
      const UVec2 swizzledGroupId = UVec2(swizzledFlatGroupId % groupCountX, swizzledFlatGroupId / groupCountX);

      return WORKGROUP_SIZE * swizzledGroupId + gl_LocalInvocationID.xy;
  }
  431. #endif
  // Gaussian distribution function. s is the standard deviation and x the distance from the mean.
  F32 gaussianWeight(F32 s, F32 x)
  {
      const F32 amplitude = 1.0 / (s * sqrt(2.0 * PI));
      const F32 falloff = exp((x * x) / (-2.0 * s * s));
      return amplitude * falloff;
  }
  // Manual bilinear filtering using 4 reads with a nearest sampler. The reads are done from the centers of the 2x2
  // texels that surround uv and they are blended using the sub-texel position of uv.
  // NOTE(review): textureSize is presumably the size, in texels, of the mip that lod selects - confirm against
  // callers.
  Vec4 bilinearFiltering(texture2D tex, sampler nearestSampler, Vec2 uv, F32 lod, Vec2 textureSize)
  {
      const Vec2 texelSize = 1.0 / textureSize;

      // Move to texel space with the texel centers on integer coordinates
      const Vec2 unnormTexCoord = (uv * textureSize) - 0.5;
      const Vec2 f = fract(unnormTexCoord);

      // Center of the top-left texel of the 2x2 footprint. Reading from uv itself would pick the wrong texels
      // whenever the sub-texel position of uv is less than half a texel
      const Vec2 snapTexCoord = (floor(unnormTexCoord) + 0.5) / textureSize;

      const Vec4 s1 = textureLod(tex, nearestSampler, snapTexCoord, lod);
      const Vec4 s2 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(texelSize.x, 0.0), lod);
      const Vec4 s3 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(0.0, texelSize.y), lod);
      const Vec4 s4 = textureLod(tex, nearestSampler, snapTexCoord + texelSize, lod);

      return mix(mix(s1, s2, f.x), mix(s3, s4, f.x), f.y);
  }
  // Animate a blue noise value over time by offsetting it with a multiple of the golden ratio conjugate. The
  // sequence repeats every 64 frames.
  // https://www.shadertoy.com/view/WsfBDf
  Vec3 animateBlueNoise(Vec3 inputBlueNoise, U32 frameIdx)
  {
      const F32 GOLDEN_RATIO_CONJUGATE = 0.61803398875;
      const F32 frameOffset = F32(frameIdx % 64u) * GOLDEN_RATIO_CONJUGATE;
      return fract(inputBlueNoise + frameOffset);
  }
  457. #if defined(ANKI_FRAGMENT_SHADER)
  /// Compute the mip level from the screen space derivatives of the texture coordinates. The result is never
  /// negative.
  /// https://bgolus.medium.com/distinctive-derivative-differences-cce38d36797b
  /// normalizedUvs is uv*textureResolution
  F32 computeMipLevel(Vec2 normalizedUvs)
  {
      const Vec2 deltaX = dFdxCoarse(normalizedUvs);
      const Vec2 deltaY = dFdyCoarse(normalizedUvs);

      // Work with squared lengths. The 0.5 factor that follows is the square root moved out of the log2
      const F32 maxSquaredDelta = max(dot(deltaX, deltaX), dot(deltaY, deltaY));
      return max(0.0, 0.5 * log2(maxSquaredDelta));
  }
  467. #endif
  /// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
  /// 32bit input. This is an alternative implementation that searches the 2 32bit halves separately. It expects that
  /// the input is not zero.
  I32 findLSB64(U64 v)
  {
      const I32 lowHalfLsb = findLSB(U32(v));
      if(lowHalfLsb >= 0)
      {
          return lowHalfLsb;
      }

      // No bit is set in the low half, the answer is in the high half
      return findLSB(U32(v >> 32ul)) + 32;
  }