Functions.glsl 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Shaders/Common.glsl>
  7. #if defined(ANKI_FRAGMENT_SHADER)
  // Dither a color before quantizing it to 1/255 steps.
  // A per-fragment offset in [0, 1) derived from gl_FragCoord.xy is added to the color (scaled by 255/C), the result
  // is floored, and the value is scaled back by C/255.
  // col: The color to dither.
  // C: Scale applied around the quantization step.
  Vec3 dither(Vec3 col, F32 C)
  {
      // A single scalar seed per fragment; each channel uses a different divisor to decorrelate the pattern.
      const F32 seed = dot(Vec2(171.0, 231.0), gl_FragCoord.xy);
      const Vec3 noise = fract(Vec3(seed) / Vec3(103.0, 71.0, 97.0));

      const Vec3 scaled = col * (255.0 / C) + noise;
      const Vec3 quantized = floor(scaled) / 255.0;
      return quantized * C;
  }
  // Scalar flavor of dither(): adds a per-fragment offset in [0, 1) to the value (scaled by 255/C), floors it and
  // scales it back by C/255.
  // col: The value to dither.
  // C: Scale applied around the quantization step.
  F32 dither(F32 col, F32 C)
  {
      const F32 seed = dot(Vec2(171.0, 231.0), gl_FragCoord.xy);
      const F32 noise = fract(seed / 103.0);

      const F32 scaled = col * (255.0 / C) + noise;
      const F32 quantized = floor(scaled) / 255.0;
      return quantized * C;
  }
  26. #endif
  // Convert a depth value to linear depth given the near and far plane distances.
  // Computes zNear / ((zNear - zFar) + zFar / depth).
  F32 linearizeDepth(F32 depth, F32 zNear, F32 zFar)
  {
      const F32 denominator = (zNear - zFar) + zFar / depth;
      return zNear / denominator;
  }
  // Cheaper variant of linearizeDepth() that takes precomputed coefficients a=(n-f)/n and b=f/n, where n and f are
  // the near and far plane distances. Computes 1 / (a + b / depth).
  F32 linearizeDepthOptimal(F32 depth, F32 a, F32 b)
  {
      const F32 denominator = a + b / depth;
      return 1.0 / denominator;
  }
  // Four-wide version of linearizeDepthOptimal(): linearizes 4 depths at once using the precomputed coefficients
  // a=(n-f)/n and b=f/n. Each component is computed as 1 / (a + b / depth).
  Vec4 linearizeDepthOptimal(Vec4 depths, F32 a, F32 b)
  {
      const Vec4 denominators = a + b / depths;
      return 1.0 / denominators;
  }
  // Project a vector using only the non-zero elements of a perspective matrix.
  // m00, m11 scale x and y; m22 and m23 combine z and w into the output z; the output w is -vec.z.
  Vec4 projectPerspective(Vec4 vec, F32 m00, F32 m11, F32 m22, F32 m23)
  {
      const F32 projX = vec.x * m00;
      const F32 projY = vec.y * m11;
      const F32 projZ = vec.z * m22 + vec.w * m23;
      const F32 projW = -vec.z;
      return Vec4(projX, projY, projZ, projW);
  }
  52. #if defined(ANKI_FRAGMENT_SHADER)
  // Catmull-Rom style filtering using 4 texture fetches. Adapted from shadertoy.com/view/4tyGDD
  // tex/sampl: The texture and the sampler used for the 4 fetches.
  // uv: Normalized texture coordinates.
  // texSize: The size of the texture in texels.
  Vec4 textureCatmullRom4Samples(texture2D tex, sampler sampl, Vec2 uv, Vec2 texSize)
  {
      const Vec2 cell = 2.0 * fract(0.5 * uv * texSize - 0.25) - 1.0;
      const Vec2 t = fract(cell);

      // Polynomial terms shared by the weights and the tap positions
      const Vec2 polyA = (2.0 * t - 3.5) * t + 0.5;
      const Vec2 polyB = (2.0 * t - 2.5) * t - 0.5;
      Vec4 weights = Vec4(t * polyA + 1.0, t * polyB);

      // The two candidate coordinates per axis (xy: first tap, zw: second tap)
      const Vec2 firstTap = (((-2.0 * t + 3.0) * t + 0.5) * t - 1.5) * t / (weights.xy * texSize) + uv;
      const Vec2 secondTap = (((-2.0 * t + 5.0) * t - 2.5) * t - 0.5) / (polyB * texSize) + uv;

      // Flip the sign of the horizontal weights depending on the sign of cell.x * cell.y
      const F32 flip = (cell.x * cell.y > 0.0) ? 1.0 : -1.0;
      weights.xz *= flip;

      const Vec4 s00 = texture(tex, sampl, firstTap);
      const Vec4 s10 = texture(tex, sampl, Vec2(secondTap.x, firstTap.y));
      const Vec4 s01 = texture(tex, sampl, Vec2(firstTap.x, secondTap.y));
      const Vec4 s11 = texture(tex, sampl, secondTap);

      return (s00 * weights.x + s10 * weights.z) * weights.y + (s01 * weights.x + s11 * weights.z) * weights.w;
  }
  67. #endif
  // Bicubic filtering of a texture at an explicit LOD using 4 fetches. Adapted from shadertoy.com/view/4df3Dn
  // tex/sampl: The texture and the sampler used for the 4 fetches.
  // uv: Normalized texture coordinates.
  // lod: The LOD passed to every textureLod call.
  // texSize: The size of the texture in texels.
  Vec4 textureBicubic(texture2D tex, sampler sampl, Vec2 uv, F32 lod, Vec2 texSize)
  {
  // Helper macros that are local to this function. Every one of them is undefined before the function ends so
  // that nothing leaks into the files that include this one.
  // w0..w3: the four cubic weights, g0/g1: pairwise weight sums, h0/h1: offsets of the two taps
  #define w0(a) ((1.0 / 6.0) * ((a) * ((a) * (-(a) + 3.0) - 3.0) + 1.0))
  #define w1(a) ((1.0 / 6.0) * ((a) * (a) * (3.0 * (a)-6.0) + 4.0))
  #define w2(a) ((1.0 / 6.0) * ((a) * ((a) * (-3.0 * (a) + 3.0) + 3.0) + 1.0))
  #define w3(a) ((1.0 / 6.0) * ((a) * (a) * (a)))
  #define g0(a) (w0(a) + w1(a))
  #define g1(a) (w2(a) + w3(a))
  #define h0(a) (-1.0 + w1(a) / (w0(a) + w1(a)))
  #define h1(a) (1.0 + w3(a) / (w2(a) + w3(a)))
  #define texSample(uv) textureLod(tex, sampl, uv, lod)

      // Move to texel space, shifted by half a texel
      uv = uv * texSize + 0.5;
      const Vec2 iuv = floor(uv);
      const Vec2 fuv = fract(uv);

      const F32 g0x = g0(fuv.x);
      const F32 g1x = g1(fuv.x);
      const F32 h0x = h0(fuv.x);
      const F32 h1x = h1(fuv.x);
      const F32 h0y = h0(fuv.y);
      const F32 h1y = h1(fuv.y);

      // The 4 tap positions converted back to normalized coordinates
      const Vec2 p0 = (Vec2(iuv.x + h0x, iuv.y + h0y) - 0.5) / texSize;
      const Vec2 p1 = (Vec2(iuv.x + h1x, iuv.y + h0y) - 0.5) / texSize;
      const Vec2 p2 = (Vec2(iuv.x + h0x, iuv.y + h1y) - 0.5) / texSize;
      const Vec2 p3 = (Vec2(iuv.x + h1x, iuv.y + h1y) - 0.5) / texSize;

      return g0(fuv.y) * (g0x * texSample(p0) + g1x * texSample(p1))
             + g1(fuv.y) * (g0x * texSample(p2) + g1x * texSample(p3));

  #undef w0
  #undef w1
  #undef w2
  #undef w3
  #undef g0
  #undef g1
  #undef h0
  #undef h1
  #undef texSample
  }
  // Hash a 2D value into a pseudo-random number. The fract() result is remapped with 0.5 + 0.5 * x so the returned
  // value lies in [0.5, 1.0).
  F32 rand(Vec2 n)
  {
      const F32 phase = dot(n, Vec2(12.9898, 78.233));
      const F32 hashed = fract(sin(phase) * 43758.5453);
      return 0.5 + 0.5 * hashed;
  }
  // Depth-aware upscale of colorTex. The linearized full resolution depth at uv is compared against the 4 gathered
  // depths of depthHalf. If all 4 differences are below depthThreshold the color is sampled normally with the given
  // sampler. Otherwise the gathered color texel whose depth is closest to the full resolution depth is returned.
  // linearDepthCf: The a and b coefficients expected by linearizeDepthOptimal.
  Vec4 nearestDepthUpscale(Vec2 uv, texture2D depthFull, texture2D depthHalf, texture2D colorTex,
                           sampler linearAnyClampSampler, Vec2 linearDepthCf, F32 depthThreshold)
  {
      // The sampler is not important for the depth reads
      const F32 rawFullDepth = textureLod(depthFull, linearAnyClampSampler, uv, 0.0).r;
      const F32 refDepth = linearizeDepthOptimal(rawFullDepth, linearDepthCf.x, linearDepthCf.y);

      const Vec4 rawHalfDepths = textureGather(sampler2D(depthHalf, linearAnyClampSampler), uv, 0);
      const Vec4 halfDepths = linearizeDepthOptimal(rawHalfDepths, linearDepthCf.x, linearDepthCf.y);

      const Vec4 diffs = abs(Vec4(refDepth) - halfDepths);

      // No major discontinuities, a plain sample is good enough
      if(all(lessThan(diffs, Vec4(depthThreshold))))
      {
          return textureLod(colorTex, linearAnyClampSampler, uv, 0.0);
      }

      // Some discontinuities, gather all 4 candidates per channel
      const Vec4 reds = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 0);
      const Vec4 greens = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 1);
      const Vec4 blues = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 2);
      const Vec4 alphas = textureGather(sampler2D(colorTex, linearAnyClampSampler), uv, 3);

      // Find the candidate with the smallest depth difference. Ties keep the earliest candidate
      U32 best = 0u;
      F32 smallestDiff = diffs.x;
      for(U32 i = 1u; i < 4u; ++i)
      {
          if(diffs[i] < smallestDiff)
          {
              best = i;
              smallestDiff = diffs[i];
          }
      }

      return Vec4(reds[best], greens[best], blues[best], alphas[best]);
  }
  // Helper of bilateralUpsample. Reads the low resolution depth at uv, linearizes it and returns a weight that grows
  // as the linear depth gets closer to ref: 1 / (EPSILON + |ref - linearDepth|).
  F32 _calcDepthWeight(texture2D depthLow, sampler nearestAnyClamp, Vec2 uv, F32 ref, Vec2 linearDepthCf)
  {
      const F32 rawDepth = textureLod(depthLow, nearestAnyClamp, uv, 0.0).r;
      const F32 linearDepth = linearizeDepthOptimal(rawDepth, linearDepthCf.x, linearDepthCf.y);
      const F32 distanceToRef = abs(ref - linearDepth);
      return 1.0 / (EPSILON + distanceToRef);
  }
  // Helper of bilateralUpsample. Samples colorLow at uv + offset * lowInvSize and scales it by the depth weight of
  // that position multiplied by the given weight. The same combined weight is accumulated into normalize.
  // Returns the weighted color.
  Vec4 _sampleAndWeight(texture2D depthLow, texture2D colorLow, sampler linearAnyClamp, sampler nearestAnyClamp,
                        const Vec2 lowInvSize, Vec2 uv, const Vec2 offset, const F32 ref, const F32 weight,
                        const Vec2 linearDepthCf, inout F32 normalize)
  {
      const Vec2 tapUv = uv + offset * lowInvSize;

      const F32 depthWeight = _calcDepthWeight(depthLow, nearestAnyClamp, tapUv, ref, linearDepthCf);
      const Vec4 tapColor = textureLod(colorLow, linearAnyClamp, tapUv, 0.0);

      normalize += weight * depthWeight;
      return tapColor * depthWeight * weight;
  }
  // Depth-aware upsample of a low resolution color texture. A 3x3 neighbourhood of colorLow (in steps of
  // lowInvSize) is accumulated. Every tap is weighted by a fixed kernel weight (0.25 center, 0.125 edges, 0.0625
  // corners) and by how close its low resolution depth is to the high resolution depth at uv. The result is
  // divided by the sum of the weights.
  // linearDepthCf: The a and b coefficients expected by linearizeDepthOptimal.
  Vec4 bilateralUpsample(texture2D depthHigh, texture2D depthLow, texture2D colorLow, sampler linearAnyClamp,
                         sampler nearestAnyClamp, const Vec2 lowInvSize, const Vec2 uv, const Vec2 linearDepthCf)
  {
      const F32 CENTER_WEIGHT = 0.25;
      const F32 EDGE_WEIGHT = 0.125;
      const F32 CORNER_WEIGHT = 0.0625;

      // The first 4 entries are the edges, the last 4 are the corners
      const U32 NEIGHBOUR_COUNT = 8u;
      const Vec2 NEIGHBOUR_OFFSETS[8] = Vec2[8](Vec2(-1.0, 0.0), Vec2(0.0, -1.0), Vec2(1.0, 0.0), Vec2(0.0, 1.0),
                                               Vec2(1.0, 1.0), Vec2(1.0, -1.0), Vec2(-1.0, 1.0), Vec2(-1.0, -1.0));

      const F32 rawHighDepth = textureLod(depthHigh, nearestAnyClamp, uv, 0.0).r;
      const F32 depthRef = linearizeDepthOptimal(rawHighDepth, linearDepthCf.x, linearDepthCf.y);

      F32 weightSum = 0.0;

      // Center tap
      Vec4 accum = _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv,
                                    Vec2(0.0, 0.0), depthRef, CENTER_WEIGHT, linearDepthCf, weightSum);

      // The 8 neighbours
      for(U32 i = 0u; i < NEIGHBOUR_COUNT; ++i)
      {
          const F32 kernelWeight = (i < 4u) ? EDGE_WEIGHT : CORNER_WEIGHT;
          accum += _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv,
                                    NEIGHBOUR_OFFSETS[i], depthRef, kernelWeight, linearDepthCf, weightSum);
      }

      return accum / weightSum;
  }
  // Compute a normalized direction from a 2D coordinate on a cubemap face.
  // norm: The 2D coordinate that scales the face's x and y axes.
  // faceIdx: The face index. Even indices use a positive main axis, odd indices a negative one. Indices 0-1 map to
  //          X, 2-3 to Y and 4-5 to Z.
  Vec3 getCubemapDirection(const Vec2 norm, const U32 faceIdx)
  {
      // Main axis of the face
      const F32 onX = (faceIdx <= 1u) ? 1.0 : 0.0;
      const F32 onY = F32((faceIdx & 2u) >> 1u);
      const F32 onZ = F32((faceIdx & 4u) >> 2u);
      const F32 axisSign = ((faceIdx & 1u) == 1u) ? -1.0 : 1.0;
      const Vec3 zDir = Vec3(onX, onY, onZ) * axisSign;

      // Up axis of the face
      Vec3 yDir;
      if(faceIdx == 2u)
      {
          yDir = Vec3(0.0, 0.0, 1.0);
      }
      else if(faceIdx == 3u)
      {
          yDir = Vec3(0.0, 0.0, -1.0);
      }
      else
      {
          yDir = Vec3(0.0, -1.0, 0.0);
      }

      const Vec3 xDir = cross(zDir, yDir);
      return normalize(norm.x * xDir + norm.y * yDir + zDir);
  }
  // Convert a 3D cubemap direction to 2D coordinates plus a face index. v doesn't need to be normalized.
  // The face is chosen by the component of v with the largest magnitude (Z wins ties, then Y).
  // faceIndex: Output, the face index stored as a float (0-1: X, 2-3: Y, 4-5: Z, odd means negative).
  // Returns 0.5 / |major component| * uv + 0.5.
  Vec2 convertCubeUvs(const Vec3 v, out F32 faceIndex)
  {
      const Vec3 absV = abs(v);
      F32 major;
      Vec2 faceUv;

      if(absV.z >= absV.x && absV.z >= absV.y)
      {
          const Bool negative = v.z < 0.0;
          faceIndex = negative ? 5.0 : 4.0;
          faceUv = Vec2(negative ? -v.x : v.x, -v.y);
          major = absV.z;
      }
      else if(absV.y >= absV.x)
      {
          const Bool negative = v.y < 0.0;
          faceIndex = negative ? 3.0 : 2.0;
          faceUv = Vec2(v.x, negative ? -v.z : v.z);
          major = absV.y;
      }
      else
      {
          const Bool negative = v.x < 0.0;
          faceIndex = negative ? 1.0 : 0.0;
          faceUv = Vec2(negative ? v.z : -v.z, -v.y);
          major = absV.x;
      }

      return 0.5 / major * faceUv + 0.5;
  }
  // Same as convertCubeUvs but the face index is returned as an unsigned integer.
  // The face is chosen by the component of v with the largest magnitude (Z wins ties, then Y).
  // faceIndex: Output, the face index (0-1: X, 2-3: Y, 4-5: Z, odd means negative).
  // Returns 0.5 / |major component| * uv + 0.5.
  Vec2 convertCubeUvsu(const Vec3 v, out U32 faceIndex)
  {
      const Vec3 absV = abs(v);
      F32 major;
      Vec2 faceUv;

      if(absV.z >= absV.x && absV.z >= absV.y)
      {
          const Bool negative = v.z < 0.0;
          faceIndex = negative ? 5u : 4u;
          faceUv = Vec2(negative ? -v.x : v.x, -v.y);
          major = absV.z;
      }
      else if(absV.y >= absV.x)
      {
          const Bool negative = v.y < 0.0;
          faceIndex = negative ? 3u : 2u;
          faceUv = Vec2(v.x, negative ? -v.z : v.z);
          major = absV.y;
      }
      else
      {
          const Bool negative = v.x < 0.0;
          faceIndex = negative ? 1u : 0u;
          faceUv = Vec2(negative ? v.z : -v.z, -v.y);
          major = absV.x;
      }

      return 0.5 / major * faceUv + 0.5;
  }
  // Convert a color to gray by averaging its 3 channels. The average is replicated to all channels.
  Vec3 grayScale(const Vec3 col)
  {
      const F32 channelSum = col.r + col.g + col.b;
      return Vec3(channelSum * (1.0 / 3.0));
  }
  // Change the saturation of a color by interpolating between its luminance (factor 0.0) and the color itself
  // (factor 1.0).
  Vec3 saturateColor(const Vec3 col, const F32 factor)
  {
      const Vec3 LUMINANCE_WEIGHTS = Vec3(0.2125, 0.7154, 0.0721);
      const F32 luminance = dot(col, LUMINANCE_WEIGHTS);
      return mix(Vec3(luminance), col, factor);
  }
  // Apply per-channel gamma correction: every channel of col is raised to the power of 1/gamma.
  Vec3 gammaCorrection(Vec3 gamma, Vec3 col)
  {
      const Vec3 exponent = 1.0 / gamma;
      return pow(col, exponent);
  }
  // Read a texel and sharpen it using its neighbours. The 4 diagonal neighbours are always used; when detailed is
  // true the 4 axis-aligned neighbours are used as well. The result is clamped to be non-negative.
  // sharpenFactor: The strength of the effect. 0.15 is a reasonable value.
  Vec3 readSharpen(texture2D tex, sampler sampl, Vec2 uv, F32 sharpenFactor, Bool detailed)
  {
      const Vec3 center = textureLod(tex, sampl, uv, 0.0).rgb;

      // Diagonal neighbours
      Vec3 neighbours = textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 1)).rgb;
      neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, -1)).rgb;
      neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, -1)).rgb;
      neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 1)).rgb;

      // Axis-aligned neighbours
      if(detailed)
      {
          neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, 1)).rgb;
          neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 0)).rgb;
          neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 0)).rgb;
          neighbours += textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, -1)).rgb;
      }

      // The center is boosted by the number of neighbours that get subtracted
      const F32 neighbourCount = detailed ? 8.0 : 4.0;
      const Vec3 sharpened = center * (neighbourCount * sharpenFactor + 1.0) - sharpenFactor * neighbours;
      return max(Vec3(0.0), sharpened);
  }
  // Erosion filter: returns the per-channel minimum of the texel at uv and its 8 immediate neighbours.
  Vec3 readErosion(texture2D tex, sampler sampl, const Vec2 uv)
  {
      Vec3 smallest = textureLod(tex, sampl, uv, 0.0).rgb;

      // Diagonal neighbours
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 1)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, -1)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, -1)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 1)).rgb, smallest);

      // Axis-aligned neighbours
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, 1)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(1, 0)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(-1, 0)).rgb, smallest);
      smallest = min(textureLodOffset(sampler2D(tex, sampl), uv, 0.0, IVec2(0, -1)).rgb, smallest);

      return smallest;
  }
  // 5 color heatmap from a factor. The colors are black, blue, green, yellow and red, spread evenly over [0, 1].
  // factor: Expected in [0, 1]. Values outside that range are clamped so 0.0 or less gives black and 1.0 or more
  //         gives red.
  Vec3 heatmap(const F32 factor)
  {
      const Vec3 STOPS[5] = Vec3[5](Vec3(0.0, 0.0, 0.0), Vec3(0.0, 0.0, 1.0), Vec3(0.0, 1.0, 0.0),
                                   Vec3(1.0, 1.0, 0.0), Vec3(1.0, 0.0, 0.0));

      const F32 scaled = clamp(factor, 0.0, 1.0) * 4.0;

      // There are only 4 segments. Keep factor == 1.0 in the last one, with a fraction of 1.0, so it maps to red
      // instead of restarting at yellow.
      const F32 segment = min(floor(scaled), 3.0);
      const F32 fractional = scaled - segment;

      const U32 idx = U32(segment);
      return mix(STOPS[idx], STOPS[idx + 1u], fractional);
  }
  // Return a color per cubemap face. The +X is red, -X dark red, +Y green, -Y dark green, +Z blue, -Z dark blue.
  // Any dir greater than 4 gives the -Z color.
  Vec3 colorPerCubeFace(const U32 dir)
  {
      const Vec3 FACE_COLORS[6] = Vec3[6](Vec3(1.0, 0.0, 0.0), Vec3(0.25, 0.0, 0.0), Vec3(0.0, 1.0, 0.0),
                                         Vec3(0.0, 0.25, 0.0), Vec3(0.0, 0.0, 1.0), Vec3(0.0, 0.0, 0.25));

      // Out of range indices fall back to the last entry
      return FACE_COLORS[min(dir, 5u)];
  }
  // Returns true if any of the channels of the color is NaN or infinite.
  Bool incorrectColor(const Vec3 c)
  {
      const Bool hasNan = any(isnan(c));
      const Bool hasInf = any(isinf(c));
      return hasNan || hasInf;
  }
  // Helper of cubeCoordSolidAngle. Computes atan(x * y, sqrt(x^2 + y^2 + 1)).
  F32 areaElement(const F32 x, const F32 y)
  {
      const F32 numerator = x * y;
      const F32 denominator = sqrt(x * x + y * y + 1.0);
      return atan(numerator, denominator);
  }
  // Compute the solid angle of a cubemap texel. The solid angle is the area of the sphere covered by the texel when
  // the cubemap is projected onto it. It is also the delta omega (dω) in the irradiance integral and in other
  // integrals that operate on a sphere.
  // http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/
  // norm: The 2D coordinate of the texel center on the face.
  // cubeFaceSize: The size of the cube face in texels.
  F32 cubeCoordSolidAngle(Vec2 norm, F32 cubeFaceSize)
  {
      const Vec2 halfExtent = Vec2(1.0 / cubeFaceSize);
      const Vec2 lower = norm - halfExtent;
      const Vec2 upper = norm + halfExtent;

      // Combine the area elements of the 4 corners of the texel
      const F32 lowerLower = areaElement(lower.x, lower.y);
      const F32 lowerUpper = areaElement(lower.x, upper.y);
      const F32 upperLower = areaElement(upper.x, lower.y);
      const F32 upperUpper = areaElement(upper.x, upper.y);
      return lowerLower - lowerUpper - upperLower + upperUpper;
  }
  373. // A convenience function to skip out of bounds invocations on post-process compute shaders. Both the arguments should
  374. // be constexpr.
  375. #if defined(ANKI_COMPUTE_SHADER)
  // Returns true when the current invocation lies outside globalInvocationCount and should do no work. The bounds
  // test only matters when globalInvocationCount is not a multiple of workgroupSize in at least one dimension.
  Bool skipOutOfBoundsInvocations(UVec2 workgroupSize, UVec2 globalInvocationCount)
  {
      const Bool hasPartialWorkgroups =
          (globalInvocationCount.x % workgroupSize.x) != 0u || (globalInvocationCount.y % workgroupSize.y) != 0u;

      const Bool outOfBounds =
          gl_GlobalInvocationID.x >= globalInvocationCount.x || gl_GlobalInvocationID.y >= globalInvocationCount.y;

      return hasPartialWorkgroups && outOfBounds;
  }
  387. #endif
  // Create a rotation matrix whose 3rd column is the given direction. The other 2 columns are built using the
  // method described in http://jcgt.org/published/0006/01/01/
  // zAxis: The direction that becomes the 3rd column. NOTE(review): the referenced method expects it to be
  //        normalized, confirm with the callers.
  Mat3 rotationFromDirection(Vec3 zAxis)
  {
      const Vec3 z = zAxis;

      // Named zSign to avoid shadowing the sign() built-in
      const F32 zSign = (z.z >= 0.0) ? 1.0 : -1.0;
      const F32 a = -1.0 / (zSign + z.z);
      const F32 b = z.x * z.y * a;

      // Squares are written as multiplications because pow() is undefined for a negative base in GLSL and the
      // components of a direction can be negative
      const Vec3 x = Vec3(1.0 + zSign * a * (z.x * z.x), zSign * b, -zSign * z.x);
      const Vec3 y = Vec3(b, zSign + a * (z.y * z.y), -z.y);

      return Mat3(x, y, z);
  }
  409. #if defined(ANKI_COMPUTE_SHADER)
  // Helper of getOptimalGlobalInvocationId8x8Amd. Bitfield insert with mask: takes the lowest "bits" bits from ins
  // and the remaining bits from src.
  U32 _ABfiM(U32 src, U32 ins, U32 bits)
  {
      const U32 lowMask = (1u << bits) - 1u;
      const U32 inserted = ins & lowMask;
      const U32 kept = src & (~lowMask);
      return inserted | kept;
  }
  // Helper of getOptimalGlobalInvocationId8x8Amd. Bitfield extract: returns "bits" bits of src starting at bit off.
  U32 _ABfe(U32 src, U32 off, U32 bits)
  {
      const U32 lowMask = (1u << bits) - 1u;
      const U32 shifted = src >> off;
      return shifted & lowMask;
  }
  // Helper of getOptimalGlobalInvocationId8x8Amd. Remaps a flat index to a 2D coordinate by rearranging its bits.
  UVec2 _ARmpRed8x8(U32 a)
  {
      const U32 x = _ABfiM(_ABfe(a, 2u, 3u), a, 1u);
      const U32 y = _ABfiM(_ABfe(a, 3u, 3u), _ABfe(a, 1u, 2u), 2u);
      return UVec2(x, y);
  }
  // Compute a global invocation ID for 8x8 workgroups where the local ID is a bit-remapped version of
  // gl_LocalInvocationIndex.
  // https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_a.h
  UVec2 getOptimalGlobalInvocationId8x8Amd()
  {
      const UVec2 WORKGROUP_SIZE = UVec2(8u);
      const UVec2 remappedLocalId = _ARmpRed8x8(gl_LocalInvocationIndex);
      return gl_WorkGroupID.xy * WORKGROUP_SIZE + remappedLocalId;
  }
  // Compute a global invocation ID for 8x8 workgroups where the workgroup ID is swizzled so that workgroups walk
  // the dispatch grid in vertical tiles that are at most 8 workgroups wide.
  // https://github.com/LouisBavoil/ThreadGroupIDSwizzling/blob/master/ThreadGroupTilingX.hlsl
  UVec2 getOptimalGlobalInvocationId8x8Nvidia()
  {
      const U32 MAX_TILE_WIDTH = 8u;
      const UVec2 WORKGROUP_SIZE = UVec2(8u);

      const U32 groupsPerFullTile = MAX_TILE_WIDTH * gl_NumWorkGroups.y;
      const U32 fullTileCount = gl_NumWorkGroups.x / MAX_TILE_WIDTH;
      const U32 groupsInAllFullTiles = fullTileCount * MAX_TILE_WIDTH * gl_NumWorkGroups.y;

      const U32 flatGroupId = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;
      const U32 tileId = flatGroupId / groupsPerFullTile;
      const U32 idInsideTile = flatGroupId % groupsPerFullTile;

      // The last tile is narrower if the grid width is not a multiple of the tile width
      const Bool inLastPartialTile = groupsInAllFullTiles <= flatGroupId;
      const U32 tileWidth = inLastPartialTile ? (gl_NumWorkGroups.x % MAX_TILE_WIDTH) : MAX_TILE_WIDTH;

      const U32 yInsideTile = idInsideTile / tileWidth;
      const U32 xInsideTile = idInsideTile % tileWidth;

      const U32 swizzledFlatGroupId = tileId * MAX_TILE_WIDTH + yInsideTile * gl_NumWorkGroups.x + xInsideTile;

      const UVec2 swizzledGroupId =
          UVec2(swizzledFlatGroupId % gl_NumWorkGroups.x, swizzledFlatGroupId / gl_NumWorkGroups.x);

      return UVec2(WORKGROUP_SIZE.x * swizzledGroupId.x + gl_LocalInvocationID.x,
                   WORKGROUP_SIZE.y * swizzledGroupId.y + gl_LocalInvocationID.y);
  }
  468. #endif
  // Gaussian distribution function: 1 / (s * sqrt(2 * PI)) * exp(x^2 / (-2 * s^2)).
  // s: The standard deviation.
  // x: The distance from the mean.
  F32 gaussianWeight(F32 s, F32 x)
  {
      const F32 amplitude = 1.0 / (s * sqrt(2.0 * PI));
      const F32 falloff = exp((x * x) / (-2.0 * s * s));
      return amplitude * falloff;
  }
  // Manual bilinear filtering using 4 fetches with a nearest sampler.
  // tex/nearestSampler: The texture and a sampler that is expected to do nearest filtering.
  // uv: Normalized texture coordinates.
  // lod: The LOD passed to every textureLod call.
  // textureSize: The size of the texture in texels. NOTE(review): presumably the size of the given LOD, confirm
  //              with the callers.
  Vec4 bilinearFiltering(texture2D tex, sampler nearestSampler, Vec2 uv, F32 lod, Vec2 textureSize)
  {
      const Vec2 texelSize = 1.0 / textureSize;

      // Texel-space coordinate relative to the texel centers. Its fractional part is the blend factor
      const Vec2 unnormTexCoord = (uv * textureSize) - 0.5;
      const Vec2 f = fract(unnormTexCoord);

      // Center of the top-left texel of the 2x2 footprint. The taps need to start from here and not from uv,
      // otherwise they don't match the blend factors whenever uv is in the first half of a texel
      const Vec2 snapTexCoord = (floor(unnormTexCoord) + 0.5) / textureSize;

      const Vec4 s1 = textureLod(tex, nearestSampler, snapTexCoord, lod);
      const Vec4 s2 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(texelSize.x, 0.0), lod);
      const Vec4 s3 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(0.0, texelSize.y), lod);
      const Vec4 s4 = textureLod(tex, nearestSampler, snapTexCoord + texelSize, lod);

      return mix(mix(s1, s2, f.x), mix(s3, s4, f.x), f.y);
  }
  // Animate a blue noise value over time by offsetting it with a multiple of the golden ratio conjugate and
  // wrapping the result to [0, 1). The animation repeats every 64 frames.
  // https://www.shadertoy.com/view/WsfBDf
  Vec3 animateBlueNoise(Vec3 inputBlueNoise, U32 frameIdx)
  {
      const F32 GOLDEN_RATIO_CONJUGATE = 0.61803398875;
      const F32 frameOffset = F32(frameIdx % 64u) * GOLDEN_RATIO_CONJUGATE;
      return fract(inputBlueNoise + frameOffset);
  }
  494. #if defined(ANKI_FRAGMENT_SHADER)
  /// Compute the mip level from the screen-space derivatives of the texture coordinates. The result is never
  /// negative.
  /// https://bgolus.medium.com/distinctive-derivative-differences-cce38d36797b
  /// normalizedUvs is uv*textureResolution
  F32 computeMipLevel(Vec2 normalizedUvs)
  {
      const Vec2 ddx = dFdxCoarse(normalizedUvs);
      const Vec2 ddy = dFdyCoarse(normalizedUvs);

      // Squared length of the longest derivative. The 0.5 factor below accounts for the square
      const F32 maxSquaredLength = max(dot(ddx, ddx), dot(ddy, ddy));
      const F32 mip = 0.5 * log2(maxSquaredLength);
      return max(0.0, mip);
  }
  504. #endif
  505. #if defined(U64)
  /// The regular findLSB in glslang has some issues with 64bit input since it invokes a builtin that is only
  /// supposed to be used with 32bit input. This alternative splits the value in two 32bit halves. It expects that
  /// the input is not zero.
  I32 findLSB2(U64 v)
  {
      // Prefer the low half. A negative result means that no bit was found there
      const I32 lowIdx = findLSB(U32(v));
      if(lowIdx >= 0)
      {
          return lowIdx;
      }

      const I32 highIdx = findLSB(U32(v >> 32ul));
      return highIdx + 32;
  }
  514. #endif
  /// 32bit flavor of findLSB2 that exists to pair with the 64bit version. It forwards to the findLSB builtin.
  I32 findLSB2(U32 v)
  {
      const I32 bitIdx = findLSB(v);
      return bitIdx;
  }