ffx_fsr2_sample.h 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. // This file is part of the FidelityFX SDK.
  2. //
  3. // Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy
  6. // of this software and associated documentation files (the "Software"), to deal
  7. // in the Software without restriction, including without limitation the rights
  8. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. // copies of the Software, and to permit persons to whom the Software is
  10. // furnished to do so, subject to the following conditions:
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. #ifndef FFX_FSR2_SAMPLE_H
  22. #define FFX_FSR2_SAMPLE_H
  23. // suppress warnings
  24. #ifdef FFX_HLSL
  25. #pragma warning(disable: 4008) // potentially divide by zero
  26. #endif //FFX_HLSL
  // Full-precision 2x2 texel footprint used for bilinear filtering.
  // As filled in by the bilinear fetch macro in this file, the first digit of a
  // field name is the x offset and the second digit is the y offset.
  struct FetchedBilinearSamples {
      FfxFloat32x4 fColor00, fColor10; // y offset 0
      FfxFloat32x4 fColor01, fColor11; // y offset 1
  };
  // Full-precision 4x4 texel footprint used by the Lanczos-style filters.
  // As filled in by the bicubic fetch macro in this file, the first digit of a
  // field name indexes the column (x offsets -1..+2) and the second digit
  // indexes the row (y offsets -1..+2).
  struct FetchedBicubicSamples {
      FfxFloat32x4 fColor00, fColor10, fColor20, fColor30; // row 0
      FfxFloat32x4 fColor01, fColor11, fColor21, fColor31; // row 1
      FfxFloat32x4 fColor02, fColor12, fColor22, fColor32; // row 2
      FfxFloat32x4 fColor03, fColor13, fColor23, fColor33; // row 3
  };
  51. #if FFX_HALF
  // FFX_MIN16_F4 counterpart of FetchedBilinearSamples (same field names and
  // order); only declared when FFX_HALF is enabled.
  struct FetchedBilinearSamplesMin16 {
      FFX_MIN16_F4 fColor00, fColor10; // y offset 0
      FFX_MIN16_F4 fColor01, fColor11; // y offset 1
  };
  // FFX_MIN16_F4 counterpart of FetchedBicubicSamples (same field names and
  // order); only declared when FFX_HALF is enabled.
  struct FetchedBicubicSamplesMin16 {
      FFX_MIN16_F4 fColor00, fColor10, fColor20, fColor30; // row 0
      FFX_MIN16_F4 fColor01, fColor11, fColor21, fColor31; // row 1
      FFX_MIN16_F4 fColor02, fColor12, fColor22, fColor32; // row 2
      FFX_MIN16_F4 fColor03, fColor13, fColor23, fColor33; // row 3
  };
  76. #else //FFX_HALF
  // Without FFX_HALF the Min16 sample struct names simply alias the
  // full-precision structs.
  #define FetchedBilinearSamplesMin16 FetchedBilinearSamples
  #define FetchedBicubicSamplesMin16 FetchedBicubicSamples
  79. #endif //FFX_HALF
  // Component-wise linear blend from A towards B: evaluates A + (B - A) * t.
  FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
  {
      FfxFloat32x4 fDelta = B - A;
      return A + fDelta * t;
  }
  // Bilinear blend of a 2x2 footprint.
  // fPxFrac.x weights the blend along x inside each row; fPxFrac.y weights the
  // blend between the two rows.
  FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
  {
      FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
      FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
      return Linear(fRow0, fRow1, fPxFrac.y);
  }
  91. #if FFX_HALF
  // Min16 overload: component-wise linear blend, A + (B - A) * t.
  FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
  {
      FFX_MIN16_F4 fDelta = B - A;
      return A + fDelta * t;
  }
  // Min16 overload: bilinear blend of a 2x2 footprint (x first, then y).
  FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
  {
      FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
      FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
      return Linear(fRow0, fRow1, fPxFrac.y);
  }
  103. #endif
  // Lanczos-2 kernel weight at x without clamping the argument:
  //   (sin(pi*x) / (pi*x)) * (sin(pi*x/2) / (pi*x/2))
  // Returns 1 when |x| is below FSR2_EPSILON so the 0/0 case is never evaluated
  // into the result.
  FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
  {
      const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

      if (abs(x) < FSR2_EPSILON) {
          return 1.f;
      }

      FfxFloat32 fSincFull = sin(PI * x) / (PI * x);
      FfxFloat32 fSincHalf = sin(0.5f * PI * x) / (0.5f * PI * x);
      return fSincFull * fSincHalf;
  }
  // Lanczos-2 kernel weight with the argument folded to |x| and limited to 2.
  FfxFloat32 Lanczos2(FfxFloat32 x)
  {
      FfxFloat32 fDistance = ffxMin(abs(x), 2.0f);
      return Lanczos2NoClamp(fDistance);
  }
  114. #if FFX_HALF
  115. #if 0
  // Min16 overload of Lanczos2NoClamp. This definition sits inside an `#if 0`
  // region of the file, so it is currently compiled out.
  FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
  {
      const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants

      if (abs(x) < FFX_MIN16_F(FSR2_EPSILON)) {
          return FFX_MIN16_F(1.f);
      }

      FFX_MIN16_F fSincFull = sin(PI * x) / (PI * x);
      FFX_MIN16_F fSincHalf = sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x);
      return fSincFull * fSincHalf;
  }
  121. #endif
  // Min16 overload: folds the argument to |x|, limits it to 2, evaluates
  // Lanczos2NoClamp and converts the result back to FFX_MIN16_F.
  FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
  {
      FFX_MIN16_F fDistance = ffxMin(abs(x), FFX_MIN16_F(2.0f));
      return FFX_MIN16_F(Lanczos2NoClamp(fDistance));
  }
  127. #endif //FFX_HALF
  // FSR1 Lanczos approximation evaluated on the squared distance.
  // x2 is x*x and must be <= 4; no clamping is done here.
  FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
  {
      const FfxFloat32 fScale = 25.0f / 16.0f;
      const FfxFloat32 fBias = 25.0f / 16.0f - 1;

      FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
      FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;
      return (fScale * fTermA * fTermA - fBias) * (fTermB * fTermB);
  }
  135. #if FFX_HALF
  // Min16 overload of the FSR1 Lanczos approximation on the squared distance
  // (x2 = x*x); no clamping is done here.
  FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
  {
      const FFX_MIN16_F fScale = FFX_MIN16_F(25.0f / 16.0f);
      const FFX_MIN16_F fBias = FFX_MIN16_F(25.0f / 16.0f - 1);

      FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
      FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
      return (fScale * fTermA * fTermA - fBias) * (fTermB * fTermB);
  }
  142. #endif //FFX_HALF
  // Approximate Lanczos weight for a squared distance; x2 is limited to 4
  // before the unclamped approximation is evaluated.
  FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
  {
      return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
  }
  148. #if FFX_HALF
  // Min16 overload: limits the squared distance to 4, then evaluates the
  // unclamped approximation.
  FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
  {
      return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
  }
  154. #endif //FFX_HALF
  // Approximate Lanczos weight for a signed distance x; squares x and forwards
  // to the unclamped squared-distance approximation.
  FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
  {
      FfxFloat32 fDistanceSq = x * x;
      return Lanczos2ApproxSqNoClamp(fDistanceSq);
  }
  159. #if FFX_HALF
  // Min16 overload: squares x and forwards to the unclamped squared-distance
  // approximation.
  FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
  {
      FFX_MIN16_F fDistanceSq = x * x;
      return Lanczos2ApproxSqNoClamp(fDistanceSq);
  }
  164. #endif //FFX_HALF
  // Approximate Lanczos weight for a signed distance x; squares x and forwards
  // to the clamping squared-distance approximation.
  FfxFloat32 Lanczos2Approx(FfxFloat32 x)
  {
      FfxFloat32 fDistanceSq = x * x;
      return Lanczos2ApproxSq(fDistanceSq);
  }
  169. #if FFX_HALF
  // Min16 overload: squares x and forwards to the clamping squared-distance
  // approximation.
  FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
  {
      FFX_MIN16_F fDistanceSq = x * x;
      return Lanczos2ApproxSq(fDistanceSq);
  }
  174. #endif //FFX_HALF
  // Kernel weight obtained from SampleLanczos2Weight (declared outside this
  // file), queried with |x|.
  FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
  {
      FfxFloat32 fDistance = abs(x);
      return SampleLanczos2Weight(fDistance);
  }
  179. #if FFX_HALF
  // Min16 overload: queries SampleLanczos2Weight with |x| and converts the
  // result to FFX_MIN16_F.
  FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
  {
      FFX_MIN16_F fDistance = abs(x);
      return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
  }
  184. #endif //FFX_HALF
  // 1D four-tap filter using the LUT-based kernel.
  // The taps fColor0..fColor3 are weighted at distances (-1 - t), (-0 - t),
  // (+1 - t) and (+2 - t); the weighted sum is normalized by the sum of weights.
  FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
  {
      FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
      FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
      FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
      FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

      FfxFloat32x4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FfxFloat32 fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  193. #if FFX_HALF
  // Min16 overload of the 1D four-tap LUT-based filter; taps are weighted at
  // distances (-1 - t), (-0 - t), (+1 - t), (+2 - t) and normalized by the
  // sum of weights.
  FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
  {
      FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
      FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
      FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
      FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);

      FFX_MIN16_F4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FFX_MIN16_F fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  202. #endif
  // 1D four-tap Lanczos-2 filter.
  // The taps fColor0..fColor3 are weighted at distances (-1 - t), (-0 - t),
  // (+1 - t) and (+2 - t); the weighted sum is normalized by the sum of weights.
  FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
  {
      FfxFloat32 fW0 = Lanczos2(-1.f - t);
      FfxFloat32 fW1 = Lanczos2(-0.f - t);
      FfxFloat32 fW2 = Lanczos2(+1.f - t);
      FfxFloat32 fW3 = Lanczos2(+2.f - t);

      FfxFloat32x4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FfxFloat32 fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  // Separable 2D Lanczos-2 filter over a 4x4 footprint with deringing.
  // Each row is filtered along x with fPxFrac.x, the four row results are then
  // filtered along y with fPxFrac.y, and the outcome is clamped to the
  // component-wise min/max of the four central texels.
  FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  237. #if FFX_HALF
  // Min16 overload of the 1D four-tap Lanczos-2 filter; taps are weighted at
  // distances (-1 - t), (-0 - t), (+1 - t), (+2 - t) and normalized by the
  // sum of weights.
  FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
  {
      FFX_MIN16_F fW0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
      FFX_MIN16_F fW1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
      FFX_MIN16_F fW2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
      FFX_MIN16_F fW3 = Lanczos2(FFX_MIN16_F(+2.f) - t);

      FFX_MIN16_F4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FFX_MIN16_F fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  // Min16 overload of the separable 2D Lanczos-2 filter with deringing.
  // Rows are filtered along x, the row results along y, and the outcome is
  // clamped to the component-wise min/max of the four central texels.
  FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  273. #endif //FFX_HALF
  // Separable 2D filter over a 4x4 footprint using the LUT-based kernel, with
  // deringing. Rows are filtered along x with fPxFrac.x, the row results along
  // y with fPxFrac.y, and the outcome is clamped to the component-wise min/max
  // of the four central texels.
  FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  300. #if FFX_HALF
  // Min16 overload of the separable 2D LUT-based filter with deringing.
  // Rows are filtered along x, the row results along y, and the outcome is
  // clamped to the component-wise min/max of the four central texels.
  FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  328. #endif //FFX_HALF
  // 1D four-tap filter using the unclamped Lanczos approximation.
  // The taps fColor0..fColor3 are weighted at distances (-1 - t), (-0 - t),
  // (+1 - t) and (+2 - t); the weighted sum is normalized by the sum of weights.
  // NOTE(review): the unclamped kernel expects squared distances <= 4, which
  // presumably relies on t being in [0, 1] - confirm against callers.
  FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
  {
      FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
      FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
      FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
      FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);

      FfxFloat32x4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FfxFloat32 fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  337. #if FFX_HALF
  // Min16 overload of the 1D four-tap filter using the unclamped Lanczos
  // approximation; taps are weighted at distances (-1 - t), (-0 - t),
  // (+1 - t), (+2 - t) and normalized by the sum of weights.
  FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
  {
      FFX_MIN16_F fW0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
      FFX_MIN16_F fW1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
      FFX_MIN16_F fW2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
      FFX_MIN16_F fW3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);

      FFX_MIN16_F4 fWeighted = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
      FFX_MIN16_F fTotalWeight = fW0 + fW1 + fW2 + fW3;
      return fWeighted / fTotalWeight;
  }
  346. #endif //FFX_HALF
  // Separable 2D filter over a 4x4 footprint using the Lanczos approximation,
  // with deringing. Rows are filtered along x with fPxFrac.x, the row results
  // along y with fPxFrac.y, and the outcome is clamped to the component-wise
  // min/max of the four central texels.
  FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  374. #if FFX_HALF
  // Min16 overload of the separable 2D approximate-Lanczos filter with
  // deringing. Rows are filtered along x, the row results along y, and the
  // outcome is clamped to the component-wise min/max of the four central texels.
  FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
  {
      // Horizontal pass, one result per row.
      FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
      FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
      FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
      FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

      // Vertical pass.
      FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

      // Deringing: bound the result by the range of the central 2x2 texels.
      // TODO: only use 4 by checking jitter
      FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
      FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

      return clamp(fFiltered, fLowerBound, fUpperBound);
  }
  402. #endif
  // Offsets a texel coordinate and clamps it only in the direction of the
  // offset: negative offsets are clamped against 0, positive offsets against
  // iTextureSize - 1, and a zero offset leaves that axis unclamped.
  // Assumes iPxSample is already in range and iPxOffset is a compile-time
  // constant.
  FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
  {
      FfxInt32x2 iPxClamped = iPxSample + iPxOffset;

      if (iPxOffset.x < 0) {
          iPxClamped.x = ffxMax(iPxClamped.x, 0);
      }
      if (iPxOffset.x > 0) {
          iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - 1);
      }
      if (iPxOffset.y < 0) {
          iPxClamped.y = ffxMax(iPxClamped.y, 0);
      }
      if (iPxOffset.y > 0) {
          iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - 1);
      }

      return iPxClamped;
  }
  413. #if FFX_HALF
  // Min16 integer overload: offsets a texel coordinate and clamps it only in
  // the direction of the offset (negative against 0, positive against
  // iTextureSize - 1; a zero offset leaves that axis unclamped).
  FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
  {
      FFX_MIN16_I2 iPxClamped = iPxSample + iPxOffset;

      if (iPxOffset.x < FFX_MIN16_I(0)) {
          iPxClamped.x = ffxMax(iPxClamped.x, FFX_MIN16_I(0));
      }
      if (iPxOffset.x > FFX_MIN16_I(0)) {
          iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - FFX_MIN16_I(1));
      }
      if (iPxOffset.y < FFX_MIN16_I(0)) {
          iPxClamped.y = ffxMax(iPxClamped.y, FFX_MIN16_I(0));
      }
      if (iPxOffset.y > FFX_MIN16_I(0)) {
          iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - FFX_MIN16_I(1));
      }

      return iPxClamped;
  }
  423. #endif //FFX_HALF
  // Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which
  // loads the 4x4 texel footprint around iPxSample (x and y offsets -1..+2)
  // through LoadTexture. Every coordinate goes through ClampCoord and every
  // loaded value is converted to TextureType.
  #define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)       \
      SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                               \
      {                                                                                                        \
          SampleType Fetched;                                                                                  \
                                                                                                               \
          /* y offset -1 */                                                                                    \
          Fetched.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
          Fetched.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
          Fetched.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
          Fetched.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
                                                                                                               \
          /* y offset +0 */                                                                                    \
          Fetched.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
          Fetched.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
          Fetched.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
          Fetched.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
                                                                                                               \
          /* y offset +1 */                                                                                    \
          Fetched.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
          Fetched.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
          Fetched.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
          Fetched.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
                                                                                                               \
          /* y offset +2 */                                                                                    \
          Fetched.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
          Fetched.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
          Fetched.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
          Fetched.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
                                                                                                               \
          return Fetched;                                                                                      \
      }
  // Bicubic (4x4) fetch declaration producing FetchedBicubicSamples of
  // FfxFloat32x4, addressed with FfxInt32x2 coordinates.
  #define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
      DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

  // Bicubic (4x4) fetch declaration producing FetchedBicubicSamplesMin16 of
  // FFX_MIN16_F4, still addressed with FfxInt32x2 coordinates.
  #define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
      DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
  // Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which
  // loads the 2x2 texel footprint at iPxSample (x and y offsets 0..+1) through
  // LoadTexture. Every coordinate goes through ClampCoord and every loaded
  // value is converted to TextureType.
  #define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)      \
      SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                               \
      {                                                                                                        \
          SampleType Fetched;                                                                                  \
          /* y offset +0 */                                                                                    \
          Fetched.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
          Fetched.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
          /* y offset +1 */                                                                                    \
          Fetched.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
          Fetched.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
          return Fetched;                                                                                      \
      }
  // Bilinear (2x2) fetch declaration producing FetchedBilinearSamples of
  // FfxFloat32x4, addressed with FfxInt32x2 coordinates.
  #define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
      DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

  // Bilinear (2x2) fetch declaration producing FetchedBilinearSamplesMin16 of
  // FFX_MIN16_F4, still addressed with FfxInt32x2 coordinates.
  #define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
      DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
  // Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
  // The generated function converts the UV to texel space (minus half a texel),
  // clamps it, splits it into an integer base texel and a fractional part,
  // fetches the footprint with FetchSamples and filters it with
  // InterpolateSamples.
  //
  // BE CAREFUL: there are some precision issues; an expected position such as
  // (3253, 125) commonly comes out as (3252.9989778, 125.001102), so iPxSample
  // can "jitter".
  //
  // The float clamp allows fPxSample == iTextureSize, whose floor is one texel
  // past the last valid index. ClampCoord only clamps in the direction of a
  // non-zero offset and assumes an in-range base, so the integer base is also
  // limited to iTextureSize - 1 to keep every tap inside the texture. UVs that
  // already produced an in-range base are unaffected.
  #define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                       \
      FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                           \
      {                                                                                                            \
          FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);            \
          /* Clamp base coords */                                                                                  \
          fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                             \
          fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                             \
          /* */                                                                                                    \
          FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                     \
          FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                              \
          /* Keep the base texel in range when fPxSample landed exactly on iTextureSize */                         \
          iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1);                                                   \
          iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1);                                                   \
          FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
          return fColorXY;                                                                                         \
      }
  // Declares `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
  // The generated function converts the UV to texel space (minus half a texel)
  // in full precision, clamps it, splits it into an integer base texel and a
  // fractional part (converted to FFX_MIN16_F2), fetches the footprint with
  // FetchSamples and filters it with InterpolateSamples.
  //
  // The float clamp allows fPxSample == iTextureSize, whose floor is one texel
  // past the last valid index. ClampCoord only clamps in the direction of a
  // non-zero offset and assumes an in-range base, so the integer base is also
  // limited to iTextureSize - 1 to keep every tap inside the texture. UVs that
  // already produced an in-range base are unaffected.
  #define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                  \
      FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                           \
      {                                                                                                            \
          FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);            \
          /* Clamp base coords */                                                                                  \
          fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                             \
          fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                             \
          /* */                                                                                                    \
          FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                     \
          FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                \
          /* Keep the base texel in range when fPxSample landed exactly on iTextureSize */                         \
          iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1);                                                   \
          iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1);                                                   \
          FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
          return fColorXY;                                                                                         \
      }
  // Token pasting in two steps so that macro arguments are expanded before
  // they are concatenated.
  #define FFX_FSR2_CONCAT_ID(a, b) a ## b
  #define FFX_FSR2_CONCAT(a, b) FFX_FSR2_CONCAT_ID(a, b)

  // Sampler selection table: index 0 -> Lanczos2, 1 -> Lanczos2LUT,
  // 2 -> Lanczos2Approx.
  #define FFX_FSR2_SAMPLER_1D_0 Lanczos2
  #define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
  #define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

  // Expands to the sampler function name selected by index `idx`.
  #define FFX_FSR2_GET_LANCZOS_SAMPLER1D(idx) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, idx)
  503. #endif //!defined( FFX_FSR2_SAMPLE_H )