TemporalResolve.bslinc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. #include "$ENGINE$/PerCameraData.bslinc"
  2. #include "$ENGINE$/ColorSpace.bslinc"
  3. mixin TemporalResolve
  4. {
  5. mixin PerCameraData;
  6. mixin ColorSpace;
  7. code
  8. {
  9. ////////////////// CUSTOMIZATION PARAMETERS /////////////////////////////
  10. // When enabled, the system will sample a specific sample from a MS texture. UV coordinates are assumed
  11. // to be in pixel space in that case. When disabled sampleIdx parameter is ignored and UV coordinates
  12. // are assumed to be in standard [0, 1] range.
  13. #ifndef MSAA
  14. #define MSAA 0
  15. #endif
  16. // 0 - System will use the velocity of the current pixel
  17. // 1 - System will search 4 neighbor pixels in + pattern, and choose the velocity of the pixel nearest
  18. // to the camera
  19. // 2 - System will search 8 surrounding pixels and choose the velocity of the pixel nearest to the camera
  20. //
  21. // Searching the neighborhood instead of just using current velocity yields nicer edges for objects in
  22. // motion. See TEMPORAL_SEARCH_RADIUS in order to customize how far away to search.
  23. //
  24. // Only relevant if TEMPORAL_LOCAL_VELOCITY is enabled, since without it no per-object velocity
  25. // information is present and everything is blended based on camera movement.
  26. #ifndef TEMPORAL_SEARCH_NEAREST
  27. #define TEMPORAL_SEARCH_NEAREST 1
  28. #endif
  29. // Determine how far away to sample pixels when TEMPORAL_SEARCH_NEAREST is enabled.
  30. // 1 - Immediately adjacent pixels are searched
  31. // 2 - Pixels two away are searched (looks better than 1)
  32. // 3 - etc.
  33. #ifndef TEMPORAL_SEARCH_RADIUS
  34. #define TEMPORAL_SEARCH_RADIUS 2
  35. #endif
  36. // 0 - The system will only account for velocity due to camera movement (not due to individual objects)
  37. // 1 - The system will account both for velocity due to camera movement, as well as individual object
  38. // movement. Requires the user to provide a per-pixel velocity buffer.
  39. #ifndef TEMPORAL_LOCAL_VELOCITY
  40. #define TEMPORAL_LOCAL_VELOCITY 1
  41. #endif
  42. // When enabled, the resolve operation will be performed in YCoCg color space. This can yield better
  43. // results, requires fewer color samples and no value clipping.
  44. #ifndef TEMPORAL_YCOCG
  45. #define TEMPORAL_YCOCG 0
  46. #endif
  47. // When enabled, green color will be used instead of calculating luminosity. This will yield better
  48. // performance but can result in lower quality. Ignored when TEMPORAL_YCOCG is enabled, since luminosity
  49. // is already available as part of the YCoCg color space.
  50. #ifndef TEMPORAL_GREEN_AS_LUMA
  51. #define TEMPORAL_GREEN_AS_LUMA 0
  52. #endif
  53. // When enabled the input samples will be tonemapped using the provided exposure value. Once the final
  54. // value is resolved, it will be scaled back into original range. This ensures high frequency data from
  55. // HDR content is removed, as it would cause aliasing otherwise. We scale the result back into high range
  56. // so the high-quality tonemap shader can be run on it.
  57. #ifndef TEMPORAL_TONEMAP
  58. #define TEMPORAL_TONEMAP 1
  59. #endif
  60. // When enabled an extra low-pass filter is run when sampling scene color, for better quality. Ignored when TEMPORAL_YCOCG is enabled.
  61. #ifndef TEMPORAL_LOWPASS
  62. #define TEMPORAL_LOWPASS 1
  63. #endif
  64. // When enabled, clamp/clip color neighborhood will be deduced using standard deviation of all the
  65. // neighborhood samples. When disabled a min/max operation is performed instead. Ignored when TEMPORAL_YCOCG is enabled (min/max is always used there).
  66. #ifndef TEMPORAL_SMOOTH_NEIGHBORHOOD
  67. #define TEMPORAL_SMOOTH_NEIGHBORHOOD 1
  68. #endif
  69. // When enabled, neighborhood clipping will use an AABB intersection to clip the history value. When disabled
  70. // just a clamp will be used instead. Not relevant when TEMPORAL_YCOCG is enabled because it always uses a clamp.
  71. #ifndef TEMPORAL_CLIP_AABB
  72. #define TEMPORAL_CLIP_AABB 1
  73. #endif
  74. // Determines how the history value is blended with the current value.
  75. // 0 - The system will calculate the optimal blend value automatically
  76. // >0 - A fixed blend factor will be used, equal to the multiplicative inverse of the provided value.
  77. // (i.e. a value of 8 will result in blend factor of 1/8, meaning 12.5% of the current value and 87.5% of the history value will be used)
  78. #ifndef TEMPORAL_BLEND_FACTOR
  79. #define TEMPORAL_BLEND_FACTOR 0
  80. #endif
  81. // Determines how many frames should pixels deemed as "bad" (too different from current pixel) contribute to the
  82. // current frame.
  83. #ifndef TEMPORAL_BAD_RETENTION
  84. #define TEMPORAL_BAD_RETENTION 3
  85. #endif
  86. // Determines how many frames should pixels deemed as "good" (similar to the current pixel) contribute to the
  87. // current frame.
  88. #ifndef TEMPORAL_GOOD_RETENTION
  89. #define TEMPORAL_GOOD_RETENTION 10
  90. #endif
  ////////////////////////// HELPER MACROS /////////////////////////
  // These macros hide the difference between multisampled and regular textures so the rest of the code can be
  // written once:
  //  _TEX2D(n)                 - Declares the function parameter(s) needed to receive texture 'n'
  //  _PTEX2D(n)                - Forwards texture 'n' (as received through _TEX2D) to another function
  //  _SAMPLE(n, uv)            - Reads texture 'n' at 'uv'
  //  _SAMPLEOFF(n, uv, offset) - Reads texture 'n' at 'uv', displaced by an integer pixel 'offset'
  //  _PIXSIZE(n)               - Size of one pixel of 'n', in the same units as 'uv'
  //
  // In the MSAA variant both _SAMPLE and _SAMPLEOFF reference a variable named 'sampleIdx', which must be in
  // scope wherever they are expanded.
  #if MSAA
      #define _TEX2D(n) Texture2DMS n
      #define _PTEX2D(n) n
      #define _SAMPLE(n, uv) n.Load((int2)(uv), sampleIdx)
      // Load() on a multisampled texture needs the sample index, same as _SAMPLE above
      #define _SAMPLEOFF(n, uv, offset) n.Load((int2)(uv) + (offset), sampleIdx)
      #define _PIXSIZE(n) int2(1, 1)
  #else
      #define _TEX2D(n) Texture2D n, SamplerState n##SampState, float2 n##TexelSize
      #define _PTEX2D(n) n, n##SampState, n##TexelSize
      #define _SAMPLE(n, uv) n.Sample(n##SampState, uv)
      #define _SAMPLEOFF(n, uv, offset) n.Sample(n##SampState, uv, offset)
      #define _PIXSIZE(n) n##TexelSize
  #endif
  105. ///////////////////////// HELPER FUNCTIONS ////////////////////////
  // Looks at a 3x3 grid of depth samples centered on 'uv', with neighboring samples spaced
  // TEMPORAL_SEARCH_RADIUS pixels apart, and picks the one with the smallest depth value (treated as the
  // one nearest to the camera).
  //
  // sceneDepth - Depth texture to search. Only the x channel is read.
  // uv         - Location of the center sample (pixel coordinates if MSAA is enabled, [0, 1] range otherwise).
  // sampleIdx  - Index of the sample to read. Only referenced by the MSAA variant of the sampling macros.
  //
  // Returns the location of the chosen sample in xy (same units as 'uv'), and its depth in z. A sample is only
  // chosen if its depth is below 1, otherwise 'uv' and a depth of 1 are returned.
  float3 findNearest3x3(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
  {
      const int spacing = TEMPORAL_SEARCH_RADIUS;

      // xy - pixel offset of the best candidate so far, z - its depth
      float3 best = float3(0, 0, 1);

      // Iterate over fixed grid indices and scale them by the spacing, rather than stepping by the spacing
      // itself. That way the loops always run exactly three times, even if the spacing is set to zero.
      [unroll]
      for(int row = -1; row <= 1; ++row)
      {
          [unroll]
          for(int col = -1; col <= 1; ++col)
          {
              int x = col * spacing;
              int y = row * spacing;

              float depth = _SAMPLEOFF(sceneDepth, uv, int2(x, y)).x;

              // Strict comparison, so on equal depths the sample visited first wins
              if(depth < best.z)
                  best = float3(x, y, depth);
          }
      }

      return float3(uv + best.xy * _PIXSIZE(sceneDepth), best.z);
  }
  // Looks at the depth sample at 'uv' and at the four samples TEMPORAL_SEARCH_RADIUS pixels to the left, right,
  // above and below it (+ pattern), and picks the one with the smallest depth value (treated as the one nearest
  // to the camera).
  //
  // sceneDepth - Depth texture to search. Only the x channel is read.
  // uv         - Location of the center sample (pixel coordinates if MSAA is enabled, [0, 1] range otherwise).
  // sampleIdx  - Index of the sample to read. Only referenced by the MSAA variant of the sampling macros.
  //
  // Returns the location of the chosen sample in xy (same units as 'uv'), and its depth in z. A sample is only
  // chosen if its depth is below 1, otherwise 'uv' and a depth of 1 are returned.
  float3 findNearestCross(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
  {
      const int r = TEMPORAL_SEARCH_RADIUS;

      // Arms of the + pattern, listed in the order they get tested
      const int2 arms[4] = { int2(-r, 0), int2(r, 0), int2(0, -r), int2(0, r) };

      // xy - pixel offset of the best candidate so far, z - its depth
      float3 best = float3(0, 0, 1);

      // The center is tested first, so it wins any ties against the arms (comparison is strict)
      float centerDepth = _SAMPLE(sceneDepth, uv).x;
      if(centerDepth < best.z)
          best = float3(0, 0, centerDepth);

      [unroll]
      for(int i = 0; i < 4; ++i)
      {
          float armDepth = _SAMPLEOFF(sceneDepth, uv, arms[i]).x;
          if(armDepth < best.z)
              best = float3(arms[i].x, arms[i].y, armDepth);
      }

      return float3(uv + best.xy * _PIXSIZE(sceneDepth), best.z);
  }
  // Clips the 'history' value against an axis-aligned box, by moving it along the segment that leads from
  // 'history' towards 'current' until the segment enters the box.
  //
  // boxMin, boxMax - Corners of the box. The box is grown to include 'current' before clipping.
  // history        - Value to clip. Returned unchanged if the computed entry point lies behind it.
  // current        - Value the history is moved towards. The result never moves past it.
  //
  // Returns 'history' moved towards 'current' by a fraction in the [0, 1] range.
  float3 clipAABB(float3 boxMin, float3 boxMax, float3 history, float3 current)
  {
      // Note: Is this necessary? Will "current" always be in the box?
      float3 lo = min(current, boxMin);
      float3 hi = max(current, boxMax);

      float3 boxCenter = (hi + lo) * 0.5f;
      float3 halfSize = hi - boxCenter;

      // Segment from history to current, with the start expressed relative to the box center
      float3 segStart = history - boxCenter;
      float3 segDir = current - history;
      float3 invSegDir = rcp(segDir);

      // Distances along the segment at which it crosses the two box planes on each axis
      float3 tFirstPlane = (halfSize - segStart) * invSegDir;
      float3 tSecondPlane = (-halfSize - segStart) * invSegDir;

      // On each axis the closer plane is where the slab is entered, and the box is entered once all three
      // slabs have been entered
      float3 tSlabEntry = min(tFirstPlane, tSecondPlane);
      float tBoxEntry = max(max(tSlabEntry.x, tSlabEntry.y), tSlabEntry.z);

      float t = saturate(tBoxEntry);
      return history + t * segDir;
  }
  // Packs a velocity so it can be stored in a 16-bit SNORM texture, by halving it.
  // Input velocities in [-2, 2] range (full NDC) are supported. Use decodeVelocity16SNORM() to unpack.
  float2 encodeVelocity16SNORM(float2 velocity)
  {
      const float encodeScale = 0.5f;
      return velocity * encodeScale;
  }
  // Unpacks a velocity that was packed by encodeVelocity16SNORM(), by doubling it.
  // Output velocities cover the [-2, 2] range (full NDC).
  float2 decodeVelocity16SNORM(float2 val)
  {
      const float decodeScale = 2.0f;
      return val * decodeScale;
  }
  ////////////////////// HELPER TONEMAP/COLOR SPACE DEFINES /////////////////////
  // _TONEMAP_COLOR(v) - Scales the HDR color 'v' using the variant of HDRScale*() that matches the active
  // luminance source (Y channel, green channel or full RGB). Expands to just the value when TEMPORAL_TONEMAP
  // is disabled. The enabled variants reference a variable named 'exposureScale', which must be in scope
  // wherever the macro is expanded.
  #if TEMPORAL_TONEMAP
      #if TEMPORAL_YCOCG
          #define _TONEMAP_COLOR(v) HDRScaleY(v, exposureScale)
      #elif TEMPORAL_GREEN_AS_LUMA
          #define _TONEMAP_COLOR(v) HDRScaleG(v, exposureScale)
      #else
          #define _TONEMAP_COLOR(v) HDRScaleRGB(v, exposureScale)
      #endif
  #else // TEMPORAL_TONEMAP
      // Parenthesized so the expansion stays a single operand regardless of what is passed in
      #define _TONEMAP_COLOR(v) (v)
  #endif // TEMPORAL_TONEMAP

  // _SAMPLE_COLOR(n, uv, offset) - Reads the RGB color from texture 'n' at 'uv' displaced by 'offset' pixels,
  // converts it to YCoCg space if TEMPORAL_YCOCG is enabled, and then applies _TONEMAP_COLOR to it.
  #if TEMPORAL_YCOCG
      #define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(RGBToYCoCg(_SAMPLEOFF(n, uv, offset).rgb))
  #else // TEMPORAL_YCOCG
      #define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(_SAMPLEOFF(n, uv, offset).rgb)
  #endif // TEMPORAL_YCOCG
  194. ///////////////////////////// MAIN /////////////////////////////////
  [internal]
  cbuffer TemporalInput
  {
      // Weights used for building the filtered current-frame color. Entry 'i' is applied to neighbor[i] in
      // temporalResolve(): all nine entries when sampling the 3x3 neighborhood, and only the first five when
      // sampling the + pattern (TEMPORAL_YCOCG).
      float gSampleWeights[9];

      // Weights used for building the low-pass filtered current-frame color, indexed the same way as
      // gSampleWeights. Only read when TEMPORAL_LOWPASS is enabled and TEMPORAL_YCOCG is disabled.
      float gSampleWeightsLowpass[9];
  }

  // TODO - Need to use SNORM 16-bit format for velocity
  // TODO - Add gNDCToPrevNDC matrix to PerCameraData
  // TODO - Generate C++ samples (make sure to account for YCoCg path, and remove jitter)
  // TODO - Add notes that velocity buffer isn't currently being used

  // Resolves the color of a single pixel by blending a filtered version of the current frame's color with the
  // reprojected color from the previous frame (history).
  //
  // sceneDepth     - Depth of the current frame. Only the x channel is read.
  // sceneColor     - Color of the current frame.
  // prevColor      - History color. It is read through the same conversion/tonemap steps as sceneColor.
  // velocityBuffer - Encoded per-pixel velocity, see encodeVelocity16SNORM(). A value of zero is treated as
  //                  "no local velocity" and the velocity is derived from camera movement instead. Only present
  //                  if TEMPORAL_LOCAL_VELOCITY is enabled.
  // exposureScale  - Exposure value passed to the HDR scale functions. Only present if TEMPORAL_TONEMAP is
  //                  enabled.
  // uv             - Location of the pixel to resolve (pixel coordinates if MSAA is enabled, [0, 1] range
  //                  otherwise).
  // ndcPos         - Location of the pixel to resolve, in NDC.
  // sampleIdx      - Index of the sample to read. Only referenced by the MSAA variant of the sampling macros.
  //
  // Returns the resolved color in RGB space, with the HDR scaling undone.
  float3 temporalResolve(
      _TEX2D(sceneDepth),
      _TEX2D(sceneColor),
      _TEX2D(prevColor),
  #if TEMPORAL_LOCAL_VELOCITY
      _TEX2D(velocityBuffer),
  #endif // TEMPORAL_LOCAL_VELOCITY
  #if TEMPORAL_TONEMAP
      float exposureScale,
  #endif // TEMPORAL_TONEMAP
      float2 uv,
      float2 ndcPos, // Can be derived from UV, but we usually have it for free, so pass it directly
      int sampleIdx)
  {
      ///////////// DETERMINE PER-PIXEL VELOCITY & CURRENT DEPTH ///////////////////
      float curDepth;
      float2 velocity;

  #if TEMPORAL_LOCAL_VELOCITY
      #if TEMPORAL_SEARCH_NEAREST == 1
      float3 nearest = findNearestCross(_PTEX2D(sceneDepth), uv, sampleIdx);
      velocity = _SAMPLE(velocityBuffer, nearest.xy).xy;
      curDepth = nearest.z;
      #elif TEMPORAL_SEARCH_NEAREST == 2
      float3 nearest = findNearest3x3(_PTEX2D(sceneDepth), uv, sampleIdx);
      velocity = _SAMPLE(velocityBuffer, nearest.xy).xy;
      curDepth = nearest.z;
      #else // TEMPORAL_SEARCH_NEAREST
      velocity = _SAMPLE(velocityBuffer, uv).xy;
      curDepth = _SAMPLE(sceneDepth, uv).x;
      #endif // TEMPORAL_SEARCH_NEAREST
  #else // TEMPORAL_LOCAL_VELOCITY
      velocity = 0;
      curDepth = _SAMPLE(sceneDepth, uv).x;
  #endif // TEMPORAL_LOCAL_VELOCITY

      ///////////////////// DETERMINE PREV. FRAME UV //////////////////////////////
      float2 prevNdcPos;

      // The check is done on the still-encoded value, zero meaning nothing was written for this pixel
      bool hasLocalVelocity = (abs(velocity.x) + abs(velocity.y)) > 0;
      if(hasLocalVelocity)
      {
          velocity = decodeVelocity16SNORM(velocity);
          prevNdcPos = float2(ndcPos - velocity);
      }
      else
      {
          // Assumes velocity due to camera movement
          float4 currentNDC = float4(ndcPos, curDepth, 1);
          float4 prevClip = mul(gNDCToPrevNDC, currentNDC);
          prevNdcPos = prevClip.xy / prevClip.w;
      }

  #if MSAA
      float2 prevUV = NDCToScreen(prevNdcPos);
  #else
      float2 prevUV = NDCToUV(prevNdcPos);
  #endif

      /////////////// GET FILTERED COLOR VALUE AND NEIGHBORHOOD MIN/MAX /////////////
  #if TEMPORAL_YCOCG
      // YCOCG only requires a + pattern for good quality
      float3 neighbor[5];
      neighbor[0] = _SAMPLE_COLOR(sceneColor, uv, int2(-1,  0));
      neighbor[1] = _SAMPLE_COLOR(sceneColor, uv, int2( 0, -1));
      neighbor[2] = _SAMPLE_COLOR(sceneColor, uv, int2( 0,  0));
      neighbor[3] = _SAMPLE_COLOR(sceneColor, uv, int2( 1,  0));
      neighbor[4] = _SAMPLE_COLOR(sceneColor, uv, int2( 0,  1));

      float3 filtered = 0;
      [unroll]
      for(uint i = 0; i < 5; ++i)
          filtered += neighbor[i] * gSampleWeights[i];

      float3 neighborMin = min(min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])),
          neighbor[4]);
      float3 neighborMax = max(max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])),
          neighbor[4]);
  #else // TEMPORAL_YCOCG
      float3 neighbor[9];
      neighbor[0] = _SAMPLE_COLOR(sceneColor, uv, int2(-1, -1));
      neighbor[1] = _SAMPLE_COLOR(sceneColor, uv, int2( 0, -1));
      neighbor[2] = _SAMPLE_COLOR(sceneColor, uv, int2( 1, -1));
      neighbor[3] = _SAMPLE_COLOR(sceneColor, uv, int2(-1,  0));
      neighbor[4] = _SAMPLE_COLOR(sceneColor, uv, int2( 0,  0));
      neighbor[5] = _SAMPLE_COLOR(sceneColor, uv, int2( 1,  0));
      neighbor[6] = _SAMPLE_COLOR(sceneColor, uv, int2(-1,  1));
      neighbor[7] = _SAMPLE_COLOR(sceneColor, uv, int2( 0,  1));
      neighbor[8] = _SAMPLE_COLOR(sceneColor, uv, int2( 1,  1));

      // All per-neighbor sums are gathered in a single pass, so the loop counter is declared only once in
      // this scope. Each sum still accumulates its terms in neighbor order.
      float3 filtered = 0;
      #if TEMPORAL_LOWPASS
      float3 filteredLow = 0;
      #endif // TEMPORAL_LOWPASS
      #if TEMPORAL_SMOOTH_NEIGHBORHOOD
      float3 mean = 0;
      float3 meanSqrd = 0;
      #endif // TEMPORAL_SMOOTH_NEIGHBORHOOD

      [unroll]
      for(uint i = 0; i < 9; ++i)
      {
          filtered += neighbor[i] * gSampleWeights[i];
      #if TEMPORAL_LOWPASS
          filteredLow += neighbor[i] * gSampleWeightsLowpass[i];
      #endif // TEMPORAL_LOWPASS
      #if TEMPORAL_SMOOTH_NEIGHBORHOOD
          mean += neighbor[i];
          meanSqrd += neighbor[i] * neighbor[i];
      #endif // TEMPORAL_SMOOTH_NEIGHBORHOOD
      }

      #if !TEMPORAL_LOWPASS
      float3 filteredLow = filtered;
      #endif // !TEMPORAL_LOWPASS

      #if TEMPORAL_SMOOTH_NEIGHBORHOOD
      // Calculate standard deviation and determine neighborhood min/max based on it
      mean /= 9.0f;
      meanSqrd /= 9.0f;

      // abs() guards the square root against small negative values caused by rounding
      float3 stdDev = sqrt(abs(meanSqrd - mean * mean));
      float3 neighborMin = mean - stdDev;
      float3 neighborMax = mean + stdDev;
      #else // TEMPORAL_SMOOTH_NEIGHBORHOOD
      float3 neighborMin = min(min(
          min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])),
          min(min(neighbor[4], neighbor[5]), min(neighbor[6], neighbor[7]))),
          neighbor[8]);
      float3 neighborMax = max(max(
          max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])),
          max(max(neighbor[4], neighbor[5]), max(neighbor[6], neighbor[7]))),
          neighbor[8]);
      #endif // TEMPORAL_SMOOTH_NEIGHBORHOOD
  #endif // TEMPORAL_YCOCG

      /////////////////// GET PREVIOUS FRAME COLOR ///////////////////////
      float3 prevColorVal = _SAMPLE_COLOR(prevColor, prevUV, int2(0, 0));

      ///////////////////// CLAMP TO NEIGHBORHOOD ////////////////////////
      // Clamping to neighborhood ensures we don't blend with values that are too
      // different, which can happen when history data becomes invalid.
  #if TEMPORAL_YCOCG
      prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
  #else // TEMPORAL_YCOCG
      // Uses low-pass to reduce flickering
      #if TEMPORAL_CLIP_AABB
      prevColorVal = clipAABB(neighborMin, neighborMax, prevColorVal, filteredLow);
      #else // TEMPORAL_CLIP_AABB
      prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
      #endif // TEMPORAL_CLIP_AABB
  #endif // TEMPORAL_YCOCG

      //////////////// BLEND BETWEEN CURRENT AND HISTORY //////////////////
      // Find out how much impact should the previous frame's color have
  #if TEMPORAL_BLEND_FACTOR // Fixed blend factor
      // Here the blend amount is the weight of the CURRENT color, the rest comes from history
      float blendAmount = 1.0f / (TEMPORAL_BLEND_FACTOR);
      float3 output = lerp(prevColorVal, filtered, blendAmount);
  #else // TEMPORAL_BLEND_FACTOR
      #if TEMPORAL_YCOCG
      float lumaCurrent = filtered.r;
      float lumaHistory = prevColorVal.r;
      #else // TEMPORAL_YCOCG
          #if TEMPORAL_GREEN_AS_LUMA
      float lumaCurrent = filtered.g;
      float lumaHistory = prevColorVal.g;
          #else // TEMPORAL_GREEN_AS_LUMA
      float lumaCurrent = LuminanceRGB(filtered);
      float lumaHistory = LuminanceRGB(prevColorVal);
          #endif // TEMPORAL_GREEN_AS_LUMA
      #endif // TEMPORAL_YCOCG

      // Based on T. Lottes: https://www.youtube.com/watch?v=WzpLWzGvFK4&t=18m
      // Weight is 1 when the luminances match, and drops as their relative difference grows
      float blendWeight = 1.0f - abs(lumaCurrent - lumaHistory) / max(max(lumaCurrent, lumaHistory), 0.001f);

      float weightBad = 1.0f - 1.0f / (TEMPORAL_BAD_RETENTION);
      float weightGood = 1.0f - 1.0f / (TEMPORAL_GOOD_RETENTION);

      // Here the blend amount is the weight of the HISTORY color, the rest comes from the current frame
      float blendAmount = lerp(weightBad, weightGood, blendWeight * blendWeight);
      float3 output = lerp(filtered, prevColorVal, blendAmount);
  #endif // TEMPORAL_BLEND_FACTOR

      //////// UNDO TONEMAP & MOVE BACK TO RGB SPACE //////////////////////
  #if TEMPORAL_TONEMAP
      #if TEMPORAL_YCOCG
      output = HDRScaleYInv(output, exposureScale);
      #elif TEMPORAL_GREEN_AS_LUMA
      output = HDRScaleGInv(output, exposureScale);
      #else
      output = HDRScaleRGBInv(output, exposureScale);
      #endif
  #endif // TEMPORAL_TONEMAP

  #if TEMPORAL_YCOCG
      output = YCoCgToRGB(output);
  #endif // TEMPORAL_YCOCG

      // Note: Potential improvements:
      // - Add a sharpen step
      // - Use filtering when sampling history
      // - Properly handle borders when sampling neighbors
      // - Better blend amount calculation? (Needs experimentation)
      return output;
  }
  // Remove the helper macros so they don't leak into code that follows this mixin
  #undef _TEX2D
  #undef _PTEX2D
  #undef _SAMPLE
  #undef _SAMPLEOFF
  #undef _PIXSIZE
  #undef _TONEMAP_COLOR
  #undef _SAMPLE_COLOR
  // NOTE(review): the two names below are not defined anywhere in the visible part of this file. They are kept
  // because removing a macro that doesn't exist is harmless - confirm they are leftovers before deleting.
  #undef _TONEMAP_COLOR_INV
  #undef _RESOLVE_COLOR
  394. };
  395. };