#include "$ENGINE$/PerCameraData.bslinc"
#include "$ENGINE$/ColorSpace.bslinc"

// Temporal resolve helper. Exposes temporalResolve(), which blends the current frame's color with a re-projected
// sample of the previous frame's color. Behavior is customized at compile time through the defines below; every
// define can be overridden by the including shader before this mixin is pulled in.
mixin TemporalResolve
{
	mixin PerCameraData;
	mixin ColorSpace;

	code
	{
		////////////////// CUSTOMIZATION PARAMETERS /////////////////////////////

		// When enabled, the system will sample a specific sample from a MS texture. UV coordinates are assumed
		// to be in pixel space in that case. When disabled sampleIdx parameter is ignored and UV coordinates
		// are assumed to be in standard [0, 1] range.
		#ifndef MSAA
			#define MSAA 0
		#endif

		// Only relevant when MSAA is enabled. When disabled color textures are assumed to be non-MSAA. When
		// enabled all textures are assumed to be MSAA.
		#ifndef MSAA_COLOR
			#define MSAA_COLOR MSAA
		#endif

		// 0 - System will use the velocity of the current pixel
		// 1 - System will search 4 neighbor pixels in + pattern, and choose the velocity of the pixel nearest
		//     to the camera
		// 2 - System will search 8 surrounding pixels and choose the velocity of the pixel nearest to the camera
		//
		// Searching the neighborhood instead of just using current velocity yields nicer edges for objects in
		// motion. See TEMPORAL_SEARCH_RADIUS in order to customize how far away to search.
		//
		// Only relevant if TEMPORAL_LOCAL_VELOCITY is enabled, since without it no per-object velocity
		// information is present and everything is blended based on camera movement.
		#ifndef TEMPORAL_SEARCH_NEAREST
			#define TEMPORAL_SEARCH_NEAREST 1
		#endif

		// Determine how far away to sample pixels when TEMPORAL_SEARCH_NEAREST is enabled.
		// 1 - Immediately adjacent pixels are searched
		// 2 - Pixels two away are searched (looks better than 1)
		// 3 - etc.
		//
		// Must be at least 1: the 3x3 search advances its loops by this value, so 0 would never terminate.
		#ifndef TEMPORAL_SEARCH_RADIUS
			#define TEMPORAL_SEARCH_RADIUS 2
		#endif

		// 0 - The system will only account for velocity due to camera movement (not due to individual objects)
		// 1 - The system will account both for velocity due to camera movement, as well as individual object
		//     movement. Requires the user to provide a per-pixel velocity buffer.
		#ifndef TEMPORAL_LOCAL_VELOCITY
			#define TEMPORAL_LOCAL_VELOCITY 1
		#endif

		// When enabled, the resolve operation will be performed in YCoCg color space. This can yield better
		// results, requires less color samples and no value clipping.
		#ifndef TEMPORAL_YCOCG
			#define TEMPORAL_YCOCG 0
		#endif

		// When enabled, green color will be used instead of calculating luminosity. This will yield better
		// performance but can result in lower quality. Ignored when TEMPORAL_YCOCG is enabled, since luminosity
		// is already available as part of the YCoCg color space.
		#ifndef TEMPORAL_GREEN_AS_LUMA
			#define TEMPORAL_GREEN_AS_LUMA 0
		#endif

		// When enabled the input samples will be tonemapped using the provided exposure value. Once the final
		// value is resolved, it will be scaled back into original range. This ensures high frequency data from
		// HDR content is removed, as it would cause aliasing otherwise. We scale the result back into high range
		// so the high-quality tonemap shader can be run on it.
		#ifndef TEMPORAL_TONEMAP
			#define TEMPORAL_TONEMAP 1
		#endif

		// When enabled an extra low-pass filter is run when sampling scene color, for better quality.
		#ifndef TEMPORAL_LOWPASS
			#define TEMPORAL_LOWPASS 1
		#endif

		// When enabled, clamp/clip color neighborhood will be deduced using standard deviation of all the
		// neighborhood samples. When disabled a min/max operation is performed instead.
		#ifndef TEMPORAL_SMOOTH_NEIGHBORHOOD
			#define TEMPORAL_SMOOTH_NEIGHBORHOOD 1
		#endif

		// When enabled, neighborhood clipping will use an AABB intersection to clip the history value. When disabled
		// just a clamp will be used instead. Not relevant when TEMPORAL_YCOCG is enabled because it always uses a clamp.
		#ifndef TEMPORAL_CLIP_AABB
			#define TEMPORAL_CLIP_AABB 1
		#endif

		// Determines how is the history value blended with the current value.
		// 0 - The system will calculate the optimal blend value automatically
		// >0 - A fixed blend factor will be used, equal to the multiplicative inverse of the provided value.
		//      The factor is the weight of the current frame's color; the remainder is taken from history.
		//      (i.e. a value of 8 will result in blend factor of 1/8, meaning the output is made up of 12.5% of
		//      the current value and 87.5% of the history value)
		#ifndef TEMPORAL_BLEND_FACTOR
			#define TEMPORAL_BLEND_FACTOR 0
		#endif

		// Determines how many frames should pixels deemed as "bad" (too different from current pixel) contribute to the
		// current frame.
		#ifndef TEMPORAL_BAD_RETENTION
			#define TEMPORAL_BAD_RETENTION 3
		#endif

		// Determines how many frames should pixels deemed as "good" (similar to the current pixel) contribute to the
		// current frame.
		#ifndef TEMPORAL_GOOD_RETENTION
			#define TEMPORAL_GOOD_RETENTION 10
		#endif

		////////////////////////// HELPER MACROS /////////////////////////

		// _TEX2D(n)                    - Declares function parameter(s) for a (depth/velocity) texture named n
		// _TEXCOLOR(n)                 - Declares function parameter(s) for a color texture named n
		// _PTEX2D(n)                   - Forwards the parameter(s) declared by _TEX2D(n) to another function
		// _SAMPLE(n, uv)               - Reads texture n at uv
		// _SAMPLEOFF(n, uv, offset)    - Reads texture n at uv, offset by an integer number of pixels
		// _SAMPLECOL(n, uv, offset)    - Same as _SAMPLEOFF, but for textures declared with _TEXCOLOR
		// _PIXSIZE(n)                  - Size of a single pixel of n, in the units used by uv
		//
		// Note: The MSAA variants reference a variable named sampleIdx, which must be in scope at the point of use.
		#if MSAA
			#define _TEX2D(n) Texture2DMS n

			#if MSAA_COLOR
				#define _TEXCOLOR(n) Texture2DMS n
			#else
				#define _TEXCOLOR(n) Texture2D n, SamplerState n##SampState, float2 n##TexelSize
			#endif

			#define _PTEX2D(n) n
			#define _SAMPLE(n, uv) n.Load((int2)uv, sampleIdx)
			#define _SAMPLEOFF(n, uv, offset) n.Load((int2)(uv) + offset, sampleIdx)

			#if MSAA_COLOR
				#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#else
				#define _SAMPLECOL(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#endif

			#define _PIXSIZE(n) int2(1, 1)
		#else
			#define _TEX2D(n) Texture2D n, SamplerState n##SampState, float2 n##TexelSize
			#define _TEXCOLOR(n) _TEX2D(n)
			#define _PTEX2D(n) n, n##SampState, n##TexelSize
			#define _SAMPLE(n, uv) n.Sample(n##SampState, uv)
			#define _SAMPLEOFF(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#define _PIXSIZE(n) n##TexelSize
		#endif

		///////////////////////// HELPER FUNCTIONS ////////////////////////

		// Searches a 3x3 grid of depth samples centered on uv, spaced TEMPORAL_SEARCH_RADIUS pixels apart, and
		// picks the sample with the smallest depth value. Only samples with depth below 1 are considered; if there
		// are none the center location is returned with a depth of 1.
		//
		// Returns the location of the chosen sample in .xy (same units as uv) and its depth in .z.
		float3 findNearest3x3(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			int r = TEMPORAL_SEARCH_RADIUS;

			// xy - pixel offset of the best sample so far, z - its depth
			float3 dmin = float3(0, 0, 1);

			[unroll]
			for(int y = -r; y <= r; y += r)
			{
				[unroll]
				for(int x = -r; x <= r; x += r)
				{
					float depth = _SAMPLEOFF(sceneDepth, uv, int2(x, y)).x;
					dmin = depth < dmin.z ? float3(x, y, depth) : dmin;
				}
			}

			return float3(uv + dmin.xy * _PIXSIZE(sceneDepth), dmin.z);
		}

		// Searches five depth samples in a + pattern (center, plus left/right/up/down at a distance of
		// TEMPORAL_SEARCH_RADIUS pixels) and picks the sample with the smallest depth value. Only samples with depth
		// below 1 are considered; if there are none the center location is returned with a depth of 1.
		//
		// Returns the location of the chosen sample in .xy (same units as uv) and its depth in .z.
		float3 findNearestCross(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			int r = TEMPORAL_SEARCH_RADIUS;

			// xy - pixel offset of the best sample so far, z - its depth
			float3 dmin = float3(0, 0, 1);

			{
				float depth = _SAMPLE(sceneDepth, uv).x;
				dmin = depth < dmin.z ? float3(0, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(-r, 0)).x;
				dmin = depth < dmin.z ? float3(-r, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(r, 0)).x;
				dmin = depth < dmin.z ? float3(r, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(0, -r)).x;
				dmin = depth < dmin.z ? float3(0, -r, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(0, r)).x;
				dmin = depth < dmin.z ? float3(0, r, depth) : dmin;
			}

			return float3(uv + dmin.xy * _PIXSIZE(sceneDepth), dmin.z);
		}

		// Moves the history value along the segment from history towards current, by the amount determined from
		// intersecting that segment with the box [boxMin, boxMax]. The box is first grown so it contains current.
		// The travelled fraction is saturated, so the result always lies between history and current (inclusive).
		float3 clipAABB(float3 boxMin, float3 boxMax, float3 history, float3 current)
		{
			// Note: Is this necessary? Will "current" always be in the box?
			boxMin = min(current, boxMin);
			boxMax = max(current, boxMax);

			float3 center = (boxMax + boxMin) * 0.5f;
			float3 extents = boxMax - center;

			float3 origin = history - center; // Relative to box
			float3 dir = current - history;

			// NOTE(review): Components of dir that are zero (history equal to current on that axis) make rcp()
			// produce a non-finite value. The result then depends on how min/max/saturate treat such values on the
			// target platform - confirm this is well behaved on all backends.
			float3 rDir = rcp(dir);
			float3 tNeg = (extents - origin) * rDir;
			float3 tPos = (-extents - origin) * rDir;

			// Per-axis nearest slab hit, then the furthest of those across all axes
			float t = saturate(max(max(min(tNeg.x, tPos.x), min(tNeg.y, tPos.y)), min(tNeg.z, tPos.z)));
			return history + t * dir;
		}

		// Encodes velocity into a format suitable for storing in a 16-bit SNORM texture.
		// Velocity range of [-2, 2] is supported (full NDC).
		float2 encodeVelocity16SNORM(float2 velocity)
		{
			return velocity * 0.5f;
		}

		// Decodes velocity from an encoded 16-bit SNORM format. See encodeVelocity16SNORM().
		// Velocity range of [-2, 2] is supported (full NDC).
		float2 decodeVelocity16SNORM(float2 val)
		{
			return val * 2.0f;
		}

		////////////////////// HELPER TONEMAP/COLOR SPACE DEFINES /////////////////////

		// Automatically scale HDR values based on luminance, if enabled
		// Note: The enabled variants reference a variable named exposureScale, which must be in scope at the point of use.
		#if TEMPORAL_TONEMAP
			#if TEMPORAL_YCOCG
				#define _TONEMAP_COLOR(v) HDRScaleY(v, exposureScale)
			#elif TEMPORAL_GREEN_AS_LUMA
				#define _TONEMAP_COLOR(v) HDRScaleG(v, exposureScale)
			#else
				#define _TONEMAP_COLOR(v) HDRScaleRGB(v, exposureScale)
			#endif
		#else // TEMPORAL_TONEMAP
			#define _TONEMAP_COLOR(v) v
		#endif // TEMPORAL_TONEMAP

		// Automatically convert from/to YCoCg space, if enabled
		#if TEMPORAL_YCOCG
			#define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(RGBToYCoCg(_SAMPLECOL(n, uv, offset).rgb))
		#else // TEMPORAL_YCOCG
			#define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(_SAMPLECOL(n, uv, offset).rgb)
		#endif // TEMPORAL_YCOCG

		///////////////////////////// MAIN /////////////////////////////////

		[internal]
		cbuffer TemporalInput
		{
			// Per-neighbor weights used for computing the filtered current color. Indexed in the same order the
			// neighbors are sampled in temporalResolve().
			float gSampleWeights[9];

			// Per-neighbor weights used for the low-pass filtered color (only read if TEMPORAL_LOWPASS is enabled
			// and TEMPORAL_YCOCG is disabled).
			float gSampleWeightsLowpass[9];
		}

		// Resolves the color of a single pixel by blending the (filtered) current frame color with the color sampled
		// from the previous frame at the re-projected location.
		//
		// sceneDepth      - Depth of the current frame.
		// sceneColor      - Color of the current frame.
		// prevColor       - Color of the previous frame (history).
		// velocityBuffer  - Per-pixel velocity, as encoded by encodeVelocity16SNORM(). A value of zero means the pixel
		//                   has no velocity of its own, in which case re-projection through gNDCToPrevNDC is used
		//                   instead. Only present if TEMPORAL_LOCAL_VELOCITY is enabled.
		// exposureScale   - Exposure value used for scaling the HDR samples. Only present if TEMPORAL_TONEMAP is enabled.
		// uv              - Location of the pixel to resolve. In pixels if MSAA is enabled, in [0, 1] range otherwise.
		// ndcPos          - Location of the pixel to resolve, in NDC.
		// sampleIdx       - Index of the sample to read from MS textures. Ignored if MSAA is disabled.
		//
		// Returns the resolved color, in RGB space and with the tonemap scale undone.
		float3 temporalResolve(
			_TEX2D(sceneDepth),
			_TEXCOLOR(sceneColor),
			_TEXCOLOR(prevColor),
			#if TEMPORAL_LOCAL_VELOCITY
			_TEX2D(velocityBuffer),
			#endif // TEMPORAL_LOCAL_VELOCITY
			#if TEMPORAL_TONEMAP
			float exposureScale,
			#endif // TEMPORAL_TONEMAP
			float2 uv,
			float2 ndcPos, // Can be derived from UV, but we usually have it for free, so pass it directly
			int sampleIdx)
		{
			///////////// DETERMINE PER-PIXEL VELOCITY & CURRENT DEPTH ///////////////////
			float curDepth;
			float2 velocity;

			// Note: Only the first two channels of the velocity buffer are used
			#if TEMPORAL_LOCAL_VELOCITY
				#if TEMPORAL_SEARCH_NEAREST == 1
					float3 nearest = findNearestCross(_PTEX2D(sceneDepth), uv, sampleIdx);
					velocity = _SAMPLE(velocityBuffer, nearest.xy).xy;
					curDepth = nearest.z;
				#elif TEMPORAL_SEARCH_NEAREST == 2
					float3 nearest = findNearest3x3(_PTEX2D(sceneDepth), uv, sampleIdx);
					velocity = _SAMPLE(velocityBuffer, nearest.xy).xy;
					curDepth = nearest.z;
				#else // TEMPORAL_SEARCH_NEAREST
					velocity = _SAMPLE(velocityBuffer, uv).xy;
					curDepth = _SAMPLE(sceneDepth, uv).x;
				#endif // TEMPORAL_SEARCH_NEAREST
			#else // TEMPORAL_LOCAL_VELOCITY
				velocity = 0;
				curDepth = _SAMPLE(sceneDepth, uv).x;
			#endif // TEMPORAL_LOCAL_VELOCITY

			///////////////////// DETERMINE PREV. FRAME UV //////////////////////////////
			float2 prevNdcPos;
			bool hasLocalVelocity = (abs(velocity.x) + abs(velocity.y)) > 0;
			if(hasLocalVelocity)
			{
				velocity = decodeVelocity16SNORM(velocity);
				prevNdcPos = float2(ndcPos - velocity);
			}
			else
			{
				// Assumes velocity due to camera movement
				float4 currentNDC = float4(ndcPos, curDepth, 1);
				float4 prevClip = mul(gNDCToPrevNDC, currentNDC);
				prevNdcPos = prevClip.xy / prevClip.w;
			}

			// MS color textures are addressed in pixels, everything else in [0, 1] UV range
			#if MSAA && MSAA_COLOR
				float2 prevUV = NDCToScreen(prevNdcPos);
			#else
				float2 prevUV = NDCToUV(prevNdcPos);
			#endif

			/////////////// GET FILTERED COLOR VALUE AND NEIGHBORHOOD MIN/MAX /////////////

			// With MSAA depth but non-MSAA color, uv is in pixels while the color texture expects [0, 1] range
			#if MSAA && !MSAA_COLOR
				float2 uvColor = uv * sceneColorTexelSize;
			#else
				float2 uvColor = uv;
			#endif

			#if TEMPORAL_YCOCG
				// YCOCG only requires a + pattern for good quality
				float3 neighbor[5];
				neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
				neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
				neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
				neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
				neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));

				float3 filtered = 0;
				[unroll]
				for(uint i = 0; i < 5; ++i)
					filtered += neighbor[i] * gSampleWeights[i];

				float3 filteredLow = filtered;

				float3 neighborMin = min(min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])),
					neighbor[4]);

				float3 neighborMax = max(max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])),
					neighbor[4]);

			#else // TEMPORAL_YCOCG
				float3 neighbor[9];
				neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1, -1));
				neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
				neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1, -1));
				neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
				neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
				neighbor[5] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
				neighbor[6] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  1));
				neighbor[7] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));
				neighbor[8] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  1));

				float3 filtered = 0;
				[unroll]
				for(uint i = 0; i < 9; ++i)
					filtered += neighbor[i] * gSampleWeights[i];

				#if TEMPORAL_LOWPASS
					float3 filteredLow = 0;
					[unroll]
					for(uint i = 0; i < 9; ++i)
						filteredLow += neighbor[i] * gSampleWeightsLowpass[i];
				#else
					float3 filteredLow = filtered;
				#endif // TEMPORAL_LOWPASS

				#if TEMPORAL_SMOOTH_NEIGHBORHOOD
					// Calculate standard deviation and determine neighborhood min/max based on it
					float3 mean = 0;
					[unroll]
					for(uint i = 0; i < 9; ++i)
						mean += neighbor[i];

					mean /= 9.0f;

					float3 meanSqrd = 0;
					[unroll]
					for(uint i = 0; i < 9; ++i)
						meanSqrd += neighbor[i] * neighbor[i];

					meanSqrd /= 9.0f;

					// abs() guards against a slightly negative variance caused by floating point error
					float3 stdDev = sqrt(abs(meanSqrd - mean * mean));
					float3 neighborMin = mean - stdDev;
					float3 neighborMax = mean + stdDev;

				#else // TEMPORAL_SMOOTH_NEIGHBORHOOD
					float3 neighborMin = min(min(
						min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])),
						min(min(neighbor[4], neighbor[5]), min(neighbor[6], neighbor[7]))),
						neighbor[8]);

					float3 neighborMax = max(max(
						max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])),
						max(max(neighbor[4], neighbor[5]), max(neighbor[6], neighbor[7]))),
						neighbor[8]);
				#endif // TEMPORAL_SMOOTH_NEIGHBORHOOD
			#endif // TEMPORAL_YCOCG

			/////////////////// GET PREVIOUS FRAME COLOR ///////////////////////
			float3 prevColorVal = _SAMPLE_COLOR(prevColor, prevUV, int2(0, 0));

			///////////////////// CLAMP TO NEIGHBORHOOD ////////////////////////
			// Clamping to neighborhood ensures we don't blend with values that are too
			// different, which can happen when history data becomes invalid.
			#if TEMPORAL_YCOCG
				prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
			#else // TEMPORAL_YCOCG
				// Uses low-pass to reduce flickering
				#if TEMPORAL_CLIP_AABB
					prevColorVal = clipAABB(neighborMin, neighborMax, prevColorVal, filteredLow);
				#else // TEMPORAL_CLIP_AABB
					prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
				#endif // TEMPORAL_CLIP_AABB
			#endif // TEMPORAL_YCOCG

			//////////////// BLEND BETWEEN CURRENT AND HISTORY //////////////////
			// Find out how much impact should the previous frame's color have
			#if TEMPORAL_BLEND_FACTOR // Fixed blend factor
				// Weight of the current frame. Note the lerp operands are swapped compared to the automatic path
				// below, where blendAmount is the weight of the history value instead.
				float blendAmount = 1.0f / TEMPORAL_BLEND_FACTOR;
				float3 output = lerp(prevColorVal, filtered, blendAmount);
			#else // TEMPORAL_BLEND_FACTOR
				#if TEMPORAL_YCOCG
					float lumaCurrent = filtered.r;
					float lumaHistory = prevColorVal.r;
				#else // TEMPORAL_YCOCG
					#if TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = filtered.g;
						float lumaHistory = prevColorVal.g;
					#else // TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = LuminanceRGB(filtered);
						float lumaHistory = LuminanceRGB(prevColorVal);
					#endif // TEMPORAL_GREEN_AS_LUMA
				#endif // TEMPORAL_YCOCG

				// Based on T. Lottes: https://www.youtube.com/watch?v=WzpLWzGvFK4&t=18m
				// 1 when luminance of current and history match, falling towards 0 the more they differ
				float blendWeight = 1.0f - abs(lumaCurrent - lumaHistory) / max(max(lumaCurrent, lumaHistory), 0.001f);

				// Weight of the history value for "bad" and "good" pixels, respectively
				float weightBad = 1.0f - 1.0f / TEMPORAL_BAD_RETENTION;
				float weightGood = 1.0f - 1.0f / TEMPORAL_GOOD_RETENTION;

				float blendAmount = lerp(weightBad, weightGood, blendWeight * blendWeight);
				float3 output = lerp(filtered, prevColorVal, blendAmount);
			#endif // TEMPORAL_BLEND_FACTOR

			//////// UNDO TONEMAP & MOVE BACK TO RGB SPACE //////////////////////
			#if TEMPORAL_TONEMAP
				#if TEMPORAL_YCOCG
					output = HDRScaleYInv(output, exposureScale);
				#elif TEMPORAL_GREEN_AS_LUMA
					output = HDRScaleGInv(output, exposureScale);
				#else
					output = HDRScaleRGBInv(output, exposureScale);
				#endif
			#endif // TEMPORAL_TONEMAP

			#if TEMPORAL_YCOCG
				output = YCoCgToRGB(output);
			#endif // TEMPORAL_YCOCG

			// Note: Potential improvements:
			//  - Add a sharpen step
			//  - Use filtering when sampling history
			//  - Properly handle borders when sampling neighbors
			//  - Better blend amount calculation? (Needs experimentation)

			return output;
		}

		// Remove all the helper macros so they don't leak into code following this mixin
		#undef _TEX2D
		#undef _TEXCOLOR
		#undef _PTEX2D
		#undef _SAMPLE
		#undef _SAMPLEOFF
		#undef _SAMPLECOL
		#undef _PIXSIZE
		#undef _TONEMAP_COLOR
		#undef _TONEMAP_COLOR_INV
		#undef _SAMPLE_COLOR
		#undef _RESOLVE_COLOR
	};
};