#include "$ENGINE$/PerCameraData.bslinc" #include "$ENGINE$/ColorSpace.bslinc" #include "$ENGINE$/SurfaceData.bslinc" mixin TemporalResolve { mixin PerCameraData; mixin ColorSpace; mixin SurfaceData; code { ////////////////// CUSTOMIZATION PARAMETERS ///////////////////////////// // When enabled, the system will sample a specific sample from a MS texture. UV coordinates are assumed // to be in pixel space in that case. When disabled sampleIdx parameter is ignored and UV coordinates // are assumed be in standard [0, 1] range. #ifndef MSAA #define MSAA 0 #endif // Only relevant when MSAA is enabled. When disabled color textures are assumed to be non-MSAA. When // enabled all textures are assumed to be MSAA. #ifndef MSAA_COLOR #define MSAA_COLOR MSAA #endif // 0 - System will use the velocity of the current pixel // 1 - System will search 4 neighbor pixels in + pattern, and choose the velocity of the pixel nearest // to the camera // 2 - System will search 8 surrounding pixels and choose the velocity of the pixel nearest to the camera // // Searching the neighborhod instead of just using current velocity yields nicer edges for objects in // motion. See TEMPORAL_SEARCH_RADIUS in order to customize how far away to search. // // Only relevant if TEMPORAL_LOCAL_VELOCITY is enabled, since without it no per-object velocity // information is present and everything is blended based on camera movement. #ifndef TEMPORAL_SEARCH_NEAREST #define TEMPORAL_SEARCH_NEAREST 1 #endif // Determine how far away to sample pixels when TEMPORAL_SEARCH_NEAREST is enabled. // 1 - Immediately adjacent pixels are searched // 2 - Pixels two away are searched (looks better than 1) // 3 - etc. #ifndef TEMPORAL_SEARCH_RADIUS #define TEMPORAL_SEARCH_RADIUS 2 #endif // 0 - The system will only account for velocity due to camera movement (not due to individual objects) // 1 - The system will account both for velocity due to camera movement, as well as individual object // movement. Requires the user to provide a per-pixel velocity buffer. #ifndef TEMPORAL_LOCAL_VELOCITY #define TEMPORAL_LOCAL_VELOCITY 1 #endif // If enabled the scene color value will be filtered from multiple pixels, instead of just taking the color // of the center pixel. This can yield better results at the cost of more sampling. #ifndef TEMPORAL_FILTER_COLOR #define TEMPORAL_FILTER_COLOR 1 #endif // If enabled, the history pixel will be sampled using a catmull-rom curve. Otherwise a single history // sample will be taken. #ifndef TEMPORAL_BICUBIC_HISTORY #define TEMPORAL_BICUBIC_HISTORY 0 #endif // When enabled, the resolve operation will be performed in YCoCg color space. This can yield better // results, requires less color samples and no value clipping. #ifndef TEMPORAL_YCOCG #define TEMPORAL_YCOCG 0 #endif // When enabled, green color will be used instead of calculating luminosity. This will yield better // performance but can result in lower quality. Ignored when TEMPORAL_YCOCG is enabled, since luminosity // is already available as part of the YCoCg color space. #ifndef TEMPORAL_GREEN_AS_LUMA #define TEMPORAL_GREEN_AS_LUMA 0 #endif // When enabled the input samples will be tonemapped using the provided exposure value. Once the final // value is resolved, it will be scaled back into original range. This ensures high frequency data from // HDR content is removed, as it would cause aliasing otherwise. We scale the result back into high range // so the high-quality tonemap shader can be ran on it. 
		#ifndef TEMPORAL_TONEMAP
			#define TEMPORAL_TONEMAP 1
		#endif

		// When enabled, an extra low-pass filter is run when sampling scene color, for better quality.
		#ifndef TEMPORAL_LOWPASS
			#define TEMPORAL_LOWPASS 1
		#endif

		// When enabled, the clamp/clip color neighborhood will be derived from the standard deviation of all the
		// neighborhood samples. When disabled, a min/max operation is performed instead.
		#ifndef TEMPORAL_SMOOTH_NEIGHBORHOOD
			#define TEMPORAL_SMOOTH_NEIGHBORHOOD 1
		#endif

		// When enabled, neighborhood clipping will use an AABB intersection to clip the history value. When disabled,
		// just a clamp will be used instead. Not relevant when TEMPORAL_YCOCG is enabled because it always uses a clamp.
		#ifndef TEMPORAL_CLIP_AABB
			#define TEMPORAL_CLIP_AABB 1
		#endif

		// Determines how the history value is blended with the current value.
		// 0 - The system will calculate the optimal blend value automatically
		// >0 - A fixed blend factor will be used, equal to the multiplicative inverse of the provided value.
		//      (i.e. a value of 8 will result in a blend factor of 1/8, meaning 12.5% of the current frame's value
		//      will be blended with 87.5% of the history value)
		#ifndef TEMPORAL_BLEND_FACTOR
			#define TEMPORAL_BLEND_FACTOR 0
		#endif

		// Determines for how many frames pixels deemed "bad" (too different from the current pixel) should
		// contribute to the current frame.
		#ifndef TEMPORAL_BAD_RETENTION
			#define TEMPORAL_BAD_RETENTION 3
		#endif

		// Determines for how many frames pixels deemed "good" (similar to the current pixel) should contribute
		// to the current frame.
		#ifndef TEMPORAL_GOOD_RETENTION
			#define TEMPORAL_GOOD_RETENTION 10
		#endif

		////////////////////////// HELPER MACROS /////////////////////////
		#if MSAA
			#define _TEX2D(n) Texture2DMS n

			#if MSAA_COLOR
				#define _TEXCOLOR(n) Texture2DMS n
				#define _PTEXCOLOR(n) n
			#else
				#define _TEXCOLOR(n) Texture2D n, SamplerState n##SampState, float4 n##TexelSize
				#define _PTEXCOLOR(n) n, n##SampState, n##TexelSize
			#endif

			#define _PTEX2D(n) n
			#define _SAMPLE(n, uv) n.Load((int2)uv, sampleIdx)
			#define _SAMPLEOFF(n, uv, offset) n.Load((int2)(uv) + offset, sampleIdx)

			#if MSAA_COLOR
				#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#else
				#define _SAMPLECOL(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#endif

			#define _PIXSIZE(n) int2(1, 1)
		#else
			#define _TEX2D(n) Texture2D n, SamplerState n##SampState, float4 n##TexelSize
			#define _TEXCOLOR(n) _TEX2D(n)
			#define _PTEX2D(n) n, n##SampState, n##TexelSize
			#define _PTEXCOLOR(n) n, n##SampState, n##TexelSize
			#define _SAMPLE(n, uv) n.Sample(n##SampState, uv)
			#define _SAMPLEOFF(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#define _PIXSIZE(n) n##TexelSize.xy
		#endif

		///////////////////////// HELPER FUNCTIONS ////////////////////////
		float3 findNearest3x3(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			int r = TEMPORAL_SEARCH_RADIUS;
			float3 dmin = float3(0, 0, 1);

			[unroll]
			for(int y = -r; y <= r; y += r)
			{
				[unroll]
				for(int x = -r; x <= r; x += r)
				{
					float depth = _SAMPLEOFF(sceneDepth, uv, int2(x, y)).x;
					dmin = depth < dmin.z ? float3(x, y, depth) : dmin;
				}
			}

			return float3(uv + dmin.xy * _PIXSIZE(sceneDepth), dmin.z);
		}
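		// Note: Both findNearest3x3 above and findNearestCross below keep the sample with the smallest device
		// depth (dmin.z starts at the far-plane value of 1), so they assume a standard, non-reversed depth buffer
		// where smaller values are closer to the camera. With the default TEMPORAL_SEARCH_RADIUS of 2 the
		// sampled offsets are -2, 0 and +2 pixels on each axis.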
		float3 findNearestCross(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			int r = TEMPORAL_SEARCH_RADIUS;
			float3 dmin = float3(0, 0, 1);

			{
				float depth = _SAMPLE(sceneDepth, uv).x;
				dmin = depth < dmin.z ? float3(0, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(-r, 0)).x;
				dmin = depth < dmin.z ? float3(-r, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(r, 0)).x;
				dmin = depth < dmin.z ? float3(r, 0, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(0, -r)).x;
				dmin = depth < dmin.z ? float3(0, -r, depth) : dmin;
			}

			{
				float depth = _SAMPLEOFF(sceneDepth, uv, int2(0, r)).x;
				dmin = depth < dmin.z ? float3(0, r, depth) : dmin;
			}

			return float3(uv + dmin.xy * _PIXSIZE(sceneDepth), dmin.z);
		}

		float clipAABB(float3 boxMin, float3 boxMax, float3 history, float3 current)
		{
			// Note: Is this necessary? Will "current" always be in the box?
			boxMin = min(current, boxMin);
			boxMax = max(current, boxMax);

			float3 center = (boxMax + boxMin) * 0.5f;
			float3 extents = boxMax - center;

			float3 origin = history - center; // Relative to box
			float3 dir = current - history;
			float3 rDir = rcp(dir);

			float3 tNeg = (extents - origin) * rDir;
			float3 tPos = (-extents - origin) * rDir;

			return saturate(max(max(min(tNeg.x, tPos.x), min(tNeg.y, tPos.y)), min(tNeg.z, tPos.z)));
		}

		void catmullRom2D(float2 uv, float2 size, out float2 samples[3], out float2 weights[3])
		{
			uv *= size;

			float2 pixelCenter = floor(uv - 0.5f) + 0.5f;
			float2 f = uv - pixelCenter;
			float2 f2 = f * f;
			float2 f3 = f2 * f;

			float2 w0 = f2 - 0.5f * (f3 + f);
			float2 w1 = 1.5f * f3 - 2.5f * f2 + 1.0f;
			float2 w3 = 0.5f * (f3 - f2);
			float2 w2 = 1.0f - w0 - w1 - w3;

			weights[0] = w0;
			weights[1] = w1 + w2;
			weights[2] = w3;

			samples[0] = pixelCenter - 1.0f;
			samples[1] = pixelCenter + w2 / weights[1];
			samples[2] = pixelCenter + 2.0f;

			float2 invSize = 1.0f / size;
			samples[0] *= invSize;
			samples[1] *= invSize;
			samples[2] *= invSize;
		}
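		// Note on catmullRom2D: the 1D Catmull-Rom weights for a fractional offset f relative to pixelCenter are
		//   w0 = -0.5*f^3 +     f^2 - 0.5*f
		//   w1 =  1.5*f^3 - 2.5*f^2 + 1
		//   w2 = -1.5*f^3 +   2*f^2 + 0.5*f
		//   w3 =  0.5*f^3 - 0.5*f^2
		// The weights sum to 1, which is why w2 is computed as 1 - w0 - w1 - w3 above. Since w1 and w2 belong to
		// two adjacent texels, they can be collapsed into a single bilinear fetch placed at
		// pixelCenter + w2 / (w1 + w2), which reduces the 4x4 (16 tap) filter to the 3x3 (9 tap) pattern used by
		// the history sampling in temporalResolve below.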
		////////////////////// HELPER TONEMAP/COLOR SPACE DEFINES /////////////////////

		// Automatically scale HDR values based on luminance, if enabled
		#if TEMPORAL_TONEMAP
			#if TEMPORAL_YCOCG
				#define _TONEMAP_COLOR(v) HDRScaleY(v, exposureScale)
			#elif TEMPORAL_GREEN_AS_LUMA
				#define _TONEMAP_COLOR(v) HDRScaleG(v, exposureScale)
			#else
				#define _TONEMAP_COLOR(v) HDRScaleRGB(v, exposureScale)
			#endif
		#else // TEMPORAL_TONEMAP
			#define _TONEMAP_COLOR(v) v
		#endif // TEMPORAL_TONEMAP

		// Converts scene color to YCoCg space (if enabled) and applies tonemapping
		float4 convertColor(
			float4 color
			#if TEMPORAL_TONEMAP
			, float exposureScale
			#endif // TEMPORAL_TONEMAP
			)
		{
			#if TEMPORAL_YCOCG
				color.rgb = _TONEMAP_COLOR(RGBToYCoCg(color.rgb));
			#else
				color.rgb = _TONEMAP_COLOR(color.rgb);
			#endif

			return color;
		}

		#if TEMPORAL_TONEMAP
			#define _SAMPLE_COLOR(n, uv, offset) convertColor(_SAMPLECOL(n, uv, offset), exposureScale)
		#else
			#define _SAMPLE_COLOR(n, uv, offset) convertColor(_SAMPLECOL(n, uv, offset))
		#endif

		///////////////////////////// MAIN /////////////////////////////////
		[internal]
		cbuffer TemporalInput
		{
			float gSampleWeights[9];
			float gSampleWeightsLowpass[9];
		}

		float4 temporalResolve(
			_TEX2D(sceneDepth),
			_TEXCOLOR(sceneColor),
			_TEXCOLOR(prevColor),
			#if TEMPORAL_LOCAL_VELOCITY
			_TEX2D(velocityBuffer),
			#endif // TEMPORAL_LOCAL_VELOCITY
			#if TEMPORAL_TONEMAP
			float exposureScale,
			#endif // TEMPORAL_TONEMAP
			float2 uv,
			float2 ndcPos, // Can be derived from UV, but we usually have it for free, so pass it directly
			int sampleIdx)
		{
			///////////// DETERMINE PER-PIXEL VELOCITY & CURRENT DEPTH ///////////////////
			float curDepth;
			float2 velocity;
			#if TEMPORAL_LOCAL_VELOCITY
				#if TEMPORAL_SEARCH_NEAREST == 1
					float3 nearest = findNearestCross(_PTEX2D(sceneDepth), uv, sampleIdx);

					velocity = _SAMPLE(velocityBuffer, nearest.xy);
					curDepth = nearest.z;
				#elif TEMPORAL_SEARCH_NEAREST == 2
					float3 nearest = findNearest3x3(_PTEX2D(sceneDepth), uv, sampleIdx);

					velocity = _SAMPLE(velocityBuffer, nearest.xy);
					curDepth = nearest.z;
				#else // TEMPORAL_SEARCH_NEAREST
					velocity = _SAMPLE(velocityBuffer, uv);
					curDepth = _SAMPLE(sceneDepth, uv).x;
				#endif // TEMPORAL_SEARCH_NEAREST
			#else // TEMPORAL_LOCAL_VELOCITY
				velocity = 0;
				curDepth = _SAMPLE(sceneDepth, uv).x;
			#endif // TEMPORAL_LOCAL_VELOCITY

			///////////////////// DETERMINE PREV. FRAME UV //////////////////////////////
			float2 prevNdcPos;

			bool hasLocalVelocity = (abs(velocity.x) + abs(velocity.y)) > 0;
			if(hasLocalVelocity)
			{
				velocity = decodeVelocity16SNORM(velocity);
				prevNdcPos = float2(ndcPos - velocity);
			}
			else
			{
				// Assumes velocity due to camera movement
				float4 currentNDC = float4(ndcPos, curDepth, 1);
				float4 prevClip = mul(gNDCToPrevNDC, currentNDC);

				prevNdcPos = prevClip.xy / prevClip.w;
			}

			#if MSAA && MSAA_COLOR
			float2 prevUV = NDCToScreen(prevNdcPos);
			#else
			float2 prevUV = NDCToUV(prevNdcPos);
			#endif

			/////////////// GET FILTERED COLOR VALUE AND NEIGHBORHOOD MIN/MAX /////////////
			#if MSAA && !MSAA_COLOR
			float2 uvColor = uv * sceneColorTexelSize.xy;
			#else
			float2 uvColor = uv;
			#endif

			#if TEMPORAL_YCOCG
				// YCoCg only requires a + pattern for good quality
				float4 neighbor[5];
				neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
				neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
				neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
				neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
				neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));

				float4 filtered = 0;
				#if TEMPORAL_FILTER_COLOR
					// TODO - Do a border check? If offscreen just use the center sample, or
					// do the border check per-sample
					[unroll]
					for(uint i = 0; i < 5; ++i)
						filtered += neighbor[i] * gSampleWeights[i];
				#else // TEMPORAL_FILTER_COLOR
					filtered = neighbor[2];
				#endif // TEMPORAL_FILTER_COLOR

				float4 filteredLow = filtered;

				float4 neighborMin = min(min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])), neighbor[4]);
				float4 neighborMax = max(max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])), neighbor[4]);
			#else // TEMPORAL_YCOCG
				float4 neighbor[9];
				neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1, -1));
				neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
				neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1, -1));
				neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
				neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
				neighbor[5] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
				neighbor[6] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  1));
				neighbor[7] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));
				neighbor[8] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  1));

				float4 filtered = 0;
				float4 filteredLow = 0;
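				// The filtering below is a weighted sum over the 3x3 neighborhood using the caller-provided
				// gSampleWeights (and gSampleWeightsLowpass for the low-pass variant); when TEMPORAL_FILTER_COLOR
				// is disabled only the center sample is used. Presumably the caller derives these weights from
				// the current frame's sub-pixel jitter, but that is an assumption about the calling code rather
				// than something this mixin enforces.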
				#if TEMPORAL_FILTER_COLOR
					// TODO - Do a border check? If offscreen just use the center sample, or
					// do the border check per-sample
					[unroll]
					for(uint i = 0; i < 9; ++i)
						filtered += neighbor[i] * gSampleWeights[i];

					#if TEMPORAL_LOWPASS
						[unroll]
						for(uint i = 0; i < 9; ++i)
							filteredLow += neighbor[i] * gSampleWeightsLowpass[i];
					#else // TEMPORAL_LOWPASS
						filteredLow = filtered;
					#endif // TEMPORAL_LOWPASS
				#else // TEMPORAL_FILTER_COLOR
					filtered = neighbor[4];
					filteredLow = neighbor[4];
				#endif // TEMPORAL_FILTER_COLOR

				#if TEMPORAL_SMOOTH_NEIGHBORHOOD
					// Calculate standard deviation and determine neighborhood min/max based on it
					float4 mean = 0;

					[unroll]
					for(uint i = 0; i < 9; ++i)
						mean += neighbor[i];

					mean /= 9.0f;

					float4 meanSqrd = 0;

					[unroll]
					for(uint i = 0; i < 9; ++i)
						meanSqrd += neighbor[i] * neighbor[i];

					meanSqrd /= 9.0f;

					float4 stdDev = sqrt(abs(meanSqrd - mean * mean));
					float4 neighborMin = mean - stdDev;
					float4 neighborMax = mean + stdDev;
				#else // TEMPORAL_SMOOTH_NEIGHBORHOOD
					float4 neighborMin = min(min(
						min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])),
						min(min(neighbor[4], neighbor[5]), min(neighbor[6], neighbor[7]))), neighbor[8]);

					float4 neighborMax = max(max(
						max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])),
						max(max(neighbor[4], neighbor[5]), max(neighbor[6], neighbor[7]))), neighbor[8]);
				#endif // TEMPORAL_SMOOTH_NEIGHBORHOOD
			#endif // TEMPORAL_YCOCG

			/////////////////// GET PREVIOUS FRAME COLOR ///////////////////////
			#if TEMPORAL_BICUBIC_HISTORY && !MSAA_COLOR
				float2 weights[3];
				float2 samples[3];
				catmullRom2D(prevUV.xy, prevColorTexelSize.zw, samples, weights);

				float4 prevColorVal = 0.0f;

				[unroll]
				for(int y = 0; y < 3; y++)
				{
					[unroll]
					for(int x = 0; x < 3; x++)
					{
						float2 sampleUV = float2(samples[x].x, samples[y].y);
						float sampleWeight = weights[x].x * weights[y].y;

						prevColorVal += _SAMPLE_COLOR(prevColor, sampleUV, int2(0, 0)) * sampleWeight;
					}
				}
			#else
				float4 prevColorVal = _SAMPLE_COLOR(prevColor, prevUV, int2(0, 0));
			#endif

			///////////////////// CLAMP TO NEIGHBORHOOD ////////////////////////
			// Clamping to neighborhood ensures we don't blend with values that are too
			// different, which can happen when history data becomes invalid.
			#if TEMPORAL_YCOCG
				prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
			#else // TEMPORAL_YCOCG
				// Uses low-pass to reduce flickering
				#if TEMPORAL_CLIP_AABB
					float clipT = clipAABB(neighborMin.rgb, neighborMax.rgb, prevColorVal.rgb, filteredLow.rgb);
					prevColorVal = prevColorVal + clipT * (filteredLow - prevColorVal);
				#else // TEMPORAL_CLIP_AABB
					prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
				#endif // TEMPORAL_CLIP_AABB
			#endif // TEMPORAL_YCOCG

			//////////////// BLEND BETWEEN CURRENT AND HISTORY //////////////////
			// Find out how much impact the previous frame's color should have
			#if TEMPORAL_BLEND_FACTOR
				// Fixed blend factor
				float blendAmount = 1.0f / TEMPORAL_BLEND_FACTOR;
				float4 output = lerp(prevColorVal, filtered, blendAmount);
			#else // TEMPORAL_BLEND_FACTOR
				#if TEMPORAL_YCOCG
					float lumaCurrent = filtered.r;
					float lumaHistory = prevColorVal.r;
				#else // TEMPORAL_YCOCG
					#if TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = filtered.g;
						float lumaHistory = prevColorVal.g;
					#else // TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = LuminanceRGB(filtered);
						float lumaHistory = LuminanceRGB(prevColorVal);
					#endif // TEMPORAL_GREEN_AS_LUMA
				#endif // TEMPORAL_YCOCG
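				// The blend amount below is the fraction of history that is kept: pixels whose luma closely
				// matches the history are pushed towards weightGood, flickering/mismatched pixels towards
				// weightBad. For example, with the default TEMPORAL_GOOD_RETENTION of 10 a stable pixel keeps
				// 90% history + 10% current color each frame (roughly a 10 frame exponential average), while
				// with TEMPORAL_BAD_RETENTION of 3 a mismatched pixel keeps only about 67% history.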
				// Based on T. Lottes: https://www.youtube.com/watch?v=WzpLWzGvFK4&t=18m
				float blendWeight = 1.0f - abs(lumaCurrent - lumaHistory) / max(max(lumaCurrent, lumaHistory), 0.001f);

				float weightBad = 1.0f - 1.0f / TEMPORAL_BAD_RETENTION;
				float weightGood = 1.0f - 1.0f / TEMPORAL_GOOD_RETENTION;

				float blendAmount = lerp(weightBad, weightGood, blendWeight * blendWeight);
				float4 output = lerp(filtered, prevColorVal, blendAmount);
			#endif // TEMPORAL_BLEND_FACTOR

			//////// UNDO TONEMAP & MOVE BACK TO RGB SPACE //////////////////////
			#if TEMPORAL_TONEMAP
				#if TEMPORAL_YCOCG
					output.rgb = HDRScaleYInv(output.rgb, exposureScale);
				#elif TEMPORAL_GREEN_AS_LUMA
					output.rgb = HDRScaleGInv(output.rgb, exposureScale);
				#else
					output.rgb = HDRScaleRGBInv(output.rgb, exposureScale);
				#endif
			#endif // TEMPORAL_TONEMAP

			#if TEMPORAL_YCOCG
				output.rgb = YCoCgToRGB(output.rgb);
			#endif // TEMPORAL_YCOCG

			// Note: Potential improvements:
			//  - Add a sharpen step
			//  - Properly handle borders when sampling neighbors
			//  - Better blend amount calculation? (Needs experimentation)

			return output;
		}

		#undef _TEX2D
		#undef _PTEX2D
		#undef _SAMPLE
		#undef _PIXSIZE
		#undef _TONEMAP_COLOR
		#undef _TONEMAP_COLOR_INV
		#undef _RESOLVE_COLOR
	};
};
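// A minimal, hypothetical usage sketch (not part of this mixin): a resolve shader would mix in
// TemporalResolve, declare textures matching the _TEX2D/_TEXCOLOR parameter lists above (texture,
// sampler and texel-size triples in the default non-MSAA configuration), fill the TemporalInput
// weights from the calling code, and return temporalResolve(...) from its pixel entry point.
// All names and the exact BSL boilerplate below are illustrative assumptions, not part of this file.
//
// shader MyTemporalAA
// {
//     mixin TemporalResolve;
//
//     code
//     {
//         Texture2D gSceneDepth;  SamplerState gSceneDepthSampState;  float4 gSceneDepthTexelSize;
//         Texture2D gSceneColor;  SamplerState gSceneColorSampState;  float4 gSceneColorTexelSize;
//         Texture2D gPrevColor;   SamplerState gPrevColorSampState;   float4 gPrevColorTexelSize;
//         Texture2D gVelocity;    SamplerState gVelocitySampState;    float4 gVelocityTexelSize;
//
//         float4 fsmain(float4 pos : SV_Position, float2 uv : TEXCOORD0, float2 ndcPos : TEXCOORD1) : SV_Target0
//         {
//             return temporalResolve(
//                 gSceneDepth, gSceneDepthSampState, gSceneDepthTexelSize,
//                 gSceneColor, gSceneColorSampState, gSceneColorTexelSize,
//                 gPrevColor, gPrevColorSampState, gPrevColorTexelSize,
//                 gVelocity, gVelocitySampState, gVelocityTexelSize,
//                 gExposureScale, uv, ndcPos, 0);
//         }
//     };
// };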