#include "$ENGINE$/PerCameraData.bslinc"
#include "$ENGINE$/ColorSpace.bslinc"

mixin TemporalResolve
{
	mixin PerCameraData;
	mixin ColorSpace;

	code
	{
		////////////////// CUSTOMIZATION PARAMETERS /////////////////////////////
		// Every parameter below can be overridden by defining it before this code is included. The values
		// defined here are only the defaults.
	
		// When enabled, the system will sample a specific sample from a MS texture. UV coordinates are assumed
		// to be in pixel space in that case. When disabled, the sampleIdx parameter is ignored and UV coordinates
		// are assumed to be in the standard [0, 1] range.
		#ifndef MSAA
			#define MSAA 0
		#endif
		
		// Only relevant when MSAA is enabled. When disabled, color textures are assumed to be non-MSAA and are
		// read through a sampler (depth and velocity textures are still multisampled). When enabled, all
		// textures are assumed to be MSAA. Defaults to the value of MSAA.
		#ifndef MSAA_COLOR
			#define MSAA_COLOR MSAA
		#endif
	
		// 0 - System will use the velocity of the current pixel
		// 1 - System will search 4 neighbor pixels in + pattern, and choose the velocity of the pixel nearest 
		//     to the camera
		// 2 - System will search 8 surrounding pixels and choose the velocity of the pixel nearest to the camera
		// Any other value behaves the same as 0.
		//
		// Searching the neighborhood instead of just using the current velocity yields nicer edges for objects
		// in motion. See TEMPORAL_SEARCH_RADIUS in order to customize how far away to search.
		//
		// Only relevant if TEMPORAL_LOCAL_VELOCITY is enabled, since without it no per-object velocity
		// information is present and everything is blended based on camera movement.
		#ifndef TEMPORAL_SEARCH_NEAREST
			#define TEMPORAL_SEARCH_NEAREST 1
		#endif
		
		// Determines how far away to sample pixels when TEMPORAL_SEARCH_NEAREST is enabled. 
		// 1 - Immediately adjacent pixels are searched
		// 2 - Pixels two away are searched (looks better than 1)
		// 3 - etc.
		#ifndef TEMPORAL_SEARCH_RADIUS
			#define TEMPORAL_SEARCH_RADIUS 2
		#endif
		
		// 0 - The system will only account for velocity due to camera movement (not due to individual objects)
		// 1 - The system will account both for velocity due to camera movement, as well as individual object 
		//     movement. Requires the user to provide a per-pixel velocity buffer.
		#ifndef TEMPORAL_LOCAL_VELOCITY
			#define TEMPORAL_LOCAL_VELOCITY 1
		#endif
		
		// When enabled, the resolve operation will be performed in YCoCg color space. This can yield better
		// results, requires fewer color samples (5 in a + pattern instead of 9) and no value clipping (the
		// history value is always clamped to the neighborhood min/max instead).
		#ifndef TEMPORAL_YCOCG
			#define TEMPORAL_YCOCG 0
		#endif
		
		// When enabled, green color will be used instead of calculating luminosity. This will yield better
		// performance but can result in lower quality. Ignored when TEMPORAL_YCOCG is enabled, since luminosity
		// is already available as part of the YCoCg color space.
		#ifndef TEMPORAL_GREEN_AS_LUMA
			#define TEMPORAL_GREEN_AS_LUMA 0
		#endif
		
		// When enabled the input samples will be tonemapped using the provided exposure value. Once the final
		// value is resolved, it will be scaled back into the original range. This ensures high frequency data
		// from HDR content is removed, as it would cause aliasing otherwise. We scale the result back into high
		// range so the high-quality tonemap shader can be run on it. Enabling this adds an "exposureScale"
		// parameter to temporalResolve().
		#ifndef TEMPORAL_TONEMAP
			#define TEMPORAL_TONEMAP 1
		#endif
	
		// When enabled an extra low-pass filter is run when sampling scene color, for better quality. The
		// low-pass value is only consumed by the AABB clip, so this has no effect when TEMPORAL_YCOCG is enabled
		// or TEMPORAL_CLIP_AABB is disabled.
		#ifndef TEMPORAL_LOWPASS
			#define TEMPORAL_LOWPASS 1
		#endif
		
		// When enabled, clamp/clip color neighborhood will be deduced using standard deviation of all the
		// neighborhood samples. When disabled a min/max operation is performed instead. Ignored when
		// TEMPORAL_YCOCG is enabled, as that path always uses min/max.
		#ifndef TEMPORAL_SMOOTH_NEIGHBORHOOD
			#define TEMPORAL_SMOOTH_NEIGHBORHOOD 1
		#endif
		
		// When enabled, neighborhood clipping will use an AABB intersection to clip the history value. When disabled
		// just a clamp will be used instead. Not relevant when TEMPORAL_YCOCG is enabled because it always uses a clamp.
		#ifndef TEMPORAL_CLIP_AABB
			#define TEMPORAL_CLIP_AABB 1
		#endif
		
		// Determines how the history value is blended with the current value.
		// 0 - The system will calculate the optimal blend value automatically
		// >0 - A fixed blend factor will be used, equal to the multiplicative inverse of the provided value.
		//      (i.e. a value of 8 will result in a blend factor of 1/8, meaning the output is made up of 12.5% of
		//      the current value and 87.5% of the history value)
		#ifndef TEMPORAL_BLEND_FACTOR
			#define TEMPORAL_BLEND_FACTOR 0
		#endif
		
		// Determines for how many frames should pixels deemed as "bad" (too different from current pixel)
		// contribute to the current frame. The history weight used for such pixels is (1 - 1/value). Only used
		// when TEMPORAL_BLEND_FACTOR is 0.
		#ifndef TEMPORAL_BAD_RETENTION
			#define TEMPORAL_BAD_RETENTION 3
		#endif
		
		// Determines for how many frames should pixels deemed as "good" (similar to the current pixel)
		// contribute to the current frame. The history weight used for such pixels is (1 - 1/value). Only used
		// when TEMPORAL_BLEND_FACTOR is 0.
		#ifndef TEMPORAL_GOOD_RETENTION
			#define TEMPORAL_GOOD_RETENTION 10
		#endif
	
	
		////////////////////////// HELPER MACROS /////////////////////////
		// _TEX2D(n)     - Declares the function parameter(s) for a non-color texture (depth, velocity).
		// _TEXCOLOR(n)  - Declares the function parameter(s) for a color texture.
		// _PTEX2D(n)    - Forwards parameter(s) declared with _TEX2D to another function.
		// _SAMPLE       - Reads a _TEX2D texture at the provided coordinates.
		// _SAMPLEOFF    - Reads a _TEX2D texture at the provided coordinates, plus an integer texel offset.
		// _SAMPLECOL    - Reads a _TEXCOLOR texture at the provided coordinates, plus an integer texel offset.
		// _PIXSIZE(n)   - Size of a single pixel, in the coordinate space used for addressing the _TEX2D texture.
		//
		// Note that the MSAA variants of _SAMPLE/_SAMPLEOFF read a variable named "sampleIdx" from the scope
		// they are expanded in.
		#if MSAA
			// Multisampled textures are addressed in pixel coordinates and read with Load(), so no sampler or
			// texel size is required.
			#define _TEX2D(n) Texture2DMS<float> n
			#if MSAA_COLOR
				#define _TEXCOLOR(n) Texture2DMS<float4> n
			#else
				#define _TEXCOLOR(n) Texture2D n, SamplerState n##SampState, float2 n##TexelSize
			#endif
			
			#define _PTEX2D(n) n
			
			// The coordinate argument is parenthesized before the cast so that the cast applies to the entire
			// argument expression, and not just its first operand.
			#define _SAMPLE(n, uv) n.Load((int2)(uv), sampleIdx)
			#define _SAMPLEOFF(n, uv, offset) n.Load((int2)(uv) + offset, sampleIdx)
			
			#if MSAA_COLOR
				#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#else
				#define _SAMPLECOL(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#endif
			
			// Coordinates are already in pixels
			#define _PIXSIZE(n) int2(1, 1)
		#else
			// Regular textures are addressed in [0, 1] range and read through a sampler. Each texture parameter
			// expands into three parameters: texture, its sampler state and its texel size.
			#define _TEX2D(n) Texture2D n, SamplerState n##SampState, float2 n##TexelSize
			#define _TEXCOLOR(n) _TEX2D(n)
			#define _PTEX2D(n) n, n##SampState, n##TexelSize
			#define _SAMPLE(n, uv) n.Sample(n##SampState, uv)
			#define _SAMPLEOFF(n, uv, offset) n.Sample(n##SampState, uv, offset)
			#define _SAMPLECOL(n, uv, offset) _SAMPLEOFF(n, uv, offset)
			#define _PIXSIZE(n) n##TexelSize
		#endif
		
		///////////////////////// HELPER FUNCTIONS ////////////////////////
		// Searches a 3x3 grid of depth samples centered on the provided coordinates, with samples spaced
		// TEMPORAL_SEARCH_RADIUS pixels apart, and finds the sample with the smallest depth value.
		//
		// sceneDepth - Depth texture to search (expands to texture, sampler & texel size when MSAA is disabled).
		// uv         - Coordinates of the center sample. Pixel space when MSAA is enabled, [0, 1] range otherwise.
		// sampleIdx  - Index of the sample to read. Only used when MSAA is enabled.
		//
		// Returns coordinates of the found sample in .xy (same space as "uv"), and its depth in .z. If no sample
		// has a depth smaller than 1, "uv" is returned unchanged, together with a depth of 1.
		float3 findNearest3x3(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			const int radius = TEMPORAL_SEARCH_RADIUS;
			
			// xy - offset (in pixels) of the nearest sample found so far, z - depth of that sample
			float3 nearest = float3(0, 0, 1);
			
			// Iterate over a fixed [-1, 1] grid and scale it by the radius, instead of stepping by the radius. This
			// keeps the loops at exactly 3x3 iterations, so they terminate (and can be unrolled) even if the
			// radius is set to 0. Samples are visited in the same order as when stepping by the radius.
			[unroll]
			for(int row = -1; row <= 1; ++row)
			{
				[unroll]
				for(int col = -1; col <= 1; ++col)
				{
					int x = col * radius;
					int y = row * radius;
					
					float depth = _SAMPLEOFF(sceneDepth, uv, int2(x, y)).x;
					if(depth < nearest.z)
						nearest = float3(x, y, depth);
				}
			}
			
			// Convert the pixel offset into the coordinate space of "uv"
			return float3(uv + nearest.xy * _PIXSIZE(sceneDepth), nearest.z);
		}
		
		// Searches five depth samples arranged in a + pattern (center, left, right, up, down), with the outer
		// samples TEMPORAL_SEARCH_RADIUS pixels away from the center, and finds the one with the smallest depth.
		//
		// sceneDepth - Depth texture to search (expands to texture, sampler & texel size when MSAA is disabled).
		// uv         - Coordinates of the center sample. Pixel space when MSAA is enabled, [0, 1] range otherwise.
		// sampleIdx  - Index of the sample to read. Only used when MSAA is enabled.
		//
		// Returns coordinates of the found sample in .xy (same space as "uv"), and its depth in .z. If no sample
		// has a depth smaller than 1, "uv" is returned unchanged, together with a depth of 1.
		float3 findNearestCross(_TEX2D(sceneDepth), float2 uv, int sampleIdx)
		{
			const int radius = TEMPORAL_SEARCH_RADIUS;
			
			// xy - offset (in pixels) of the nearest sample found so far, z - depth of that sample
			float3 nearest = float3(0, 0, 1);
			
			// Center
			float depthCenter = _SAMPLE(sceneDepth, uv).x;
			if(depthCenter < nearest.z)
				nearest = float3(0, 0, depthCenter);
			
			// Left
			float depthLeft = _SAMPLEOFF(sceneDepth, uv, int2(-radius, 0)).x;
			if(depthLeft < nearest.z)
				nearest = float3(-radius, 0, depthLeft);
			
			// Right
			float depthRight = _SAMPLEOFF(sceneDepth, uv, int2(radius, 0)).x;
			if(depthRight < nearest.z)
				nearest = float3(radius, 0, depthRight);
			
			// Up (negative Y)
			float depthUp = _SAMPLEOFF(sceneDepth, uv, int2(0, -radius)).x;
			if(depthUp < nearest.z)
				nearest = float3(0, -radius, depthUp);
			
			// Down (positive Y)
			float depthDown = _SAMPLEOFF(sceneDepth, uv, int2(0, radius)).x;
			if(depthDown < nearest.z)
				nearest = float3(0, radius, depthDown);
			
			// Convert the pixel offset into the coordinate space of "uv"
			float2 nearestCoord = uv + nearest.xy * _PIXSIZE(sceneDepth);
			return float3(nearestCoord, nearest.z);
		}
		
		// Clips the history color against an axis-aligned color box. The history value is moved along the
		// segment from "history" towards "current" until it enters the box. If it is already inside the box it
		// is returned unchanged.
		//
		// boxMin, boxMax - Corners of the neighborhood color box.
		// history        - Color from the previous frame, to be clipped.
		// current        - Color from the current frame, towards which the history is clipped.
		//
		// Returns the clipped history color.
		float3 clipAABB(float3 boxMin, float3 boxMax, float3 history, float3 current)
		{
			// Expand the box so it always contains "current". This matters because temporalResolve() can provide a
			// box built from mean +/- standard deviation together with a low-pass filtered "current", which is
			// not guaranteed to lie within that box.
			float3 lo = min(current, boxMin);
			float3 hi = max(current, boxMax);
			
			float3 boxCenter = (hi + lo) * 0.5f;
			float3 halfSize = hi - boxCenter;
			
			// Ray starting at "history" (expressed relative to the box center) and pointing at "current"
			float3 rayOrigin = history - boxCenter;
			float3 rayDir = current - history;
			float3 invRayDir = rcp(rayDir);
			
			// Distances along the ray to the two bounding planes on each axis
			float3 tPlaneA = (halfSize - rayOrigin) * invRayDir;
			float3 tPlaneB = (-halfSize - rayOrigin) * invRayDir;
			
			// The ray enters the box once it has entered the slab on every axis. Limiting the distance to [0, 1]
			// keeps the result on the history -> current segment (0 meaning the history is already in the box).
			float3 tSlabEnter = min(tPlaneA, tPlaneB);
			float tEnter = saturate(max(max(tSlabEnter.x, tSlabEnter.y), tSlabEnter.z));
			
			return history + tEnter * rayDir;
		}
		
		// Packs a velocity into a form suitable for storing in a 16-bit SNORM texture, by halving it.
		// Velocity range of [-2, 2] is supported (full NDC). Use decodeVelocity16SNORM() to unpack.
		//
		// Note that a velocity of zero encodes to zero, which temporalResolve() interprets as "no per-object
		// velocity available" and falls back to camera-only reprojection.
		float2 encodeVelocity16SNORM(float2 velocity)
		{
			const float encodeScale = 0.5f;
			return encodeScale * velocity;
		}
		
		// Unpacks a velocity previously packed with encodeVelocity16SNORM(), by doubling it.
		// Velocity range of [-2, 2] is supported (full NDC).
		float2 decodeVelocity16SNORM(float2 val)
		{
			const float decodeScale = 2.0f;
			return decodeScale * val;
		}

		////////////////////// HELPER TONEMAP/COLOR SPACE DEFINES /////////////////////
		// Automatically scale HDR values based on luminance, if enabled. The scale function is picked to match
		// the source of luminance: Y channel (YCoCg), green channel, or luminance calculated from RGB.
		// The expansion reads a variable named "exposureScale" from the scope it is expanded in.
		// NOTE(review): HDRScale* functions are not defined in this file - presumably provided by the
		// ColorSpace mixin.
		#if TEMPORAL_TONEMAP
			#if TEMPORAL_YCOCG
				#define _TONEMAP_COLOR(v) HDRScaleY(v, exposureScale)
			#elif TEMPORAL_GREEN_AS_LUMA
				#define _TONEMAP_COLOR(v) HDRScaleG(v, exposureScale)
			#else
				#define _TONEMAP_COLOR(v) HDRScaleRGB(v, exposureScale)
			#endif
		#else // TEMPORAL_TONEMAP
			// Tonemapping disabled, pass the value through unchanged
			#define _TONEMAP_COLOR(v) v
		#endif // TEMPORAL_TONEMAP
		
		// Samples a color texture and automatically converts the result to YCoCg space, if enabled. The
		// tonemap scale is applied after the color space conversion, so it needs to be undone before
		// converting back to RGB.
		#if TEMPORAL_YCOCG
			#define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(RGBToYCoCg(_SAMPLECOL(n, uv, offset).rgb))
		#else // TEMPORAL_YCOCG
			#define _SAMPLE_COLOR(n, uv, offset) _TONEMAP_COLOR(_SAMPLECOL(n, uv, offset).rgb)
		#endif // TEMPORAL_YCOCG
						
		///////////////////////////// MAIN /////////////////////////////////
		// Per-sample filter weights used by temporalResolve(). Entry "i" is multiplied with neighborhood sample
		// "i" and the results are summed up, so the order of the weights must match the order in which
		// temporalResolve() gathers its samples:
		//  - 3x3 neighborhood (TEMPORAL_YCOCG disabled): row by row starting with the (-1, -1) offset, with the
		//    center sample at index 4.
		//  - + pattern (TEMPORAL_YCOCG enabled): only the first five entries of gSampleWeights are read, in order
		//    (-1, 0), (0, -1), (0, 0), (1, 0), (0, 1).
		//
		// gSampleWeightsLowpass is only read when TEMPORAL_YCOCG is disabled and TEMPORAL_LOWPASS is enabled.
		// NOTE(review): Weights are presumably expected to sum to 1 so the filter preserves brightness - confirm
		// against the code populating this buffer.
		[internal]
		cbuffer TemporalInput
		{
			float gSampleWeights[9];
			float gSampleWeightsLowpass[9];
		}
		
		// Resolves the color of a single pixel by blending the (filtered) scene color of the current frame with
		// the color of the previous frame, reprojected to account for movement.
		//
		// The operation consists of the following steps:
		//  1. Determine the velocity and depth of the pixel (optionally from the nearest pixel in the neighborhood)
		//  2. Use the velocity (or camera movement, if there is no velocity) to find where the pixel was located
		//     in the previous frame
		//  3. Sample the neighborhood of the pixel in the current frame, and calculate its filtered color and
		//     color bounds
		//  4. Sample the previous frame (history) color and clamp/clip it to the neighborhood color bounds
		//  5. Blend the history with the filtered current color
		//  6. Undo the tonemap scale and color space conversion
		//
		// Texture parameters are declared through the _TEX2D/_TEXCOLOR macros, meaning that each of them expands
		// to a texture, sampler state and texel size triplet, unless the texture is multisampled.
		//
		// sceneDepth     - Depth of the current frame.
		// sceneColor     - Color of the current frame.
		// prevColor      - Color of the previous frame (history).
		// velocityBuffer - Per-pixel velocity, as encoded by encodeVelocity16SNORM(). Only present if
		//                  TEMPORAL_LOCAL_VELOCITY is enabled.
		// exposureScale  - Exposure used for scaling the HDR samples. Only present if TEMPORAL_TONEMAP is enabled.
		// uv             - Coordinates of the pixel to resolve. Pixel space when MSAA is enabled, [0, 1] range
		//                  otherwise.
		// ndcPos         - Position of the pixel to resolve, in NDC.
		// sampleIdx      - Index of the sample to resolve. Only used when MSAA is enabled.
		//
		// Returns the resolved RGB color, with the tonemap scale (if any) already undone.
		float3 temporalResolve(
			_TEX2D(sceneDepth), 
			_TEXCOLOR(sceneColor), 
			_TEXCOLOR(prevColor), 
			#if TEMPORAL_LOCAL_VELOCITY
			_TEX2D(velocityBuffer),
			#endif // TEMPORAL_LOCAL_VELOCITY
			#if TEMPORAL_TONEMAP
			float exposureScale,
			#endif // TEMPORAL_TONEMAP
			float2 uv,
			float2 ndcPos, // Can be derived from UV, but we usually have it for free, so pass it directly
			int sampleIdx)
		{
			///////////// DETERMINE PER-PIXEL VELOCITY & CURRENT DEPTH ///////////////////
			// NOTE(review): When MSAA is enabled _TEX2D declares the velocity buffer as Texture2DMS<float>, so
			// the reads below yield a single channel, which is then assigned to both velocity components -
			// confirm this is intended.
			float curDepth;
			float2 velocity;
			#if TEMPORAL_LOCAL_VELOCITY
				#if TEMPORAL_SEARCH_NEAREST == 1
					// Velocity & depth of the nearest pixel in a + pattern
					float3 nearest = findNearestCross(_PTEX2D(sceneDepth), uv, sampleIdx);
					velocity = _SAMPLE(velocityBuffer, nearest.xy);
					curDepth = nearest.z;
				#elif TEMPORAL_SEARCH_NEAREST == 2
					// Velocity & depth of the nearest pixel in a 3x3 grid
					float3 nearest = findNearest3x3(_PTEX2D(sceneDepth), uv, sampleIdx);
					velocity = _SAMPLE(velocityBuffer, nearest.xy);
					curDepth = nearest.z;
				#else // TEMPORAL_SEARCH_NEAREST
					// Velocity & depth of the current pixel
					velocity = _SAMPLE(velocityBuffer, uv);
					curDepth = _SAMPLE(sceneDepth, uv).x;
				#endif // TEMPORAL_SEARCH_NEAREST
			#else // TEMPORAL_LOCAL_VELOCITY
				// No velocity buffer, zero velocity forces the camera-only reprojection below
				velocity = 0;
				curDepth = _SAMPLE(sceneDepth, uv).x;
			#endif // TEMPORAL_LOCAL_VELOCITY
			
			///////////////////// DETERMINE PREV. FRAME UV //////////////////////////////
			float2 prevNdcPos;
			
			// Velocity of exactly zero is treated as "no per-object velocity available"
			bool hasLocalVelocity = (abs(velocity.x) + abs(velocity.y)) > 0;
			if(hasLocalVelocity)
			{
				velocity = decodeVelocity16SNORM(velocity);
				prevNdcPos = float2(ndcPos - velocity);
			}
			else
			{
				// Assumes velocity due to camera movement
				float4 currentNDC = float4(ndcPos, curDepth, 1);
				float4 prevClip = mul(gNDCToPrevNDC, currentNDC);
				prevNdcPos = prevClip.xy / prevClip.w;
			}
			
			// History is addressed in pixels if the color textures are multisampled, and in [0, 1] range if they
			// are read through a sampler
			#if MSAA && MSAA_COLOR
			float2 prevUV = NDCToScreen(prevNdcPos);
			#else
			float2 prevUV = NDCToUV(prevNdcPos);
			#endif
			
			/////////////// GET FILTERED COLOR VALUE AND NEIGHBORHOOD MIN/MAX /////////////
			// With MSAA depth but non-MSAA color, "uv" is in pixels while the color texture is read through a
			// sampler, so the coordinates need to be scaled by the texel size
			#if MSAA && !MSAA_COLOR
			float2 uvColor = uv * sceneColorTexelSize;
			#else
			float2 uvColor = uv;
			#endif
			
			#if TEMPORAL_YCOCG
			// YCOCG only requires a + pattern for good quality
			float3 neighbor[5];
			neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
			neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
			neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
			neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
			neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));
			
			// Only the first five sample weights are used in this mode
			float3 filtered = 0;
			[unroll]
			for(uint i = 0; i < 5; ++i)
				filtered += neighbor[i] * gSampleWeights[i];
			
			// No separate low-pass filter in this mode
			float3 filteredLow = filtered;
			
			float3 neighborMin = min(min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])), 
				neighbor[4]);
				
			float3 neighborMax = max(max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])), 
				neighbor[4]);
			
			#else // TEMPORAL_YCOCG
			// Full 3x3 neighborhood, gathered row by row with the current pixel at index 4
			float3 neighbor[9];
			neighbor[0] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1, -1));
			neighbor[1] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0, -1));
			neighbor[2] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1, -1));
			neighbor[3] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  0));
			neighbor[4] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  0));
			neighbor[5] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  0));
			neighbor[6] = _SAMPLE_COLOR(sceneColor, uvColor, int2(-1,  1));
			neighbor[7] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 0,  1));
			neighbor[8] = _SAMPLE_COLOR(sceneColor, uvColor, int2( 1,  1));
			
			// Filtered value of the current pixel, this is the value that gets blended with history
			float3 filtered = 0;
			[unroll]
			for(uint i = 0; i < 9; ++i)
				filtered += neighbor[i] * gSampleWeights[i];

			// Low-pass filtered value, only used as the clip target for clipAABB()
			#if TEMPORAL_LOWPASS
				float3 filteredLow = 0;
				[unroll]
				for(uint i = 0; i < 9; ++i)
					filteredLow += neighbor[i] * gSampleWeightsLowpass[i];
			#else
				float3 filteredLow = filtered;
			#endif // TEMPORAL_LOWPASS
			
			#if TEMPORAL_SMOOTH_NEIGHBORHOOD
			// Calculate standard deviation and determine neighborhood min/max based on it
			float3 mean = 0;
			[unroll]
			for(uint i = 0; i < 9; ++i)
				mean += neighbor[i];
			
			mean /= 9.0f;
			
			float3 meanSqrd = 0;
			[unroll]
			for(uint i = 0; i < 9; ++i)
				meanSqrd += neighbor[i] * neighbor[i];
			
			meanSqrd /= 9.0f;
			
			// Variance = E[x^2] - E[x]^2. abs() keeps the sqrt() input non-negative in case the subtraction
			// yields a (slightly) negative value.
			float3 stdDev = sqrt(abs(meanSqrd - mean * mean));
			float3 neighborMin = mean - stdDev;
			float3 neighborMax = mean + stdDev;
			
			#else // TEMPORAL_SMOOTH_NEIGHBORHOOD
			// Per-channel min/max of all nine samples
			float3 neighborMin = min(min(
				min(min(neighbor[0], neighbor[1]), min(neighbor[2], neighbor[3])), 
				min(min(neighbor[4], neighbor[5]), min(neighbor[6], neighbor[7]))), 
				neighbor[8]);
				
			float3 neighborMax = max(max(
				max(max(neighbor[0], neighbor[1]), max(neighbor[2], neighbor[3])), 
				max(max(neighbor[4], neighbor[5]), max(neighbor[6], neighbor[7]))), 
				neighbor[8]);
			
			#endif // TEMPORAL_SMOOTH_NEIGHBORHOOD
			#endif // TEMPORAL_YCOCG
			
			/////////////////// GET PREVIOUS FRAME COLOR ///////////////////////
			// Goes through the same tonemap/color space conversion as the current frame samples, so the values
			// are directly comparable
			float3 prevColorVal = _SAMPLE_COLOR(prevColor, prevUV, int2(0, 0));
			
			///////////////////// CLAMP TO NEIGHBORHOOD ////////////////////////
			// Clamping to neighborhood ensures we don't blend with values that are too
			// different, which can happen when history data becomes invalid.
			#if TEMPORAL_YCOCG
				prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
			#else // TEMPORAL_YCOCG
				// Uses low-pass to reduce flickering
				#if TEMPORAL_CLIP_AABB
					prevColorVal = clipAABB(neighborMin, neighborMax, prevColorVal, filteredLow);
				#else // TEMPORAL_CLIP_AABB
					prevColorVal = clamp(prevColorVal, neighborMin, neighborMax);
				#endif // TEMPORAL_CLIP_AABB
			#endif // TEMPORAL_YCOCG
			
			//////////////// BLEND BETWEEN CURRENT AND HISTORY //////////////////
			// Find out how much impact should the previous frame's color have
			#if TEMPORAL_BLEND_FACTOR // Fixed blend factor
				// Here blendAmount is the weight of the CURRENT value (note the lerp argument order)
				float blendAmount = 1.0f / TEMPORAL_BLEND_FACTOR;
				float3 output = lerp(prevColorVal, filtered, blendAmount);
			#else // TEMPORAL_BLEND_FACTOR
				#if TEMPORAL_YCOCG
					// First channel of YCoCg is luminance
					float lumaCurrent = filtered.r;
					float lumaHistory = prevColorVal.r;
				#else // TEMPORAL_YCOCG
					#if TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = filtered.g;
						float lumaHistory = prevColorVal.g;
					#else // TEMPORAL_GREEN_AS_LUMA
						float lumaCurrent = LuminanceRGB(filtered);
						float lumaHistory = LuminanceRGB(prevColorVal);
					#endif // TEMPORAL_GREEN_AS_LUMA
				#endif // TEMPORAL_YCOCG
			
				// Based on T. Lottes: https://www.youtube.com/watch?v=WzpLWzGvFK4&t=18m
				// 1 when current and history luminance match, approaching 0 the more they differ (relative to the
				// larger of the two). The max() with a small constant avoids division by zero.
				float blendWeight = 1.0f - abs(lumaCurrent - lumaHistory) / max(max(lumaCurrent, lumaHistory), 0.001f);
				
				// History weights that retain the history for roughly the requested number of frames
				float weightBad = 1.0f - 1.0f / TEMPORAL_BAD_RETENTION;
				float weightGood = 1.0f - 1.0f / TEMPORAL_GOOD_RETENTION;
				
				// Here blendAmount is the weight of the HISTORY value (note the lerp argument order)
				float blendAmount = lerp(weightBad, weightGood, blendWeight * blendWeight);
				float3 output = lerp(filtered, prevColorVal, blendAmount);
			#endif // TEMPORAL_BLEND_FACTOR
			
			//////// UNDO TONEMAP & MOVE BACK TO RGB SPACE //////////////////////
			// Performed in the reverse order of _SAMPLE_COLOR: tonemap scale is undone first, color space
			// conversion second
			#if TEMPORAL_TONEMAP
				#if TEMPORAL_YCOCG
					output = HDRScaleYInv(output, exposureScale);
				#elif TEMPORAL_GREEN_AS_LUMA
					output = HDRScaleGInv(output, exposureScale);
				#else
					output = HDRScaleRGBInv(output, exposureScale);
				#endif
			#endif // TEMPORAL_TONEMAP
			
			#if TEMPORAL_YCOCG
				output = YCoCgToRGB(output);
			#endif // TEMPORAL_YCOCG			
			
			// Note: Potential improvements:
			//  - Add a sharpen step
			//  - Use filtering when sampling history
			//  - Properly handle borders when sampling neighbors
			//  - Better blend amount calculation? (Needs experimentation)
			
			return output;
		}
		
		// Clean up the helper macros so they do not leak into code following this mixin. Customization
		// parameters (MSAA, TEMPORAL_*) are intentionally left defined. Undefining a macro that was never
		// defined is harmless.
		#undef _TEX2D
		#undef _TEXCOLOR
		#undef _PTEX2D
		#undef _SAMPLE
		#undef _SAMPLEOFF
		#undef _SAMPLECOL
		#undef _PIXSIZE
		#undef _TONEMAP_COLOR
		#undef _TONEMAP_COLOR_INV
		#undef _SAMPLE_COLOR
		#undef _RESOLVE_COLOR
	};
};