Browse Source

Stochastic screen space reflections hooked up and functional

BearishSun 8 years ago
parent
commit
4f1722d7c1

+ 18 - 4
Data/Raw/Engine/Includes/ImportanceSampling.bslinc

@@ -4,7 +4,7 @@ mixin ImportanceSampling
 	{
 		#define PI 3.1415926
 	
-		float radicalInverse(uint bits)  
+		uint radicalInverse(uint bits)  
 		{
 			// Reverse bits. Algorithm from Hacker's Delight.
 			bits = (bits << 16u) | (bits >> 16u);
@@ -13,19 +13,33 @@ mixin ImportanceSampling
 			bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
 			bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
 			
-			// Normalizes unsigned int in range [0, 4294967295] to [0, 1]
-			return float(bits) * 2.3283064365386963e-10;
+			return bits;
 		}
 		
 		float2 hammersleySequence(uint i, uint count)
 		{
 			float2 output;
 			output.x = i / (float)count;
-			output.y = radicalInverse(i);
+			uint y = radicalInverse(i);
+			
+			// Normalizes unsigned int in range [0, 4294967295] to [0, 1]
+			output.y = float(y) * 2.3283064365386963e-10;
 			
 			return output;
 		}
 		
+		float2 hammersleySequence(uint i, uint count, uint2 random)
+		{
+			float2 output;
+			output.x = frac(i / (float)count + float(random.x & 0xFFFF) / (1<<16));
+			uint y = radicalInverse(i) ^ random.y;
+			
+			// Normalizes unsigned int in range [0, 4294967295] to [0, 1]
+			output.y = float(y) * 2.3283064365386963e-10;
+			
+			return output;
+		}		
+		
 		// Returns cos(theta) in x and phi in y
 		float2 importanceSampleGGX(float2 e, float roughness4)
 		{

+ 2 - 6
Data/Raw/Engine/Includes/TemporalResolve.bslinc

@@ -235,10 +235,6 @@ mixin TemporalResolve
 			float gSampleWeightsLowpass[9];
 		}
 		
-		// TODO - Need to use SNORM 16-bit format for velocity
-		// TODO - Add gNDCToPrevNDC matrix to PerCameraData
-		// TODO - Generate C++ samples (make sure to account for YCoCg path, and remove jitter)
-		// TODO - Add notes that velocity buffer isn't currenlty being used
 		float3 temporalResolve(
 			_TEX2D(sceneDepth), 
 			_TEX2D(sceneColor), 
@@ -335,7 +331,7 @@ mixin TemporalResolve
 			[unroll]
 			for(uint i = 0; i < 9; ++i)
 				filtered += neighbor[i] * gSampleWeights[i];
-			
+
 			#if TEMPORAL_LOWPASS
 				float3 filteredLow = 0;
 				[unroll]
@@ -350,7 +346,7 @@ mixin TemporalResolve
 			float3 mean = 0;
 			[unroll]
 			for(uint i = 0; i < 9; ++i)
-					mean += neighbor[i];
+				mean += neighbor[i];
 			
 			mean /= 9.0f;
 			

+ 2 - 1
Data/Raw/Engine/Shaders/PPSSRResolve.bsl

@@ -5,6 +5,7 @@
 #define TEMPORAL_LOCAL_VELOCITY 0
 #define TEMPORAL_SEARCH_NEAREST 0
 #define TEMPORAL_BLEND_FACTOR 8
+#define TEMPORAL_SMOOTH_NEIGHBORHOOD 0
 #include "$ENGINE$\TemporalResolve.bslinc"
 
 technique PPSSRStencil
@@ -56,7 +57,7 @@ technique PPSSRStencil
 			#else
 				return temporalResolve(
 					gSceneDepth, gPointSampler, gSceneDepthTexelSize,
-					gSceneColor, gPointSampler, gSceneColorTexelSize, 
+					gSceneColor, gLinearSampler, gSceneColorTexelSize, 
 					gPrevColor, gLinearSampler, gSceneColorTexelSize,
 					exposureScale, input.uv0, input.screenPos, 0);
 			#endif

+ 9 - 3
Data/Raw/Engine/Shaders/PPSSRStencil.bsl

@@ -23,11 +23,17 @@ technique PPSSRStencil
 			float2 gRoughnessScaleBias;
 		}
 		
-		float fsmain(VStoFS input) : SV_Target0
+		float fsmain(VStoFS input			
+		#if MSAA_COUNT > 1 
+			, uint sampleIdx : SV_SampleIndex
+		#endif
+		) : SV_Target0
 		{
-			// TODO - Support MSAA?
-		
+			#if MSAA_COUNT > 1 
+			SurfaceData surfData = getGBufferData(trunc(input.position.xy), sampleIdx);
+			#else
 			SurfaceData surfData = getGBufferData(input.uv0);
+			#endif
 			
 			// Surfaces that are too rough fall back to refl. probes
 			float fadeValue = 1.0f - saturate(surfData.roughness * gRoughnessScaleBias.x + gRoughnessScaleBias.y);

+ 30 - 15
Data/Raw/Engine/Shaders/PPSSRTrace.bsl

@@ -34,8 +34,13 @@ technique PPSSRTrace
 			int gHiZNumMips;
 			float gIntensity;
 			float2 gRoughnessScaleBias;
+			int gTemporalJitter;
 		}
 		
+		#ifndef MSAA_RESOLVE_0TH
+			#define MSAA_RESOLVE_0TH 0
+		#endif
+		
 		Texture2D gSceneColor;
 		SamplerState gSceneColorSamp;
 		
@@ -54,11 +59,22 @@ technique PPSSRTrace
 				 | (y << 1) & 0x2 	| ((y << 2) & 0x8);
 		}		
 
-		float4 fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
+		float4 fsmain(VStoFS input, float4 pixelPos : SV_Position
+			#if MSAA_COUNT > 1 && !MSAA_RESOLVE_0TH
+			, uint sampleIdx : SV_SampleIndex
+			#endif
+		) : SV_Target0
 		{
-			// TODO - Support MSAA?
+			#if MSAA_RESOLVE_0TH
+			uint sampleIdx = 0;
+			#endif
 		
+			#if MSAA_COUNT > 1
+			SurfaceData surfData = getGBufferData(trunc(pixelPos.xy), sampleIdx);
+			#else
 			SurfaceData surfData = getGBufferData(input.uv0);
+			#endif
+			
 			float3 P = NDCToWorld(input.screenPos, surfData.depth);
 			float3 V = normalize(gViewOrigin - P);
 			float3 N = surfData.worldNormal.xyz;
@@ -66,8 +82,7 @@ technique PPSSRTrace
 			float roughness = surfData.roughness;
 			
 			
-			//roughness = 0.3f;//DEBUG ONLY
-			roughness = 0.0f;
+			roughness = 0.3f;//DEBUG ONLY
 			
 			
 			float roughness2 = roughness * roughness;
@@ -81,7 +96,6 @@ technique PPSSRTrace
 
 			// Jitter ray offset in 4x4 tile, in order to avoid stairstep artifacts
 			uint pixelIdx = mortonCode4x4((uint)pixelPos.x, (uint)pixelPos.y);
-			float jitterOffset = (pixelIdx & 15) / 15.0f - 0.5f; // TODO - Also add per-frame jitter			
 			
 			RayMarchParams rayMarchParams;
 			rayMarchParams.bufferSize = gHiZSize;
@@ -89,17 +103,21 @@ technique PPSSRTrace
 			rayMarchParams.NDCToHiZUV = gNDCToHiZUV;
 			rayMarchParams.HiZUVToScreenUV = gHiZUVToScreenUV;
 			rayMarchParams.rayOrigin = P;
-			rayMarchParams.jitterOffset = jitterOffset;			
-			
-			int NUM_RAYS = 1; // DEBUG ONLY
+
+			// Make sure each pixel chooses different ray directions (noise looks better than repeating patterns)
+			//// Magic integer is arbitrary, in order to convert from [0, 1] float
+			uint2 pixRandom = random(pixelPos.xy + gTemporalJitter * uint2(61, 85)) * uint2(0x36174842, 0x15249835);
+			int NUM_RAYS = 8; // DEBUG ONLY
 			
 			float4 sum = 0;
 			[loop]
 			for(int i = 0; i < NUM_RAYS; ++i)
 			{
-				// TODO - Add per-frame random? (for temporal filtering)
-				float2 random = hammersleySequence(i, NUM_RAYS);
-				float2 sphericalH = importanceSampleGGX(random, roughness4);
+				uint rayRandom = (pixelIdx + (gTemporalJitter + i * 207) & 15);
+				rayMarchParams.jitterOffset = rayRandom / 15.0f - 0.5f;
+			
+				float2 e = hammersleySequence(i, NUM_RAYS, pixRandom);
+				float2 sphericalH = importanceSampleGGX(e, roughness4);
 				
 				float cosTheta = sphericalH.x;
 				float phi = sphericalH.y;
@@ -113,8 +131,7 @@ technique PPSSRTrace
 				float3 tangentY = cross(N, tangentX);
 				
 				H = tangentX * H.x + tangentY * H.y + N * H.z; 
-				//float3 R = 2 * dot( V, H ) * H - V;
-				float3 R = 2 * dot( V, N ) * N - V;
+				float3 R = 2 * dot( V, H ) * H - V;
 				
 				// Eliminate rays pointing towards the viewer. They won't hit anything, plus they can screw up precision
 				// and cause ray step offset to be too small, causing self-intersections.
@@ -142,8 +159,6 @@ technique PPSSRTrace
 		
 					color = color * saturate(1.0f - dot(vignette, vignette));
 					
-					// Note: Not accounting for PDF here since we don't evaluate BRDF until later. Looks good though.
-					
 					// Tonemap the color to get a nicer visual average
 					color.rgb /= (1 + LuminanceRGB(color.rgb));
 					

+ 22 - 1
Source/RenderBeast/Include/BsPostProcessing.h

@@ -635,9 +635,15 @@ namespace bs { namespace ct
 		 * @param[in]	settings		Parameters used for controling the SSR effect.
 		 */
 		void execute(const RendererView& view, GBufferTextures gbuffer, const ScreenSpaceReflectionsSettings& settings);
+
+		/** Returns the material variation matching the provided parameters. */
+		static SSRStencilMat* getVariation(bool msaa);
 	private:
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
 		GBufferParams mGBufferParams;
+
+		static ShaderVariation VAR_NoMSAA;
+		static ShaderVariation VAR_MSAA;
 	};
 
 	BS_PARAM_BLOCK_BEGIN(SSRTraceParamDef)
@@ -647,6 +653,7 @@ namespace bs { namespace ct
 		BS_PARAM_BLOCK_ENTRY(int, gHiZNumMips)
 		BS_PARAM_BLOCK_ENTRY(float, gIntensity)
 		BS_PARAM_BLOCK_ENTRY(Vector2, gRoughnessScaleBias)
+		BS_PARAM_BLOCK_ENTRY(int, gTemporalJitter)
 	BS_PARAM_BLOCK_END
 
 	extern SSRTraceParamDef gSSRTraceParamDef;
@@ -679,11 +686,25 @@ namespace bs { namespace ct
 		 * is 1/2 the length of @p maxRoughness.
 		 */
 		static Vector2 calcRoughnessFadeScaleBias(float maxRoughness);
+
+		/** 
+		 * Returns the material variation matching the provided parameters. 
+		 * 
+		 * @param[in]	msaa				True if the shader will operate on a multisampled surface.
+		 * @param[in]	singleSampleMSAA	Only relevant if @p msaa is true. When enabled only the first sample will be
+		 *									evaluated. Otherwise all samples will be evaluated.
+		 * @return							Requested variation of the material.
+		 */
+		static SSRTraceMat* getVariation(bool msaa, bool singleSampleMSAA = false);
 	private:
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
 		GBufferParams mGBufferParams;
 		GpuParamTexture mSceneColorTexture;
 		GpuParamTexture mHiZTexture;
+
+		static ShaderVariation VAR_NoMSAA;
+		static ShaderVariation VAR_FullMSAA;
+		static ShaderVariation VAR_SingleMSAA;
 	};
 
 	BS_PARAM_BLOCK_BEGIN(TemporalResolveParamDef)
@@ -728,7 +749,7 @@ namespace bs { namespace ct
 		 *							the eye adaptation shader. Otherwise the manually provided exposure value is used
 		 *							instead.
 		 */
-		SSRResolveMat* getVariation(bool eyeAdaptation);
+		static SSRResolveMat* getVariation(bool eyeAdaptation);
 
 	private:
 		SPtr<GpuParamBlockBuffer> mSSRParamBuffer;

+ 13 - 5
Source/RenderBeast/Include/BsRenderBeast.h

@@ -23,14 +23,23 @@ namespace bs
 	 *  @{
 	 */
 
+	/** Information about current time and frame index. */
+	struct FrameTimings
+	{
+		float time;
+		float timeDelta;
+		UINT64 frameIdx;
+	};
+
 	/** Contains information global to an entire frame. */
 	struct FrameInfo
 	{
-		FrameInfo(float timeDelta, const RendererAnimationData* animData = nullptr)
-			:timeDelta(timeDelta), animData(animData)
+		FrameInfo(const FrameTimings& timings, const RendererAnimationData* animData = nullptr)
+			:timeDelta(timings.timeDelta), frameIdx(timings.frameIdx), animData(animData)
 		{ }
 
 		float timeDelta;
+		UINT64 frameIdx;
 		const RendererAnimationData* animData;
 	};
 
@@ -134,12 +143,11 @@ namespace bs
 		/**
 		 * Performs rendering over all camera proxies.
 		 *
-		 * @param[in]	time	Current frame time in milliseconds.
-		 * @param[in]	delta	Time elapsed since the last frame.
+		 * @param[in]	timings		Information about frame time and frame index.
 		 *
 		 * @note	Core thread only.
 		 */
-		void renderAllCore(float time, float delta);
+		void renderAllCore(FrameTimings timings);
 
 		/**
 		 * Renders all views in the provided view group.

+ 7 - 0
Source/RenderBeast/Include/BsRenderCompositor.h

@@ -585,6 +585,8 @@ namespace ct
 	public:
 		SPtr<PooledRenderTexture> output;
 
+		~RCNodeSSR();
+
 		static StringID getNodeId() { return "SSR"; }
 		static SmallVector<StringID, 4> getDependencies(const RendererView& view);
 	protected:
@@ -593,6 +595,11 @@ namespace ct
 
 		/** @copydoc RenderCompositorNode::clear */
 		void clear() override;
+
+		/** Cleans up any outputs. */
+		void deallocOutputs();
+
+		SPtr<PooledRenderTexture> mPrevFrame;
 	};
 
 	/** @} */

+ 1 - 0
Source/RenderBeast/Include/BsRendererView.h

@@ -142,6 +142,7 @@ namespace bs { namespace ct
 		Rect2I viewRect;
 		Rect2 nrmViewRect;
 		UINT32 numSamples;
+		UINT32 frameIdx;
 
 		UINT32 clearFlags;
 		Color clearColor;

+ 61 - 5
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -1360,6 +1360,14 @@ namespace bs { namespace ct
 
 	SSRStencilParamDef gSSRStencilParamDef;
 
+	ShaderVariation SSRStencilMat::VAR_NoMSAA = ShaderVariation({
+		ShaderVariation::Param("MSAA_COUNT", 1)
+	});
+
+	ShaderVariation SSRStencilMat::VAR_MSAA = ShaderVariation({
+		ShaderVariation::Param("MSAA_COUNT", 2)
+	});
+
 	SSRStencilMat::SSRStencilMat()
 		:mGBufferParams(mMaterial, mParamsSet)
 	{
@@ -1369,7 +1377,8 @@ namespace bs { namespace ct
 
 	void SSRStencilMat::_initVariations(ShaderVariations& variations)
 	{
-		// Do nothing
+		variations.add(VAR_NoMSAA); // MSAA_COUNT = 1
+		variations.add(VAR_MSAA); // MSAA_COUNT = 2, requested by getVariation(true)
 	}
 
 	void SSRStencilMat::execute(const RendererView& view, GBufferTextures gbuffer, 
@@ -1388,8 +1397,29 @@ namespace bs { namespace ct
 		gRendererUtility().drawScreenQuad();
 	}
 
+	SSRStencilMat* SSRStencilMat::getVariation(bool msaa)
+	{
+		if (msaa)
+			return get(VAR_MSAA);
+		else
+			return get(VAR_NoMSAA);
+	}
+
 	SSRTraceParamDef gSSRTraceParamDef;
 
+	ShaderVariation SSRTraceMat::VAR_NoMSAA = ShaderVariation({
+		ShaderVariation::Param("MSAA_COUNT", 1)
+	});
+
+	ShaderVariation SSRTraceMat::VAR_FullMSAA = ShaderVariation({
+		ShaderVariation::Param("MSAA_COUNT", 2)
+	});
+
+	ShaderVariation SSRTraceMat::VAR_SingleMSAA = ShaderVariation({
+		ShaderVariation::Param("MSAA_COUNT", 2),
+		ShaderVariation::Param("MSAA_RESOLVE_0TH", true)
+	});
+
 	SSRTraceMat::SSRTraceMat()
 		:mGBufferParams(mMaterial, mParamsSet)
 	{
@@ -1405,7 +1435,9 @@ namespace bs { namespace ct
 
 	void SSRTraceMat::_initVariations(ShaderVariations& variations)
 	{
-		// Do nothing
+		variations.add(VAR_NoMSAA);
+		variations.add(VAR_FullMSAA);
+		variations.add(VAR_SingleMSAA);
 	}
 
 	void SSRTraceMat::execute(const RendererView& view, GBufferTextures gbuffer, const SPtr<Texture>& sceneColor, 
@@ -1449,6 +1481,8 @@ namespace bs { namespace ct
 		// Used for roughness fading
 		Vector2 roughnessScaleBias = calcRoughnessFadeScaleBias(settings.maxRoughness);
 
+		UINT32 temporalJitter = (viewProps.frameIdx % 8) * 1503;
+
 		Vector2I bufferSize(viewRect.width, viewRect.height);
 		gSSRTraceParamDef.gHiZSize.set(mParamBuffer, bufferSize);
 		gSSRTraceParamDef.gHiZNumMips.set(mParamBuffer, hiZProps.getNumMipmaps());
@@ -1456,6 +1490,7 @@ namespace bs { namespace ct
 		gSSRTraceParamDef.gHiZUVToScreenUV.set(mParamBuffer, HiZUVToScreenUV);
 		gSSRTraceParamDef.gIntensity.set(mParamBuffer, settings.intensity);
 		gSSRTraceParamDef.gRoughnessScaleBias.set(mParamBuffer, roughnessScaleBias);
+		gSSRTraceParamDef.gTemporalJitter.set(mParamBuffer, temporalJitter);
 
 		SPtr<GpuParamBlockBuffer> perView = view.getPerViewBuffer();
 		mParamsSet->setParamBlockBuffer("PerCamera", perView);
@@ -1478,6 +1513,19 @@ namespace bs { namespace ct
 		return scaleBias;
 	}
 
+	SSRTraceMat* SSRTraceMat::getVariation(bool msaa, bool singleSampleMSAA)
+	{
+		if (msaa)
+		{
+			if (singleSampleMSAA)
+				return get(VAR_SingleMSAA);
+			else
+				return get(VAR_FullMSAA);
+		}
+		else
+			return get(VAR_NoMSAA);
+	}
+
 	TemporalResolveParamDef gTemporalResolveParamDef;
 	SSRResolveParamDef gSSRResolveParamDef;
 
@@ -1560,7 +1608,7 @@ namespace bs { namespace ct
 
 		gSSRResolveParamDef.gSceneColorTexelSize.set(mSSRParamBuffer, colorPixelSize);
 		gSSRResolveParamDef.gSceneDepthTexelSize.set(mSSRParamBuffer, depthPixelSize);
-		// TODO - Set manual exposure value
+		gSSRResolveParamDef.gManualExposure.set(mSSRParamBuffer, 1.0f);
 
 		// Generate samples
 		// Note: Move this code to a more general spot where it can be used by other temporal shaders.
@@ -1636,8 +1684,8 @@ namespace bs { namespace ct
 
 		for (UINT32 i = 0; i < 9; ++i)
 		{
-			gTemporalResolveParamDef.gSampleWeights.set(mTemporalParamBuffer, sampleWeights[i] / totalWeights);
-			gTemporalResolveParamDef.gSampleWeightsLowpass.set(mTemporalParamBuffer, sampleWeightsLowPass[i] / totalWeightsLowPass);
+			gTemporalResolveParamDef.gSampleWeights.set(mTemporalParamBuffer, sampleWeights[i] / totalWeights, i);
+			gTemporalResolveParamDef.gSampleWeightsLowpass.set(mTemporalParamBuffer, sampleWeightsLowPass[i] / totalWeightsLowPass, i);
 		}
 		
 		SPtr<GpuParamBlockBuffer> perView = view.getPerViewBuffer();
@@ -1650,4 +1698,12 @@ namespace bs { namespace ct
 		gRendererUtility().setPassParams(mParamsSet);
 		gRendererUtility().drawScreenQuad();
 	}
+
+	SSRResolveMat* SSRResolveMat::getVariation(bool eyeAdaptation)
+	{
+		if (eyeAdaptation)
+			return get(VAR_EyeAdaptation);
+		else
+			return get(VAR_NoEyeAdaptation);
+	}
 }}

+ 10 - 5
Source/RenderBeast/Source/BsRenderBeast.cpp

@@ -250,11 +250,16 @@ namespace bs { namespace ct
 			gCoreThread().queueCommand(std::bind(&RenderBeast::syncOptions, this, *mOptions));
 			mOptionsDirty = false;
 		}
+
+		FrameTimings timings;
+		timings.time = gTime().getTime();
+		timings.timeDelta = gTime().getFrameDelta();
+		timings.frameIdx = gTime().getFrameIdx();
 		
-		gCoreThread().queueCommand(std::bind(&RenderBeast::renderAllCore, this, gTime().getTime(), gTime().getFrameDelta()));
+		gCoreThread().queueCommand(std::bind(&RenderBeast::renderAllCore, this, timings));
 	}
 
-	void RenderBeast::renderAllCore(float time, float delta)
+	void RenderBeast::renderAllCore(FrameTimings timings)
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
@@ -269,7 +274,7 @@ namespace bs { namespace ct
 		mScene->refreshSamplerOverrides();
 
 		// Update global per-frame hardware buffers
-		mObjectRenderer->setParamFrameParams(time);
+		mObjectRenderer->setParamFrameParams(timings.time);
 
 		// Retrieve animation data
 		AnimationManager::instance().waitUntilComplete();
@@ -278,7 +283,7 @@ namespace bs { namespace ct
 		sceneInfo.renderableReady.resize(sceneInfo.renderables.size(), false);
 		sceneInfo.renderableReady.assign(sceneInfo.renderables.size(), false);
 		
-		FrameInfo frameInfo(delta, &animData);
+		FrameInfo frameInfo(timings, &animData);
 
 		// Make sure any renderer tasks finish first, as rendering might depend on them
 		processTasks(false);
@@ -652,7 +657,7 @@ namespace bs { namespace ct
 		RendererViewGroup viewGroup(viewPtrs, 6, mCoreOptions->shadowMapSize);
 		viewGroup.determineVisibility(sceneInfo);
 
-		FrameInfo frameInfo(1.0f/60.0f);
+		FrameInfo frameInfo({ 0.0f, 1.0f / 60.0f, 0 });
 		renderViews(viewGroup, frameInfo);
 
 		// Make sure the render texture is available for reads

+ 106 - 45
Source/RenderBeast/Source/BsRenderCompositor.cpp

@@ -756,26 +756,24 @@ namespace bs { namespace ct
 		RCNodeSceneDepth* sceneDepthNode = static_cast<RCNodeSceneDepth*>(inputs.inputNodes[2]);
 		RCNodeLightAccumulation* lightAccumNode = static_cast <RCNodeLightAccumulation*>(inputs.inputNodes[3]);
 
-		SPtr<Texture> ssao;
-		if (rs.ambientOcclusion.enabled)
-		{
-			RCNodeSSAO* ssaoNode = static_cast<RCNodeSSAO*>(inputs.inputNodes[5]);
-			ssao = ssaoNode->output->texture;
-		}
-		else
-			ssao = Texture::WHITE;
-
 		SPtr<Texture> ssr;
 		if (rs.screenSpaceReflections.enabled)
 		{
-			UINT32 nodeIdx = rs.ambientOcclusion.enabled ? 6 : 5;
-
-			RCNodeSSR* ssrNode = static_cast<RCNodeSSR*>(inputs.inputNodes[nodeIdx]);
+			RCNodeSSR* ssrNode = static_cast<RCNodeSSR*>(inputs.inputNodes[5]);
 			ssr = ssrNode->output->texture;
 		}
 		else
 			ssr = Texture::BLACK;
 
+		SPtr<Texture> ssao;
+		if (rs.ambientOcclusion.enabled)
+		{
+			RCNodeSSAO* ssaoNode = static_cast<RCNodeSSAO*>(inputs.inputNodes[6]);
+			ssao = ssaoNode->output->texture;
+		}
+		else
+			ssao = Texture::WHITE;
+
 		const RendererViewProperties& viewProps = inputs.view.getProperties();
 		TiledDeferredImageBasedLightingMat* material = TiledDeferredImageBasedLightingMat::getVariation(viewProps.numSamples);
 
@@ -810,13 +808,11 @@ namespace bs { namespace ct
 		deps.push_back(RCNodeSceneDepth::getNodeId());
 		deps.push_back(RCNodeLightAccumulation::getNodeId());
 		deps.push_back(RCNodeIndirectLighting::getNodeId());
+		deps.push_back(RCNodeSSR::getNodeId());
 
 		if(view.getRenderSettings().ambientOcclusion.enabled)
 			deps.push_back(RCNodeSSAO::getNodeId());
 
-		if (view.getRenderSettings().screenSpaceReflections.enabled)
-			deps.push_back(RCNodeSSR::getNodeId());
-
 		return deps;
 	}
 
@@ -1711,59 +1707,124 @@ namespace bs { namespace ct
 		return { RCNodeResolvedSceneDepth::getNodeId(), RCNodeGBuffer::getNodeId() };
 	}
 
-	void RCNodeSSR::render(const RenderCompositorNodeInputs& inputs)
+	RCNodeSSR::~RCNodeSSR()
 	{
-		RCNodeSceneDepth* sceneDepthNode = static_cast<RCNodeSceneDepth*>(inputs.inputNodes[0]);
-		RCNodeLightAccumulation* lightAccumNode = static_cast<RCNodeLightAccumulation*>(inputs.inputNodes[1]);
-		RCNodeGBuffer* gbufferNode = static_cast<RCNodeGBuffer*>(inputs.inputNodes[2]);
-		RCNodeHiZ* hiZNode = static_cast<RCNodeHiZ*>(inputs.inputNodes[3]);
+		deallocOutputs();
+	}
 
-		GpuResourcePool& resPool = GpuResourcePool::instance();
-		const RendererViewProperties& viewProps = inputs.view.getProperties();
+	void RCNodeSSR::render(const RenderCompositorNodeInputs& inputs)
+	{
 		const ScreenSpaceReflectionsSettings& settings = inputs.view.getRenderSettings().screenSpaceReflections;
+		if (settings.enabled)
+		{
+			RCNodeSceneDepth* sceneDepthNode = static_cast<RCNodeSceneDepth*>(inputs.inputNodes[0]);
+			RCNodeLightAccumulation* lightAccumNode = static_cast<RCNodeLightAccumulation*>(inputs.inputNodes[1]);
+			RCNodeGBuffer* gbufferNode = static_cast<RCNodeGBuffer*>(inputs.inputNodes[2]);
+			RCNodeHiZ* hiZNode = static_cast<RCNodeHiZ*>(inputs.inputNodes[3]);
 
-		SPtr<Texture> hiZ = hiZNode->output->texture;
+			GpuResourcePool& resPool = GpuResourcePool::instance();
+			const RendererViewProperties& viewProps = inputs.view.getProperties();
 
-		// This will be executing before scene color is resolved
-		SPtr<Texture> sceneColor = lightAccumNode->lightAccumulationTex->texture;
+			UINT32 width = viewProps.viewRect.width;
+			UINT32 height = viewProps.viewRect.height;
 
-		GBufferTextures gbuffer;
-		gbuffer.albedo = gbufferNode->albedoTex->texture;
-		gbuffer.normals = gbufferNode->normalTex->texture;
-		gbuffer.roughMetal = gbufferNode->roughMetalTex->texture;
-		gbuffer.depth = sceneDepthNode->depthTex->texture;
+			SPtr<Texture> hiZ = hiZNode->output->texture;
 
-		UINT32 width = viewProps.viewRect.width;
-		UINT32 height = viewProps.viewRect.height;
+			// This will be executing before scene color is resolved, so get the light accum buffer instead
+			SPtr<Texture> sceneColor = lightAccumNode->lightAccumulationTex->texture;
 
-		output = resPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_RGBA16F, width, height, TU_RENDERTARGET));
+			// Resolve multiple samples if MSAA is used
+			SPtr<PooledRenderTexture> resolvedSceneColor;
+			if(viewProps.numSamples > 1)
+			{
+				resolvedSceneColor = resPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_RGBA16F, width, height, 
+					TU_RENDERTARGET));
+
+				RenderAPI::instance().setRenderTarget(resolvedSceneColor->renderTexture);
+				gRendererUtility().blit(sceneColor);
+
+				sceneColor = resolvedSceneColor->texture;
+			}
+
+			GBufferTextures gbuffer;
+			gbuffer.albedo = gbufferNode->albedoTex->texture;
+			gbuffer.normals = gbufferNode->normalTex->texture;
+			gbuffer.roughMetal = gbufferNode->roughMetalTex->texture;
+			gbuffer.depth = sceneDepthNode->depthTex->texture;
+
+			SSRStencilMat* stencilMat = SSRStencilMat::getVariation(viewProps.numSamples > 1);
+
+			// TODO - Run SSRStencil
+			// TODO - Is stencil clear at this point? Also use stencil mask.
+			// RenderAPI::instance().setRenderTarget(sceneDepthNode->depthTex->renderTexture);
+			// stencilMat->execute(inputs.view, gbuffer, settings);
+
+			SPtr<PooledRenderTexture> traceOutput = resPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_RGBA16F, width, 
+				height, TU_RENDERTARGET));
+
+			SSRTraceMat* traceMat = SSRTraceMat::getVariation(viewProps.numSamples > 1);
+			traceMat->execute(inputs.view, gbuffer, sceneColor, hiZ, settings, traceOutput->renderTexture);
 
-		// TODO - Run SSRStencil
+			if (resolvedSceneColor)
+			{
+				resPool.release(resolvedSceneColor);
+				resolvedSceneColor = nullptr;
+			}
 
-		SSRTraceMat* traceMat = SSRTraceMat::get();
-		traceMat->execute(inputs.view, gbuffer, sceneColor, hiZ, settings, output->renderTexture);
+			if (mPrevFrame)
+			{
+				output = resPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_RGBA16F, width, height, TU_RENDERTARGET));
 
-		// TODO - Run temporal resolve
+				SSRResolveMat* resolveMat = SSRResolveMat::getVariation(false);
+				resolveMat->execute(inputs.view, mPrevFrame->texture, traceOutput->texture, sceneDepthNode->depthTex->texture,
+					output->renderTexture);
 
-		RenderAPI::instance().setRenderTarget(nullptr);
+				resPool.release(traceOutput);
+			}
+			else
+				output = traceOutput;
+
+			RenderAPI::instance().setRenderTarget(nullptr);
+		}
+		else
+			deallocOutputs();
 	}
 
 	void RCNodeSSR::clear()
 	{
 		GpuResourcePool& resPool = GpuResourcePool::instance();
-		resPool.release(output);
+
+		if(mPrevFrame)
+			resPool.release(mPrevFrame);
+
+		mPrevFrame = output;
+		output = nullptr;
+	}
+
+	void RCNodeSSR::deallocOutputs()
+	{
+		GpuResourcePool& resPool = GpuResourcePool::instance();
+		
+		if(mPrevFrame)
+		{
+			resPool.release(mPrevFrame);
+			mPrevFrame = nullptr;
+		}
 	}
 
 	SmallVector<StringID, 4> RCNodeSSR::getDependencies(const RendererView& view)
 	{
 		SmallVector<StringID, 4> deps;
-		deps.push_back(RCNodeSceneDepth::getNodeId());
-		deps.push_back(RCNodeLightAccumulation::getNodeId());
-		deps.push_back(RCNodeGBuffer::getNodeId());
-		deps.push_back(RCNodeHiZ::getNodeId());
+		if (view.getRenderSettings().screenSpaceReflections.enabled)
+		{
+			deps.push_back(RCNodeSceneDepth::getNodeId());
+			deps.push_back(RCNodeLightAccumulation::getNodeId());
+			deps.push_back(RCNodeGBuffer::getNodeId());
+			deps.push_back(RCNodeHiZ::getNodeId());
 
-		if (view.getProperties().numSamples > 1)
-			deps.push_back(RCNodeUnflattenLightAccum::getNodeId());
+			if (view.getProperties().numSamples > 1)
+				deps.push_back(RCNodeUnflattenLightAccum::getNodeId());
+		}
 
 		return deps;
 	}

+ 17 - 5
Source/RenderBeast/Source/BsRendererView.cpp

@@ -68,7 +68,7 @@ namespace bs { namespace ct
 	}
 
 	RendererViewProperties::RendererViewProperties(const RENDERER_VIEW_DESC& src)
-		:RendererViewData(src)
+		:RendererViewData(src), frameIdx(0)
 	{
 		viewProjTransform = src.projTransform * src.viewTransform;
 
@@ -119,7 +119,8 @@ namespace bs { namespace ct
 
 		mRenderSettingsHash++;
 
-		// Update compositor hierarchy
+		// Update compositor hierarchy (Note: Needs to be called even when viewport size (or other information) changes,
+		// but we're currently calling it here as all such calls are followed by setRenderSettings.)
 		mCompositor.build(*this, RCNodeFinalResolve::getNodeId());
 	}
 
@@ -139,6 +140,7 @@ namespace bs { namespace ct
 		mCamera = desc.sceneCamera;
 		mProperties = desc;
 		mProperties.viewProjTransform = desc.projTransform * desc.viewTransform;
+		mProperties.prevViewProjTransform = Matrix4::IDENTITY;
 		mTargetDesc = desc.target;
 
 		setStateReductionMode(desc.stateReduction);
@@ -146,6 +148,12 @@ namespace bs { namespace ct
 
 	void RendererView::beginFrame()
 	{
+		// Note: inverse view-projection can be cached, it doesn't change every frame
+		Matrix4 viewProj = mProperties.projTransform * mProperties.viewTransform;
+		Matrix4 invViewProj = viewProj.inverse();
+		Matrix4 NDCToPrevNDC = mProperties.prevViewProjTransform * invViewProj;
+		
+		gPerCameraParamDef.gNDCToPrevNDC.set(mParamBuffer, NDCToPrevNDC);
 	}
 
 	void RendererView::endFrame()
@@ -153,6 +161,10 @@ namespace bs { namespace ct
 		// Save view-projection matrix to use for temporal filtering
 		mProperties.prevViewProjTransform = mProperties.viewProjTransform;
 
+		// Advance per-view frame index. This is used primarily by temporal rendering effects, and pausing the frame index
+		// allows you to freeze the current rendering as is, without temporal artifacts.
+		mProperties.frameIdx++;
+
 		mOpaqueQueue->clear();
 		mTransparentQueue->clear();
 	}
@@ -395,8 +407,6 @@ namespace bs { namespace ct
 
 	void RendererView::updatePerViewBuffer()
 	{
-		RenderAPI& rapi = RenderAPI::instance();
-
 		Matrix4 viewProj = mProperties.projTransform * mProperties.viewTransform;
 		Matrix4 invViewProj = viewProj.inverse();
 
@@ -416,9 +426,11 @@ namespace bs { namespace ct
 		projZ[2][3] = mProperties.projTransform[2][3];
 		projZ[3][2] = mProperties.projTransform[3][2];
 		projZ[3][3] = 0.0f;
+
+		Matrix4 NDCToPrevNDC = mProperties.prevViewProjTransform * invViewProj;
 		
 		gPerCameraParamDef.gMatScreenToWorld.set(mParamBuffer, invViewProj * projZ);
-		gPerCameraParamDef.gNDCToPrevNDC.set(mParamBuffer, mProperties.prevViewProjTransform * invViewProj);
+		gPerCameraParamDef.gNDCToPrevNDC.set(mParamBuffer, NDCToPrevNDC);
 		gPerCameraParamDef.gViewDir.set(mParamBuffer, mProperties.viewDirection);
 		gPerCameraParamDef.gViewOrigin.set(mParamBuffer, mProperties.viewOrigin);
 		gPerCameraParamDef.gDeviceZToWorldZ.set(mParamBuffer, getDeviceZToViewZ(mProperties.projTransform));