Browse Source

SSAO: Added a separate bilateral blur step
- Added options for controlling fade-out and intensity

BearishSun 8 years ago
parent
commit
210100d2ed

+ 4 - 0
Data/Raw/Engine/DataList.json

@@ -313,6 +313,10 @@
         {
             "Path": "PPSSAODownsample.bsl",
             "UUID": "c06a649a-646e-48cf-89fc-f97e46f75264"
+        },
+        {
+            "Path": "PPSSAOBlur.bsl",
+            "UUID": "46c7f742-a02d-4e1f-b121-047cd62a6d4d"
         }
     ],
     "Skin": [

+ 17 - 6
Data/Raw/Engine/Shaders/PPSSAO.bsl

@@ -18,6 +18,9 @@ technique PPSSAO
 			float gCotHalfFOV;
 			float gBias;
 			float2 gDownsampledPixelSize;
+			float2 gFadeMultiplyAdd;
+			float gPower;
+			float gIntensity;
 		}		
 
 		SamplerState gInputSamp;
@@ -79,7 +82,7 @@ technique PPSSAO
 			uvs[7] = uv + float2( 0,  1) * gDownsampledPixelSize;
 			uvs[8] = uv + float2( 1,  1) * gDownsampledPixelSize;
 			
-			float weightedSum = 0.0f;
+			float weightedSum = 0.00001f;
 			float weightSum = 0.00001f;
 			
 			[unroll]
@@ -107,8 +110,6 @@ technique PPSSAO
 		
 		float fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
 		{
-			// TODO - Support MSAA (most likely don't require all samples)
-		
 			#if FINAL_AO // Final uses gbuffer input
 			float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
 			float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f;
@@ -132,7 +133,11 @@ technique PPSSAO
 			float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth;
 			
 			// Get random rotation
+			#if QUALITY == 0
+			float2 rotateDir = float2(0, 1); // No random rotation
+			#else
 			float2 rotateDir = gRandomTex.Sample(gRandomSamp, input.uv0 * gRandomTileScale) * 2 - 1;
+			#endif
 			
 			// Scale by screen space sample radius
 			rotateDir *= sampleRadius;
@@ -221,14 +226,19 @@ technique PPSSAO
 			#endif
 			
 			#if FINAL_AO
-			// TODO - Fade out far away AO
-			// TODO - Adjust power/intensity
+			// Fade out far away AO
+			// Reference: 1 - saturate((depth - fadeDistance) / fadeRange)
+			output = lerp(output, 1.0f, saturate(-sceneDepth * gFadeMultiplyAdd.x + gFadeMultiplyAdd.y));
+			
+			// Adjust power and intensity
+			output = 1.0f - saturate(pow(1.0f - output, gPower) * gIntensity);
 			#endif
 			
+			// On quality 0 we don't blur at all. At qualities higher than 1 we use a proper bilateral blur.
+			#if QUALITY == 1
 			// Perform a 2x2 ad-hoc blur to hide the dither pattern
 			// Note: Ideally the blur would be 4x4 since the pattern is 4x4
 			
-			// TODO - Don't blur on minimal quality level
 			float4 myVal = float4(output, viewNormal);
 			float4 dX = ddx_fine(myVal);
 			float4 dY = ddy_fine(myVal);
@@ -249,6 +259,7 @@ technique PPSSAO
 			weightVert *= invWeight;
 			
 			output = output * myWeight + horzVal.r * weightHorz + vertVal.r * weightVert;
+			#endif
 			
 			return output;
 		}	

+ 66 - 0
Data/Raw/Engine/Shaders/PPSSAOBlur.bsl

@@ -0,0 +1,66 @@
+#include "$ENGINE$\PPBase.bslinc"
+#include "$ENGINE$\PerCameraData.bslinc"
+
+technique PPSSAOBlur
+{
+	mixin PPBase;
+	mixin PerCameraData;
+
+	code
+	{
+		// One-dimensional, depth-weighted blur of the ambient occlusion texture. The pass is meant to be run twice,
+		// once per axis, with gPixelOffset selecting the axis.
+		[internal]
+		cbuffer Input
+		{
+			// Size of a single texel of the input texture, in UV space
+			float2 gPixelSize;
+			// UV step between two neighboring samples; non-zero only along the axis being blurred
+			float2 gPixelOffset;
+			// Scale applied to the view-space depth difference; a sample's weight reaches zero once the scaled
+			// difference reaches one
+			float gInvDepthThreshold;
+		}		
+
+		SamplerState gInputSamp;
+		Texture2D gInputTex;
+		Texture2D gDepthTex;
+		
+		// Number of samples taken on each side of the center pixel
+		static const int NUM_SAMPLES = 2;
+		
+		float fsmain(VStoFS input) : SV_Target0
+		{
+			float centerDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
+		
+			float weightedSum = 0.0f;
+			float weightSum = 0.0f;
+			
+			// The center sample always contributes with full weight, so weightSum can never be zero
+			float centerAO = gInputTex.Sample(gInputSamp, input.uv0).r;
+			weightedSum += centerAO;
+			weightSum += 1.0f;
+		
+			// Note: Consider using normals as a weight as well
+			
+			// Samples on the positive side of the blur axis
+			[unroll]
+			for(int i = 1; i < (NUM_SAMPLES + 1); ++i)
+			{
+				// Step along the blur axis only (stepping by gPixelSize would move diagonally in both passes)
+				float2 sampleUV = gPixelOffset * i + input.uv0;
+			
+				float sampleAO = gInputTex.Sample(gInputSamp, sampleUV).r;
+				float sampleDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, sampleUV).r);
+				
+				// Samples whose depth differs too much from the center are faded out, which preserves depth edges
+				float weight = saturate(1.0f - abs(sampleDepth - centerDepth) * gInvDepthThreshold);
+				weightedSum += sampleAO * weight;
+				weightSum += weight;
+			}
+			
+			// Samples on the negative side of the blur axis
+			[unroll]
+			for(int i = -NUM_SAMPLES; i < 0; ++i)
+			{
+				float2 sampleUV = gPixelOffset * i + input.uv0;
+			
+				float sampleAO = gInputTex.Sample(gInputSamp, sampleUV).r;
+				float sampleDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, sampleUV).r);
+				
+				float weight = saturate(1.0f - abs(sampleDepth - centerDepth) * gInvDepthThreshold);
+				weightedSum += sampleAO * weight;
+				weightSum += weight;
+			}
+					
+			// Normalize by the total weight
+			return weightedSum / weightSum;
+		}	
+	};
+};

+ 31 - 0
Source/BansheeEngine/Include/BsStandardPostProcessSettings.h

@@ -222,6 +222,37 @@ namespace bs
 		 */
 		float bias;
 
+		/**
+		 * Distance (in view space, in meters) after which AO starts fading out. The fade process will happen over the
+		 * range as specified by @p fadeRange.
+		 */
+		float fadeDistance;
+
+		/**
+		 * Range (in view space, in meters) in which AO fades out from 100% to 0%. AO starts fading out after the distance
+		 * specified in @p fadeDistance.
+		 */
+		float fadeRange;
+
+		/**
+		 * Linearly scales the intensity of the AO effect. Values less than 1 make the AO effect less pronounced, and vice
+		 * versa. Valid range is roughly [0.2, 2].
+		 */
+		float intensity;
+
+		/**
+		 * Controls how quickly the AO darkening effect increases with higher occlusion percent. This is a non-linear
+		 * control and will cause the darkening to ramp up exponentially. Valid range is roughly [1, 4], where 1 means no
+		 * extra darkening will occur.
+		 */
+		float power;
+
+		/**
+		 * Quality level of generated ambient occlusion. In range [0, 4]. Higher levels yield higher quality AO at the cost
+		 * of performance.
+		 */
+		UINT32 quality;
+
 		/************************************************************************/
 		/* 								RTTI		                     		*/
 		/************************************************************************/

+ 5 - 0
Source/BansheeEngine/Include/BsStandardPostProcessSettingsRTTI.h

@@ -188,6 +188,11 @@ namespace bs
 			BS_RTTI_MEMBER_PLAIN(enabled, 0)
 			BS_RTTI_MEMBER_PLAIN(radius, 1)
 			BS_RTTI_MEMBER_PLAIN(bias, 2)
+			BS_RTTI_MEMBER_PLAIN(fadeRange, 3)
+			BS_RTTI_MEMBER_PLAIN(fadeDistance, 4)
+			BS_RTTI_MEMBER_PLAIN(intensity, 5)
+			BS_RTTI_MEMBER_PLAIN(power, 6)
+			BS_RTTI_MEMBER_PLAIN(quality, 7)
 		BS_END_RTTI_MEMBERS
 
 	public:

+ 17 - 1
Source/BansheeEngine/Source/BsStandardPostProcessSettings.cpp

@@ -66,7 +66,8 @@ namespace bs
 	}
 
 	AmbientOcclusionSettings::AmbientOcclusionSettings()
-		: enabled(true), radius(1.5f), bias(1.0f)
+		: enabled(true), radius(1.5f), bias(1.0f), fadeDistance(500.0f), fadeRange(50.0f), intensity(1.0f), power(1.0f)
+		, quality(3)
 	{ }
 
 	RTTITypeBase* AmbientOcclusionSettings::getRTTIStatic()
@@ -153,6 +154,11 @@ namespace bs
 		bufferSize += rttiGetElemSize(ambientOcclusion.enabled);
 		bufferSize += rttiGetElemSize(ambientOcclusion.radius);
 		bufferSize += rttiGetElemSize(ambientOcclusion.bias);
+		bufferSize += rttiGetElemSize(ambientOcclusion.fadeRange);
+		bufferSize += rttiGetElemSize(ambientOcclusion.fadeDistance);
+		bufferSize += rttiGetElemSize(ambientOcclusion.intensity);
+		bufferSize += rttiGetElemSize(ambientOcclusion.power);
+		bufferSize += rttiGetElemSize(ambientOcclusion.quality);
 
 		if (buffer == nullptr)
 		{
@@ -209,6 +215,11 @@ namespace bs
 		writeDst = rttiWriteElem(ambientOcclusion.enabled, writeDst);
 		writeDst = rttiWriteElem(ambientOcclusion.radius, writeDst);
 		writeDst = rttiWriteElem(ambientOcclusion.bias, writeDst);
+		writeDst = rttiWriteElem(ambientOcclusion.fadeRange, writeDst);
+		writeDst = rttiWriteElem(ambientOcclusion.fadeDistance, writeDst);
+		writeDst = rttiWriteElem(ambientOcclusion.intensity, writeDst);
+		writeDst = rttiWriteElem(ambientOcclusion.power, writeDst);
+		writeDst = rttiWriteElem(ambientOcclusion.quality, writeDst);
 	}
 
 	void StandardPostProcessSettings::_setSyncData(UINT8* buffer, UINT32 size)
@@ -257,5 +268,10 @@ namespace bs
 		readSource = rttiReadElem(ambientOcclusion.enabled, readSource);
 		readSource = rttiReadElem(ambientOcclusion.radius, readSource);
 		readSource = rttiReadElem(ambientOcclusion.bias, readSource);
+		readSource = rttiReadElem(ambientOcclusion.fadeRange, readSource);
+		readSource = rttiReadElem(ambientOcclusion.fadeDistance, readSource);
+		readSource = rttiReadElem(ambientOcclusion.intensity, readSource);
+		readSource = rttiReadElem(ambientOcclusion.power, readSource);
+		readSource = rttiReadElem(ambientOcclusion.quality, readSource);
 	}
 }

+ 43 - 1
Source/RenderBeast/Include/BsPostProcessing.h

@@ -589,6 +589,9 @@ namespace bs { namespace ct
 		BS_PARAM_BLOCK_ENTRY(float, gCotHalfFOV)
 		BS_PARAM_BLOCK_ENTRY(float, gBias)
 		BS_PARAM_BLOCK_ENTRY(Vector2, gDownsampledPixelSize)
+		BS_PARAM_BLOCK_ENTRY(Vector2, gFadeMultiplyAdd)
+		BS_PARAM_BLOCK_ENTRY(float, gPower)
+		BS_PARAM_BLOCK_ENTRY(float, gIntensity)
 	BS_PARAM_BLOCK_END
 
 	extern SSAOParamDef gSSAOParamDef;
@@ -656,7 +659,7 @@ namespace bs { namespace ct
 		BS_PARAM_BLOCK_ENTRY(float, gInvDepthThreshold)
 	BS_PARAM_BLOCK_END
 
-	extern SSAOParamDef gSSAOParamDef;
+	extern SSAODownsampleParamDef gSSAODownsampleParamDef;
 
 	/** 
 	 * Shader that downsamples the depth & normal buffer and stores their results in a common texture, to be consumed
@@ -687,6 +690,43 @@ namespace bs { namespace ct
 		GpuParamTexture mNormalsTexture;
 	};
 
+	/** 
+	 * Parameter block used by the SSAO blur material. Entries mirror the "Input" constant buffer declared in
+	 * PPSSAOBlur.bsl.
+	 */
+	BS_PARAM_BLOCK_BEGIN(SSAOBlurParamDef)
+		// Size of one texel of the AO input texture, in UV space
+		BS_PARAM_BLOCK_ENTRY(Vector2, gPixelSize)
+		// Per-sample UV step; only the component along the blur direction is non-zero
+		BS_PARAM_BLOCK_ENTRY(Vector2, gPixelOffset)
+		// Inverse of the depth range within which samples are averaged, divided by the view-to-texture scale
+		BS_PARAM_BLOCK_ENTRY(float, gInvDepthThreshold)
+	BS_PARAM_BLOCK_END
+
+	extern SSAOBlurParamDef gSSAOBlurParamDef;
+
+	/** 
+	 * Shader that blurs the ambient occlusion output, in order to hide the noise caused by the randomization texture.
+	 *
+	 * @tparam	HORIZONTAL	Determines the value of the DIR_HORZ shader define, and the axis along which the per-sample
+	 *						pixel offset is set: X if true, Y if false.
+	 */
+	template<bool HORIZONTAL>
+	class SSAOBlurMat : public RendererMaterial<SSAOBlurMat<HORIZONTAL>>
+	{
+		RMAT_DEF("PPSSAOBlur.bsl");
+
+	public:
+		SSAOBlurMat();
+
+		/** 
+		 * Renders the post-process effect with the provided parameters. 
+		 * 
+		 * @param[in]	view			Information about the view we're rendering from.
+		 * @param[in]	ao				Input texture containing ambient occlusion data to be blurred.
+		 * @param[in]	sceneDepth		Input texture containing scene depth.
+		 * @param[in]	destination		Output texture to which to write the blurred data to.
+		 * @param[in]	depthRange		Valid depth range (in view space) within which nearby samples will be averaged.
+		 */
+		void execute(const RendererView& view, const SPtr<Texture>& ao, const SPtr<Texture>& sceneDepth,
+			const SPtr<RenderTexture>& destination, float depthRange);
+
+	private:
+		/** Buffer holding the contents of the "Input" parameter block. */
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		/** Handle to the "gInputTex" shader parameter (ambient occlusion to blur). */
+		GpuParamTexture mAOTexture;
+		/** Handle to the "gDepthTex" shader parameter (scene depth). */
+		GpuParamTexture mDepthTexture;
+	};
+
 	/** Helper class that is used for calculating the SSAO information. */
 	class SSAO
 	{
@@ -716,6 +756,8 @@ namespace bs { namespace ct
 			const AmbientOcclusionSettings& settings);
 
 		SSAODownsampleMat mDownsample;
+		SSAOBlurMat<true> mBlurHorz;
+		SSAOBlurMat<false> mBlurVert;
 		SPtr<Texture> mSSAORandomizationTex;
 
 #define DEFINE_MATERIAL(QUALITY)							\

+ 116 - 4
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -1178,11 +1178,19 @@ namespace bs { namespace ct
 
 		float radius = settings.radius * scale;
 
+		// Factors used for scaling the AO contribution with range
+		Vector2 fadeMultiplyAdd;
+		fadeMultiplyAdd.x = 1.0f / settings.fadeRange;
+		fadeMultiplyAdd.y = -settings.fadeDistance / settings.fadeRange;
+
 		gSSAOParamDef.gSampleRadius.set(mParamBuffer, radius);
 		gSSAOParamDef.gCotHalfFOV.set(mParamBuffer, cotHalfFOV);
 		gSSAOParamDef.gTanHalfFOV.set(mParamBuffer, tanHalfFOV);
 		gSSAOParamDef.gWorldSpaceRadiusMask.set(mParamBuffer, 1.0f);
 		gSSAOParamDef.gBias.set(mParamBuffer, (settings.bias * viewScale) / 1000.0f);
+		gSSAOParamDef.gFadeMultiplyAdd.set(mParamBuffer, fadeMultiplyAdd);
+		gSSAOParamDef.gPower.set(mParamBuffer, settings.power);
+		gSSAOParamDef.gIntensity.set(mParamBuffer, settings.intensity);
 		
 		if(UPSAMPLE)
 		{
@@ -1288,6 +1296,73 @@ namespace bs { namespace ct
 		gRendererUtility().drawScreenQuad();
 	}
 
+	SSAOBlurParamDef gSSAOBlurParamDef;
+
+	template<bool HORIZONTAL>
+	SSAOBlurMat<HORIZONTAL>::SSAOBlurMat()
+	{
+		// Allocate the parameter block and bind it to the shader's "Input" buffer
+		mParamBuffer = gSSAOBlurParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Input", mParamBuffer);
+
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+
+		// Look up handles for the two textures the fragment program reads
+		params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gInputTex", mAOTexture);
+		params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gDepthTex", mDepthTexture);
+
+		// Both inputs are read through a point-filtered sampler that clamps on every axis
+		SAMPLER_STATE_DESC pointClampDesc;
+		pointClampDesc.minFilter = FO_POINT;
+		pointClampDesc.magFilter = FO_POINT;
+		pointClampDesc.mipFilter = FO_POINT;
+		pointClampDesc.addressMode.u = TAM_CLAMP;
+		pointClampDesc.addressMode.v = TAM_CLAMP;
+		pointClampDesc.addressMode.w = TAM_CLAMP;
+
+		params->setSamplerState(GPT_FRAGMENT_PROGRAM, "gInputSamp", SamplerState::create(pointClampDesc));
+	}
+
+	/** Registers the DIR_HORZ define: 1 for the horizontal variation of the material, 0 for the vertical one. */
+	template<bool HORIZONTAL>
+	void SSAOBlurMat<HORIZONTAL>::_initDefines(ShaderDefines& defines)
+	{
+		const int dirHorz = HORIZONTAL ? 1 : 0;
+		defines.set("DIR_HORZ", dirHorz);
+	}
+
+	template<bool HORIZONTAL>
+	void SSAOBlurMat<HORIZONTAL>::execute(const RendererView& view, const SPtr<Texture>& ao, const SPtr<Texture>& depth, 
+		const SPtr<RenderTexture>& destination, float depthRange)
+	{
+		const TextureProperties& aoProps = ao->getProperties();
+		const RendererViewProperties& viewProps = view.getProperties();
+
+		// Size of a single texel of the AO texture, in UV space
+		const Vector2 texelSize(1.0f / aoProps.getWidth(), 1.0f / aoProps.getHeight());
+
+		// Step between neighboring samples; only the axis being blurred is non-zero
+		const Vector2 sampleStep = HORIZONTAL ? Vector2(texelSize.x, 0.0f) : Vector2(0.0f, texelSize.y);
+
+		// Ratio between the view width and the AO texture width, used for adjusting the depth threshold
+		const float viewToTexScale = viewProps.viewRect.width / (float)aoProps.getWidth();
+		const float invDepthThreshold = (1.0f / depthRange) / viewToTexScale;
+
+		// Upload the per-draw constants
+		gSSAOBlurParamDef.gPixelSize.set(mParamBuffer, texelSize);
+		gSSAOBlurParamDef.gPixelOffset.set(mParamBuffer, sampleStep);
+		gSSAOBlurParamDef.gInvDepthThreshold.set(mParamBuffer, invDepthThreshold);
+
+		// Bind input textures and the per-view parameter block
+		mAOTexture.set(ao);
+		mDepthTexture.set(depth);
+		mParamsSet->setParamBlockBuffer("PerCamera", view.getPerViewBuffer());
+
+		// Render a full-screen quad into the destination
+		RenderAPI::instance().setRenderTarget(destination);
+
+		RendererUtility& utility = gRendererUtility();
+		utility.setPass(mMaterial);
+		utility.setPassParams(mParamsSet);
+		utility.drawScreenQuad();
+	}
+
 	SSAO::SSAO()
 	{
 		mSSAORandomizationTex = generate4x4RandomizationTexture();
@@ -1305,14 +1380,29 @@ namespace bs { namespace ct
 		SPtr<Texture> sceneDepth = renderTargets->get(RTT_ResolvedDepth);
 		SPtr<Texture> sceneNormals = renderTargets->get(RTT_GBuffer, RT_COLOR1);
 
-		// TODO - Resolve normals if MSAA
-		// TODO - When downsampling, consider using previous pass as input instead of always sampling gbuffer data
+		const TextureProperties& normalsProps = sceneNormals->getProperties();
+		SPtr<PooledRenderTexture> resolvedNormals;
+		if(sceneNormals->getProperties().getNumSamples() > 1)
+		{
+			POOLED_RENDER_TEXTURE_DESC desc = POOLED_RENDER_TEXTURE_DESC::create2D(normalsProps.getFormat(), 
+				normalsProps.getWidth(), normalsProps.getHeight(), TU_RENDERTARGET);
+			resolvedNormals = GpuResourcePool::instance().get(desc);
+
+			RenderAPI::instance().setRenderTarget(resolvedNormals->renderTexture);
+			gRendererUtility().blit(sceneNormals);
+
+			sceneNormals = resolvedNormals->texture;
+		}
 
 		// Multiple downsampled AO levels are used to minimize cache trashing. Downsampled AO targets use larger radius,
 		// whose contents are then blended with the higher level.
 
-		UINT32 numDownsampleLevels = 2; // TODO - Make it a property, ranging [0, 2]
-		UINT32 quality = 1; // TODO - Make it a property
+		// Pick the number of downsampled AO levels from the quality setting: none up to quality 1, one level at
+		// quality 2, and two levels above that.
+		UINT32 quality = settings.quality;
+		UINT32 numDownsampleLevels = 0;
+		// Test the higher threshold first, otherwise the two-level case could never be reached
+		if (quality > 2)
+			numDownsampleLevels = 2;
+		else if (quality > 1)
+			numDownsampleLevels = 1;
 
 		SPtr<PooledRenderTexture> setupTex0;
 		if(numDownsampleLevels > 0)
@@ -1407,11 +1497,33 @@ namespace bs { namespace ct
 			executeSSAOMat(upsample, true, quality, view, textures, destination, settings);
 		}
 
+		if(resolvedNormals)
+		{
+			GpuResourcePool::instance().release(resolvedNormals);
+			resolvedNormals = nullptr;
+		}
+
 		if(numDownsampleLevels > 0)
 		{
 			GpuResourcePool::instance().release(setupTex0);
 			GpuResourcePool::instance().release(downAOTex0);
 		}
+
+		// Blur the output
+		if(quality > 1) // On level 0 we don't blur at all, on level 1 we use the ad-hoc blur in shader
+		{
+			const RenderTargetProperties& rtProps = destination->getProperties();
+
+			POOLED_RENDER_TEXTURE_DESC desc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_R8, rtProps.getWidth(), 
+				rtProps.getHeight(), TU_RENDERTARGET);
+			SPtr<PooledRenderTexture> blurIntermediateTex = GpuResourcePool::instance().get(desc);
+
+			mBlurHorz.execute(view, destination->getColorTexture(0), sceneDepth, blurIntermediateTex->renderTexture, 
+				DEPTH_RANGE);
+			mBlurVert.execute(view, blurIntermediateTex->texture, sceneDepth, destination, DEPTH_RANGE);
+
+			GpuResourcePool::instance().release(blurIntermediateTex);
+		}
 	}
 
 	void SSAO::executeSSAOMat(bool upsample, bool final, int quality, const RendererView& view, 

+ 8 - 24
Source/RenderBeast/Source/BsRenderBeast.cpp

@@ -633,16 +633,8 @@ namespace bs { namespace ct
 		renderTargets->allocate(RTT_SceneColor);
 		imageBasedLightingMat->execute(renderTargets, perCameraBuffer, mPreintegratedEnvBRDF);
 
-
-
-
-		// DEBUG ONLY
-		//if (useSSAO)
-		//	renderTargets->release(RTT_AmbientOcclusion);
-
-
-
-
+		if (useSSAO)
+			renderTargets->release(RTT_AmbientOcclusion);
 
 		renderTargets->release(RTT_LightAccumulation);
 		renderTargets->release(RTT_GBuffer);
@@ -726,22 +718,14 @@ namespace bs { namespace ct
 		if (isMSAA)
 			renderTargets->release(RTT_ResolvedDepth);
 
-
-
-
-
 		// DEBUG ONLY
-		if(useSSAO)
-		{
-			rapi.setRenderTarget(viewInfo->getProperties().target);
-			gRendererUtility().blit(renderTargets->get(RTT_AmbientOcclusion));
-
-			renderTargets->release(RTT_AmbientOcclusion);
-		}
-
-
-
+		//if(useSSAO)
+		//{
+		//	rapi.setRenderTarget(viewInfo->getProperties().target);
+		//	gRendererUtility().blit(renderTargets->get(RTT_AmbientOcclusion));
 
+		//	renderTargets->release(RTT_AmbientOcclusion);
+		//}
 
 		// Trigger overlay callbacks
 		if (viewProps.triggerCallbacks)