
Initial work on HBAO

BearishSun 8 years ago
Parent
Commit
f888d7c751

+ 4 - 0
Data/Raw/Engine/DataList.json

@@ -313,6 +313,10 @@
         {
             "Path": "PPFXAA.bsl",
             "UUID": "f064b202-dbc0-440f-bd9f-37f094b2c521"
+        },
+        {
+            "Path": "PPSSAO.bsl",
+            "UUID": "7a65b0f1-9a37-452e-ba3f-2a1fb58362cb"
         }
     ],
     "Skin": [

+ 2 - 0
Data/Raw/Engine/Includes/PPBase.bslinc

@@ -12,6 +12,7 @@ mixin PPBase
 		{
 			float4 position : SV_POSITION;
 			float2 uv0 : TEXCOORD0;
+			float2 screenPos : TEXCOORD1;
 		};
 
 		struct VertexInput
@@ -26,6 +27,7 @@ mixin PPBase
 		
 			output.position = float4(input.screenPos, 0, 1);
 			output.uv0 = input.uv0;
+			output.screenPos = input.screenPos;
 
 			return output;
 		}			

+ 6 - 0
Data/Raw/Engine/Includes/PerCameraData.bslinc

@@ -71,6 +71,12 @@ mixin PerCameraData
 			return ndcPos.xy * gClipToUVScaleOffset.xy + gClipToUVScaleOffset.zw;
 		}
 		
+		/** Converts position in UV coordinates mapped to the screen rectangle, to NDC coordinates. */
+		float2 UVToNDC(float2 uvPos)
+		{
+			return (uvPos - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+		}
+		
 		/** Converts position in UV coordinates mapped to the screen, to screen coordinates in pixels. */
 		uint2 UVToScreen(float2 uv)
 		{
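
For reference, a minimal C++ sketch (not part of the commit) of the UV <-> NDC round trip the new UVToNDC helper performs. The scale/offset values are an assumption: the common full-viewport mapping of (0.5, -0.5) scale and (0.5, 0.5) offset; the real values come from gClipToUVScaleOffset and depend on the API and viewport. The Vec2/ndcToUV/uvToNDC names are illustrative, not engine API.

#include <cassert>
#include <cmath>

struct Vec2 { float x, y; };

// Assumed full-viewport clip-to-UV scale/offset (gClipToUVScaleOffset in the engine)
static const Vec2 kScale = { 0.5f, -0.5f };
static const Vec2 kOffset = { 0.5f, 0.5f };

// Mirrors NDCToUV(): uv = ndc * scale + offset
Vec2 ndcToUV(Vec2 ndc)
{
	return { ndc.x * kScale.x + kOffset.x, ndc.y * kScale.y + kOffset.y };
}

// Mirrors the new UVToNDC(): ndc = (uv - offset) / scale
Vec2 uvToNDC(Vec2 uv)
{
	return { (uv.x - kOffset.x) / kScale.x, (uv.y - kOffset.y) / kScale.y };
}

int main()
{
	Vec2 ndc = { -0.25f, 0.75f };
	Vec2 roundTrip = uvToNDC(ndcToUV(ndc));

	// The two helpers are exact inverses of each other
	assert(std::fabs(roundTrip.x - ndc.x) < 1e-6f);
	assert(std::fabs(roundTrip.y - ndc.y) < 1e-6f);
	return 0;
}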

+ 163 - 0
Data/Raw/Engine/Shaders/PPSSAO.bsl

@@ -0,0 +1,163 @@
+#include "$ENGINE$\PPBase.bslinc"
+#include "$ENGINE$\PerCameraData.bslinc"
+
+technique PPSSAO
+{
+	mixin PPBase;
+	mixin PerCameraData;
+
+	code
+	{
+		[internal]
+		cbuffer Input
+		{
+			float4x4 gMixedToView;
+		
+			float gSampleRadius;
+			float gWorldSpaceRadiusMask;
+			float2 gTanHalfFOV; // x - tan(horzFOV/2), y - tan(vertFOV/2)
+			float gCotHalfFOV;
+		}		
+
+		SamplerState gInputSamp;
+		Texture2D gDepthTex;
+		Texture2D gNormalsTex;
+		
+		// TODO - Allow these to be controlled by a quality level
+		#define SAMPLE_COUNT 6
+		#define SAMPLE_STEPS 20
+		
+		static const float2 SAMPLES[6] =
+		{
+			// Points within a disc, at equally separated angles from 0 to 2PI.
+			// Each point is also placed further away from the disc center, up to unit disc radius.
+			float2( 0.000f,  0.166f),
+			float2( 0.288f,  0.166f),
+			float2( 0.433f, -0.250f),
+			float2( 0.000f, -0.666f),
+			float2(-0.721f, -0.416f),
+			float2(-0.866f,  0.500f)
+		};
+		
+		float2 ndcToDepthUV(float2 ndc)
+		{
+			return NDCToUV(ndc);
+		}
+		
+		float3 getViewSpacePos(float2 ndc, float depth)
+		{
+			float2 clipSpace = ndc * -depth;
+			
+			// Use tan(FOV/2) for each axis (which accounts for aspect ratio) to move from clip to
+			// view space (basically just scaling). This is equivalent to multiplying by the
+			// mixedToView matrix that's normally used for this kind of depth -> position
+			// reconstruction, but under a few assumptions we can skip the matrix multiply and get
+			// the same result. We can also skip the division by .w since the depth is already in
+			// view space and the mixedToView matrix wouldn't affect it. The only entries that affect
+			// the xy coordinates are the 0,0 and 1,1 entries of the matrix (assuming a symmetric,
+			// non-off-center projection), which are just the cotangents of the half FOV angles.
+			
+			return float3(clipSpace * gTanHalfFOV, depth);
+		}
+		
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			// TODO - Support MSAA (most likely don't require all samples)
+		
+			// TODO - Read depth and normal from intermediates if they are available
+			float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
+			float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f;
+			float3 viewNormal = normalize(mul((float3x3)gMatView, worldNormal));
+			float3 viewPos = getViewSpacePos(input.screenPos, sceneDepth);
+			
+			// Project sample radius to screen space (approximately), using the formula:
+			// screenRadius = worldRadius * 1/tan(fov/2) / z
+			// The formula approximates sphere projection and is more accurate the closer the sphere
+			// origin is to the screen center.
+			float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth;
+			
+			// TODO - Apply bias to viewposition (and reconstruct screen pos from it)
+			// TODO - Get random rotation (depending on active quality)
+			float2 rotateDir = float2(0, 1);
+			
+			// Scale by screen space sample radius
+			rotateDir *= sampleRadius;
+			
+			// Construct rotation matrix
+			float2 rotateDir90 = float2(-rotateDir.y, rotateDir.x); // Rotate 90 degrees
+			float2x2 rotateTfrm = float2x2(
+				rotateDir.x, rotateDir90.x,
+				rotateDir.y, rotateDir90.y
+			);
+						
+			float invRange = 1.0f / gSampleRadius;
+			
+			// For every sample, find the highest horizon angle in the direction of the sample
+			float2 accumulator = 0.00001f;
+			[unroll]
+			for(int i = 0; i < SAMPLE_COUNT; ++i)
+			{
+				float2 sampleOffset = mul(rotateTfrm, SAMPLES[i]);
+			
+				// Step along the direction of the sample offset, looking for the maximum angle in two directions
+				// (positive dir of the sample offset, and negative). Steps are weighted so that those that are
+				// further away from the origin contribute less.
+				float3 stepAccum = 0;
+				
+				[unroll]
+				for(int j = 1; j <= SAMPLE_STEPS; ++j)
+				{
+					float scale = j / (float)SAMPLE_STEPS;
+					
+					float2 screenPosL = input.screenPos + sampleOffset * scale;
+					float2 screenPosR = input.screenPos - sampleOffset * scale;
+					
+					// TODO - Sample HiZ here to minimize cache thrashing (depending on quality)
+					float depthL = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosL)).r;
+					float depthR = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosR)).r;
+					
+					depthL = convertFromDeviceZ(depthL);
+					depthR = convertFromDeviceZ(depthR);
+					
+					float3 viewPosL = getViewSpacePos(screenPosL, depthL);
+					float3 viewPosR = getViewSpacePos(screenPosR, depthR);
+					
+					float3 diffL = viewPosL - viewPos;
+					float3 diffR = viewPosR - viewPos;
+					
+					float angleL = saturate(dot(diffL, viewNormal) * rsqrt(dot(diffL, diffL)));
+					float angleR = saturate(dot(diffR, viewNormal) * rsqrt(dot(diffR, diffR)));
+					
+					float weight = saturate(1.0f - length(diffL) * invRange);
+					weight *= saturate(1.0f - length(diffR) * invRange);
+					
+					float2 angles = float2(angleL, angleR);
+					stepAccum = lerp(stepAccum, float3(max(angles, stepAccum.xy), 1), weight);
+				}
+				
+				// Invert since a higher angle means more occlusion
+				float2 weightedValue = 1.0f - stepAccum.xy;
+				
+				// Square to reduce impact on areas with low AO, and increase impact on areas with high AO
+				weightedValue *= weightedValue;
+				
+				// Multiply by weight since we calculate the weighted average
+				weightedValue *= stepAccum.z;
+				
+				// Accumulate sum total and weight total
+				accumulator += float2(weightedValue.x + weightedValue.y, 2.0f * stepAccum.z);
+			}
+			
+			float4 output = 0;
+			
+			// Divide by total weight to get the weighted average
+			output.r = accumulator.x / accumulator.y;
+			
+			// TODO - Mix with upsampled AO data
+			// TODO - Fade out far away AO
+			// TODO - Adjust power/intensity
+			// TODO - Perform filtering over 2x2 pixels using derivatives
+			return output;
+		}	
+	};
+};
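
A standalone C++ sketch (not part of the commit) of the reconstruction math used by getViewSpacePos and the sample radius projection above, for the gWorldSpaceRadiusMask = 1 case. It assumes the engine's apparent convention that view-space depth is negative in front of the camera (hence the -depth terms in the shader); the function names and the FOV/aspect values are illustrative only.

#include <cmath>
#include <cstdio>

struct Vec3 { float x, y, z; };

// Mirrors getViewSpacePos(): clip.xy = ndc.xy * -depth, view.xy = clip.xy * tan(fov/2)
Vec3 viewSpacePos(float ndcX, float ndcY, float viewDepth, float tanHalfFovX, float tanHalfFovY)
{
	float clipX = ndcX * -viewDepth;
	float clipY = ndcY * -viewDepth;
	return { clipX * tanHalfFovX, clipY * tanHalfFovY, viewDepth };
}

// Approximate projection of a world-space radius to NDC units at a given depth:
// screenRadius = worldRadius * cot(fov/2) / -depth (most accurate near the screen center)
float projectedRadius(float worldRadius, float viewDepth, float cotHalfFov)
{
	return worldRadius * cotHalfFov / -viewDepth;
}

int main()
{
	float vertFov = 60.0f * 3.14159265f / 180.0f;
	float aspect = 16.0f / 9.0f;
	float tanHalfFovY = std::tan(vertFov * 0.5f);
	float tanHalfFovX = tanHalfFovY * aspect;

	Vec3 p = viewSpacePos(0.5f, -0.25f, -10.0f, tanHalfFovX, tanHalfFovY);
	float r = projectedRadius(0.5f, -10.0f, 1.0f / tanHalfFovX);
	printf("viewPos = (%f, %f, %f), projected radius = %f\n", p.x, p.y, p.z, r);
	return 0;
}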
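
A second small sketch (again illustrative, not engine code) of how the accumulator combines the per-direction horizon terms into a weighted average: each disc sample contributes (occlusionL + occlusionR) * weight to the sum and 2 * weight to the weight total, and the final AO term is their ratio.

#include <cstdio>

int main()
{
	// Hypothetical per-sample occlusion terms (1 - maxAngle)^2 for the two march directions,
	// plus the corresponding range weights, mirroring stepAccum.xy and stepAccum.z in the shader
	const float occlusionL[3] = { 0.9f, 0.4f, 0.7f };
	const float occlusionR[3] = { 0.8f, 0.5f, 0.6f };
	const float weight[3]     = { 1.0f, 0.6f, 0.3f };

	// Small epsilon keeps the division defined when every weight is zero (e.g. nothing in range)
	float sum = 0.00001f;
	float weightSum = 0.00001f;
	for(int i = 0; i < 3; ++i)
	{
		sum += (occlusionL[i] + occlusionR[i]) * weight[i];
		weightSum += 2.0f * weight[i];
	}

	// Matches output.r = accumulator.x / accumulator.y
	printf("weighted average AO term = %f\n", sum / weightSum);
	return 0;
}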

+ 37 - 0
Source/RenderBeast/Include/BsPostProcessing.h

@@ -583,6 +583,42 @@ namespace bs { namespace ct
 		GpuParamTexture mInputTexture;
 	};
 
+	BS_PARAM_BLOCK_BEGIN(SSAOParamDef)
+		BS_PARAM_BLOCK_ENTRY(Matrix4, gMixedToView)
+		BS_PARAM_BLOCK_ENTRY(float, gSampleRadius)
+		BS_PARAM_BLOCK_ENTRY(float, gWorldSpaceRadiusMask)
+		BS_PARAM_BLOCK_ENTRY(Vector2, gTanHalfFOV)
+		BS_PARAM_BLOCK_ENTRY(float, gCotHalfFOV)
+	BS_PARAM_BLOCK_END
+
+	extern SSAOParamDef gSSAOParamDef;
+
+	/** Shader that computes ambient occlusion using screen based methods. */
+	class SSAOMat : public RendererMaterial<SSAOMat>
+	{
+		RMAT_DEF("PPSSAO.bsl");
+
+	public:
+		SSAOMat();
+
+		/** 
+		 * Renders the post-process effect with the provided parameters. 
+		 * 
+		 * @param[in]	view			Information about the view we're rendering from.
+		 * @param[in]	sceneDepth		Input texture containing scene depth.
+		 * @param[in]	sceneNormals	Input texture containing scene world space normals.
+		 * @param[in]	destination		Output texture to write the ambient occlusion data to.
+		 */
+		void execute(const RendererView& view, const SPtr<Texture>& sceneDepth, const SPtr<Texture>& sceneNormals, 
+			const SPtr<RenderTexture>& destination);
+
+	private:
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		GpuParamTexture mDepthTexture;
+		GpuParamTexture mNormalsTexture;
+		GpuParamSampState mInputSampState;
+	};
+
 	/**
 	 * Renders post-processing effects for the provided render target.
 	 *
@@ -608,6 +644,7 @@ namespace bs { namespace ct
 		TonemappingMaterials mTonemapping;
 		GaussianDOF mGaussianDOF;
 		FXAAMat mFXAA;
+		SSAOMat mSSAO;
 	};
 
 	/** @} */

+ 90 - 0
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -1126,6 +1126,81 @@ namespace bs { namespace ct
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(destination);
 
+		gRendererUtility().setPass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+		gRendererUtility().drawScreenQuad();
+	}
+
+	SSAOParamDef gSSAOParamDef;
+
+	SSAOMat::SSAOMat()
+	{
+		mParamBuffer = gSSAOParamDef.createBuffer();
+
+		mParamsSet->setParamBlockBuffer("Input", mParamBuffer);
+
+		SPtr<GpuParams> gpuParams = mParamsSet->getGpuParams();
+		gpuParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gDepthTex", mDepthTexture);
+		gpuParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gNormalsTex", mNormalsTexture);
+
+		SAMPLER_STATE_DESC desc;
+		desc.minFilter = FO_POINT;
+		desc.magFilter = FO_POINT;
+		desc.mipFilter = FO_POINT;
+		desc.addressMode.u = TAM_CLAMP;
+		desc.addressMode.v = TAM_CLAMP;
+		desc.addressMode.w = TAM_CLAMP;
+
+		SPtr<SamplerState> sampState = SamplerState::create(desc);
+		gpuParams->setSamplerState(GPT_FRAGMENT_PROGRAM, "gInputSamp", sampState);
+	}
+
+	void SSAOMat::_initDefines(ShaderDefines& defines)
+	{
+		// Do nothing
+	}
+
+	void SSAOMat::execute(const RendererView& view, const SPtr<Texture>& depth, const SPtr<Texture>& normals, 
+		const SPtr<RenderTexture>& destination)
+	{
+		const RendererViewProperties& viewProps = view.getProperties();
+
+		// TODO - Retrieve these from settings
+		Vector2 tanHalfFOV;
+		tanHalfFOV.x = 1.0f / viewProps.projTransform[0][0];
+		tanHalfFOV.y = 1.0f / viewProps.projTransform[1][1];
+
+		float cotHalfFOV = viewProps.projTransform[0][0];
+
+		gSSAOParamDef.gSampleRadius.set(mParamBuffer, 0.03f);
+		gSSAOParamDef.gCotHalfFOV.set(mParamBuffer, cotHalfFOV);
+		gSSAOParamDef.gTanHalfFOV.set(mParamBuffer, tanHalfFOV);
+		gSSAOParamDef.gWorldSpaceRadiusMask.set(mParamBuffer, 1.0f);
+
+		// Construct a special inverse projection matrix with the projection entries that affect z and w eliminated.
+		// Used to transform a vector (clip_x, clip_y, view_z, view_w), where clip_x/clip_y are in clip space, and 
+		// view_z/view_w are in view space, into view space.
+
+		// Only projects z/w coordinates (cancels out with the inverse matrix below)
+		Matrix4 projZ = Matrix4::IDENTITY;
+		projZ[2][2] = viewProps.projTransform[2][2];
+		projZ[2][3] = viewProps.projTransform[2][3];
+		projZ[3][2] = viewProps.projTransform[3][2];
+		projZ[3][3] = 0.0f;
+
+		Matrix4 xyProj = viewProps.projTransform.inverse() * projZ;
+		
+		gSSAOParamDef.gMixedToView.set(mParamBuffer, xyProj);
+
+		mDepthTexture.set(depth);
+		mNormalsTexture.set(normals);
+
+		SPtr<GpuParamBlockBuffer> perView = view.getPerViewBuffer();
+		mParamsSet->setParamBlockBuffer("PerCamera", perView);
+
+		RenderAPI& rapi = RenderAPI::instance();
+		rapi.setRenderTarget(destination);
+
 		gRendererUtility().setPass(mMaterial);
 		gRendererUtility().setPassParams(mParamsSet);
 		gRendererUtility().drawScreenQuad();
@@ -1144,6 +1219,14 @@ namespace bs { namespace ct
 		bool hdr = viewProps.isHDR;
 		bool msaa = viewProps.numSamples > 1;
 
+		// BEGIN DEBUG ONLY
+		//SPtr<PooledRenderTexture> temp = GpuResourcePool::instance().get(
+		//	POOLED_RENDER_TEXTURE_DESC::create2D(PF_R8, viewProps.viewRect.width, viewProps.viewRect.height, 
+		//	TU_RENDERTARGET));
+
+		//mSSAO.execute(*viewInfo, renderTargets->getSceneDepth(), renderTargets->getGBufferB(), temp->renderTexture);
+		// END DEBUG ONLY
+
 		if(hdr && settings.enableAutoExposure)
 		{
 			mDownsample.execute(1, msaa, sceneColor, ppInfo);
@@ -1232,6 +1315,13 @@ namespace bs { namespace ct
 				renderTargets->release(RTT_ResolvedSceneColorSecondary);
 		}
 
+		// BEGIN DEBUG ONLY
+		//RenderAPI::instance().setRenderTarget(viewProps.target);
+		//gRendererUtility().blit(temp->texture);
+
+		//GpuResourcePool::instance().release(temp);
+		// END DEBUG ONLY
+
 		if (ppInfo.settingDirty)
 			ppInfo.settingDirty = false;
 	}
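
For context on the FOV values pulled out of the projection matrix in SSAOMat::execute() above, a short standalone C++ sketch (not from the commit): for a standard symmetric perspective projection the 0,0 and 1,1 entries are the cotangents of the half horizontal/vertical FOV, so their reciprocals give the gTanHalfFOV values the shader expects, and proj[0][0] itself is gCotHalfFOV. The FOV/aspect numbers below are made up for illustration.

#include <cmath>
#include <cstdio>

int main()
{
	// xy entries of a standard symmetric perspective projection (the engine would build
	// these through its own Matrix4 helpers; values here are hypothetical)
	float vertFov = 75.0f * 3.14159265f / 180.0f;
	float aspect = 1920.0f / 1080.0f;

	float proj00 = 1.0f / (std::tan(vertFov * 0.5f) * aspect); // cot(horzFov / 2)
	float proj11 = 1.0f / std::tan(vertFov * 0.5f);            // cot(vertFov / 2)

	// Mirrors what SSAOMat::execute() derives from projTransform
	float tanHalfFovX = 1.0f / proj00;
	float tanHalfFovY = 1.0f / proj11;
	float cotHalfFov = proj00;

	printf("gTanHalfFOV = (%f, %f), gCotHalfFOV = %f\n", tanHalfFovX, tanHalfFovY, cotHalfFov);
	printf("check: tan(vertFov / 2) = %f\n", std::tan(vertFov * 0.5f));
	return 0;
}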