Просмотр исходного кода

Initial work on SSR: Linear and HiZ ray marching

BearishSun 8 лет назад
Родитель
Сommit
f9d9804e7f

+ 8 - 0
Data/Raw/Engine/DataList.json

@@ -139,6 +139,10 @@
         {
         {
             "Path": "PPGaussianDOFCommon.bslinc",
             "Path": "PPGaussianDOFCommon.bslinc",
             "UUID": "78521453-a352-4fd8-b997-8e58942dee2d"
             "UUID": "78521453-a352-4fd8-b997-8e58942dee2d"
+        },
+        {
+            "Path": "RayMarch.bslinc",
+            "UUID": "c64f9bee-c0c2-442e-9f59-35da0494e827"
         }
         }
     ],
     ],
     "Shaders": [
     "Shaders": [
@@ -317,6 +321,10 @@
         {
         {
             "Path": "PPSSAOBlur.bsl",
             "Path": "PPSSAOBlur.bsl",
             "UUID": "46c7f742-a02d-4e1f-b121-047cd62a6d4d"
             "UUID": "46c7f742-a02d-4e1f-b121-047cd62a6d4d"
+        },
+        {
+            "Path": "PPSSRTrace.bsl",
+            "UUID": "daadcf73-1ae5-4f8f-90e5-a6e538832304"
         }
         }
     ],
     ],
     "Skin": [
     "Skin": [

+ 15 - 0
Data/Raw/Engine/Includes/GBufferInput.bslinc

@@ -7,10 +7,15 @@ mixin GBufferInput
 
 
 	code 
 	code 
 	{
 	{
+		// Note: Only one or two sampler states are likely required; the rest could be removed to avoid wasting register space
 		SamplerState gGBufferASamp;
 		SamplerState gGBufferASamp;
 		SamplerState gGBufferBSamp;
 		SamplerState gGBufferBSamp;
 		SamplerState gGBufferCSamp;
 		SamplerState gGBufferCSamp;
 		SamplerState gDepthBufferSamp;
 		SamplerState gDepthBufferSamp;
+		
+		#ifndef MSAA_COUNT
+			#define MSAA_COUNT 1
+		#endif
 
 
 		#if MSAA_COUNT > 1
 		#if MSAA_COUNT > 1
 		Texture2DMS<float4> gGBufferATex;
 		Texture2DMS<float4> gGBufferATex;
@@ -58,6 +63,16 @@ mixin GBufferInput
 			float2 GBufferCData = gGBufferCTex.Load(int3(pixelPos, 0)).rg;
 			float2 GBufferCData = gGBufferCTex.Load(int3(pixelPos, 0)).rg;
 			float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
 			float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
 			
 			
+			return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
+		}
+
+		SurfaceData getGBufferData(float2 uvPos)
+		{
+			float4 GBufferAData = gGBufferATex.Sample(gGBufferASamp, uvPos);
+			float4 GBufferBData = gGBufferBTex.Sample(gGBufferBSamp, uvPos);
+			float2 GBufferCData = gGBufferCTex.Sample(gGBufferCSamp, uvPos).rg;
+			float deviceZ = gDepthBufferTex.Sample(gDepthBufferSamp, uvPos).r;
+			
 			return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
 			return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
 		}			
 		}			
 		#endif			
 		#endif			

+ 1 - 1
Data/Raw/Engine/Includes/PerCameraData.bslinc

@@ -52,7 +52,7 @@ mixin PerCameraData
 		{
 		{
 			return -gNDCZToWorldZ.y + (gNDCZToWorldZ.x / viewZ);
 			return -gNDCZToWorldZ.y + (gNDCZToWorldZ.x / viewZ);
 		}
 		}
-		
+				
 		/** Converts Z value from NDC space to device Z value in range [0, 1]. */
 		/** Converts Z value from NDC space to device Z value in range [0, 1]. */
 		float NDCZToDeviceZ(float ndcZ)
 		float NDCZToDeviceZ(float ndcZ)
 		{
 		{

+ 202 - 0
Data/Raw/Engine/Includes/RayMarch.bslinc

@@ -0,0 +1,202 @@
+#include "$ENGINE$/PerCameraData.bslinc"
+
+mixin RayMarch
+{
+	mixin PerCameraData;
+
+	code
+	{
+		#ifndef NUM_STEPS
+			#define NUM_STEPS 12
+		#endif
+		
+		#ifndef HI_Z
+			#define HI_Z 0
+		#endif
+		
+		#define MAX_HIZ_ITERATIONS 9
+		#define HIZ_START_LEVEL 1
+	
+		/** Projects a view-space position using gMatProj and returns the result after the perspective divide. */
+		float3 viewToNDC(float3 view)
+		{
+			float4 clipPos = mul(gMatProj, float4(view, 1));
+			
+			// Perspective divide
+			return clipPos.xyz / clipPos.w;
+		}
+		
+		/**
+		 * Marches along a ray in fixed increments and reports the first sample at which the ray depth is no longer in
+		 * front of the depth stored in the texture (within a tolerance).
+		 *
+		 * @param depth				Depth texture to test against.
+		 * @param samp				Sampler used for reading @p depth.
+		 * @param rayStart			Start of the ray. .xy is used as the texture coordinate, .z is compared to sampled depth.
+		 * @param rayStep			Ray extent, scaled by @p t to get the current position.
+		 * @param numSteps			Maximum number of samples to take.
+		 * @param stepIncrement		Amount @p t advances after each sample.
+		 * @param compareTolerance	A sample counts as a hit when (ray depth - sampled depth) > -compareTolerance.
+		 * @param t					In: parameter at which to start. Out: parameter after refinement on a hit, or the
+		 *							parameter following the last sample on a miss.
+		 * @return					True if a hit was found.
+		 */
+		bool linearSearch(Texture2D depth, SamplerState samp, float3 rayStart, float3 rayStep, int numSteps, float stepIncrement, float compareTolerance, inout float t)
+		{
+			// Depth difference measured at the previous sample (zero before the first sample)
+			float prevDelta = 0.0f;
+			
+			[unroll]
+			for(int stepIdx = 0; stepIdx < numSteps; ++stepIdx)
+			{
+				float3 samplePos = rayStart + rayStep * t;
+			
+				#if HI_Z
+				float sceneDepth = depth.Sample(samp, samplePos.xy).r;
+				#else
+				float sceneDepth = depth.SampleLevel(samp, samplePos.xy, 0).r;
+				#endif
+				
+				float delta = samplePos.z - sceneDepth;
+				if(delta > -compareTolerance)
+				{
+					// Refine the hit by intersecting the line segment between the previous and current sample
+					float tt = prevDelta / (delta - prevDelta);
+					t += tt * stepIncrement + stepIncrement;
+					
+					return true;
+				}
+			
+				// No hit, remember this sample and advance
+				prevDelta = delta;
+				t += stepIncrement;
+			}
+			
+			return false;
+		}
+		
+		/**
+		 * Searches for a ray hit by walking the mip chain of a hierarchical depth texture: coarse cells are skipped when
+		 * the ray leaves them through a wall, and the search descends a mip level when the ray reaches the cell depth.
+		 *
+		 * @param depth			Depth texture, sampled at explicit mip levels.
+		 * @param samp			Sampler used for reading @p depth.
+		 * @param bufferSize	Texture size before the start mip level shift is applied (presumably mip 0 size in pixels).
+		 * @param maxMipLevel	Coarsest mip level the search is allowed to climb to.
+		 * @param rayStart		Ray origin. .xy is the texture coordinate, .z the depth.
+		 * @param rayDir		Ray direction in the same space as @p rayStart.
+		 * @param t				In: parameter along the ray at which to start. Out: overwritten every iteration with the
+		 *						parameter of the intersection with the depth of the current cell.
+		 * @return				True if a hit was found at the most detailed level. False if the ray left the [0, 1] range
+		 *						or MAX_HIZ_ITERATIONS was reached.
+		 */
+		bool hiZSearch(Texture2D depth, SamplerState samp, int2 bufferSize, int maxMipLevel, float3 rayStart, float3 rayDir, inout float t)
+		{
+			float iterationCount = 0.0f;
+			int mipLevel = HIZ_START_LEVEL;
+			
+			// Buffer size at the mip level we start at
+			bufferSize >>= mipLevel;
+			
+			float3 rayPos = rayStart + rayDir * t;
+			while(mipLevel >= 0 && iterationCount < MAX_HIZ_ITERATIONS)
+			{
+				if(any(rayPos < 0.0f) || any(rayPos > 1.0f))
+					return false; // Reached the end of valid range
+			
+				// Get position of the ray, relative to the current cell (sub-pixel)
+				float2 subCellPos = frac(rayPos.xy * bufferSize);
+				
+				// Move subCellPos to [-1,1] range, as it makes the calculation below easier
+				// (Written out explicitly: "subCellPos *= 2.0f - 1.0f" multiplies by 1 and does nothing)
+				subCellPos = subCellPos * 2.0f - 1.0f;
+				
+				// Find how much we can move the ray (in "t") before we hit a cell wall
+				//// We want: subCellPos + |rayDir| * t = 1
+				//// Solve for t: t = (1 - subCellPos) / |rayDir|
+				// NOTE(review): this measures the distance to the positive cell wall only, confirm handling of rays
+				// travelling in the negative direction
+				float epsilon = 0.00001f; // Handle div by zero
+				
+				// Epsilon is added after abs(), otherwise a component equal to -epsilon still yields zero
+				float2 maxXY = (1.0f - subCellPos) / (abs(rayDir.xy) + epsilon);
+				float maxT = min(maxXY.x, maxXY.y);
+				
+				// Get depth of the current cell
+				float cellZ = depth.SampleLevel(samp, rayPos.xy, mipLevel).r;
+				
+				// Find intersection with the cell
+				//// We want: rayPos.z + rayDir.z * t = cellZ
+				//// Solve for t: t = (cellZ - rayPos.z) / rayDir.z
+				t = (cellZ - rayPos.z) / rayDir.z;
+				
+				// The hit was within the cell walls, meaning we hit the floor of the cell (ray depth is higher than cell depth)
+				float hitBias = 0.002;
+				if(t < (maxT + hitBias))
+				{
+					// We're at the highest detail level, hit found
+					if(mipLevel < 1)
+						return true;
+						
+					// Increase detail level and refine search
+					mipLevel -= 1;
+					bufferSize <<= 1;
+				}
+				else
+				{
+					// We hit the cell wall, meaning we should move to the next cell
+					// NOTE(review): maxT is measured from the current rayPos, but the new position is computed from
+					// rayStart - confirm whether the ray is meant to advance from its current position instead
+					rayPos = rayStart + rayDir * maxT * 1.04;
+					
+					// Decrease detail level
+					int oldMipLevel = mipLevel;
+					
+					mipLevel = min(maxMipLevel, mipLevel + 1);
+					bufferSize >>= (mipLevel - oldMipLevel);
+				}
+				
+				iterationCount += 1.0f;
+			}
+						
+			return false;
+		}
+		
+		struct RayMarchParams // Inputs consumed by rayMarch()
+		{
+			int2 bufferSize; // Forwarded to hiZSearch() as the depth buffer size (only read when HI_Z is enabled)
+			int numMips; // Forwarded to hiZSearch() as the maximum mip level (only read when HI_Z is enabled)
+			float4 hiZUVMapping; // From NDC to HiZ UV. .xy - multiply, .zw - add
+			float3 rayOrigin; // World space
+			float3 rayDir; // World space
+			float rayLength; // Distance along rayDir used to find the ray end point, before it is resized to reach the screen edge
+			float jitterOffset; // Scales one step increment that is added to the initial ray offset
+		};
+	
+		/**
+		 * Traces a world space ray against a depth texture in screen space. Always starts with a few linear search steps,
+		 * then continues with the hierarchical search (HI_Z enabled) or more linear steps (HI_Z disabled).
+		 *
+		 * @param depth		Depth texture to trace against.
+		 * @param samp		Sampler used for reading @p depth.
+		 * @param params	Ray description and depth buffer information, see RayMarchParams.
+		 * @return			On a hit .xyz is the hit position (.xy texture coordinate, .z depth) and .w is the ray
+		 *					parameter "t". When nothing is hit returns (0, 0, 0, 1).
+		 */
+		float4 rayMarch(Texture2D depth, SamplerState samp, RayMarchParams params)
+		{
+			// Matrix * vector order, same as used by viewToNDC()
+			float3 viewOrigin = mul(gMatView, float4(params.rayOrigin, 1)).xyz;
+			float3 viewDir = mul(gMatView, float4(params.rayDir, 0)).xyz;
+		
+			// Clip ray length so it doesn't go past the near plane
+			float rayLength = (viewOrigin.z + viewDir.z * params.rayLength) > gNearFar.x 
+				? (gNearFar.x - viewOrigin.z) / viewDir.z 
+				: params.rayLength;
+			
+			float3 ndcStart = viewToNDC(viewOrigin);
+			float3 ndcEnd = viewToNDC(viewOrigin + viewDir * rayLength);
+			float3 ndcStep = ndcEnd - ndcStart;
+			
+			// Resize ray so it reaches screen edge
+			//// We want: start + |step| * t = 1
+			//// Solve for t: t = (1 - start) / |step|
+			//// This has two solutions, but we can handle them both in a single equation by flipping sign depending on "step", on only one of the components:
+			//// t = 1/|step| - start/step
+			float epsilon = 0.00001f; // Handle div by zero
+			float2 stepScale = 1.0f / abs(ndcStep.xy + epsilon) - ndcStart.xy/(ndcStep.xy + epsilon);
+			ndcStep *= min(stepScale.x, stepScale.y);
+			
+			// The NDC -> UV and NDC Z -> device Z mappings contain an offset, so they can only be applied to positions.
+			// Map the start and the end of the resized ray and take their difference, instead of mapping the step
+			// itself (which would add the offset to a direction).
+			float3 ndcTarget = ndcStart + ndcStep;
+		
+			#if HI_Z
+			float3 uvStart;
+			uvStart.xy = ndcStart.xy * params.hiZUVMapping.xy + params.hiZUVMapping.zw;
+			uvStart.z = NDCZToDeviceZ(ndcStart.z);
+			
+			float3 uvTarget;
+			uvTarget.xy = ndcTarget.xy * params.hiZUVMapping.xy + params.hiZUVMapping.zw;
+			uvTarget.z = NDCZToDeviceZ(ndcTarget.z);
+		
+			#else
+			float3 uvStart = float3(NDCToUV(ndcStart.xy), NDCZToDeviceZ(ndcStart.z));
+			float3 uvTarget = float3(NDCToUV(ndcTarget.xy), NDCZToDeviceZ(ndcTarget.z));
+			#endif
+			
+			float3 uvStep = uvTarget - uvStart;
+		
+			float stepIncrement = 1.0f / NUM_STEPS;
+			// Offset starting position to avoid self-intersection. Use random values to avoid
+			// staircase artifacts.
+			float t = stepIncrement + stepIncrement * params.jitterOffset;
+			
+			// Note: Perhaps tweak this value
+			float compareTolerance = uvStep.z * stepIncrement;
+			
+			// Always do three steps of linear search
+			// (HiZ search is more expensive for short runs)
+			if(linearSearch(depth, samp, uvStart, uvStep, 3, stepIncrement, compareTolerance, t))
+				return float4(uvStart + uvStep * t, t);
+			
+			#if HI_Z
+			
+			// Hierarchical search
+			// NOTE(review): the search receives a normalized direction, but the hit position below is reconstructed
+			// using the non-normalized step - confirm "t" is in the expected units
+			if(hiZSearch(depth, samp, params.bufferSize, params.numMips, uvStart, normalize(uvStep), t))
+				return float4(uvStart + uvStep * t, t);
+			#else
+			
+			// Plain linear search
+			if(linearSearch(depth, samp, uvStart, uvStep, NUM_STEPS - 3, stepIncrement, compareTolerance, t))
+				return float4(uvStart + uvStep * t, t);
+			#endif
+			
+			// Hit not found
+			return float4(0, 0, 0, 1);
+		}		
+	};
+};

+ 62 - 0
Data/Raw/Engine/Shaders/PPSSRTrace.bsl

@@ -0,0 +1,62 @@
+#include "$ENGINE$\PPBase.bslinc"
+#include "$ENGINE$\GBufferInput.bslinc"
+#include "$ENGINE$\PerCameraData.bslinc"
+#include "$ENGINE$\RayMarch.bslinc"
+
+technique PPSSRTrace
+{
+	mixin PPBase;
+	mixin PerCameraData;
+	mixin GBufferInput;
+	mixin RayMarch;
+
+	code
+	{
+		[internal]
+		cbuffer Input
+		{
+			float4 gHiZUVMapping;
+			int2 gHiZSize;
+			int gHiZNumMips;
+		}
+		
+		Texture2D gSceneColor;
+		SamplerState gSceneColorSamp;
+
+		/**
+		 * Traces a single mirror reflection ray per pixel against the depth buffer and returns the scene color at the
+		 * hit position, or zero if nothing was hit.
+		 */
+		float4 fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
+		{
+			// TODO - Support MSAA?
+		
+			SurfaceData surfData = getGBufferData(input.uv0);
+			float3 P = NDCToWorld(input.screenPos, surfData.depth);
+			
+			// View vector points from the surface towards the camera, so that -V below is the incident direction
+			// expected by reflect()
+			float3 V = normalize(gViewOrigin - P);
+			float3 N = surfData.worldNormal.xyz;
+			
+			// TODO - Allow number of steps and rays be customized using a quality level
+			//  - And HiZ vs linear search
+			
+			// TODO - Use Hammersley + random to generate ray directions based on GGX BRDF
+			//  - Clip BRDF lobe? And renormalize PDF?
+			// TODO - Generate random ray step offset
+			// TODO - Reject rays pointing under the surface
+			float3 R = reflect(-V, N);
+			
+			RayMarchParams rayMarchParams;
+			rayMarchParams.bufferSize = gHiZSize;
+			rayMarchParams.numMips = gHiZNumMips;
+			rayMarchParams.hiZUVMapping = gHiZUVMapping;
+			rayMarchParams.rayOrigin = P;
+			rayMarchParams.rayDir = R;
+			rayMarchParams.rayLength = -surfData.depth; // Arbitrary since I resize the ray anyway?
+			rayMarchParams.jitterOffset = 0.0f;
+			
+			// TODO - Fade based on roughness
+			
+			// rayMarch() reports a miss by returning w == 1
+			float4 rayHit = rayMarch(gDepthBufferTex, gDepthBufferSamp, rayMarchParams);
+			if(rayHit.w < 1.0f) // Hit
+				return gSceneColor.Sample(gSceneColorSamp, rayHit.xy);
+
+			return 0.0f;
+		}	
+	};
+};

+ 31 - 0
Source/RenderBeast/Include/BsPostProcessing.h

@@ -7,6 +7,7 @@
 #include "BsParamBlocks.h"
 #include "BsParamBlocks.h"
 #include "BsGpuResourcePool.h"
 #include "BsGpuResourcePool.h"
 #include "BsStandardPostProcessSettings.h"
 #include "BsStandardPostProcessSettings.h"
+#include "BsLightRendering.h"
 
 
 namespace bs { namespace ct
 namespace bs { namespace ct
 {
 {
@@ -775,6 +776,36 @@ namespace bs { namespace ct
 #undef DEFINE_MATERIAL
 #undef DEFINE_MATERIAL
 	};
 	};
 
 
+	BS_PARAM_BLOCK_BEGIN(SSRTraceParamDef)
+		BS_PARAM_BLOCK_ENTRY(Vector4, gHiZUVMapping)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gHiZSize)
+		BS_PARAM_BLOCK_ENTRY(int, gHiZNumMips)
+	BS_PARAM_BLOCK_END
+
+	extern SSRTraceParamDef gSSRTraceParamDef;
+
+	/** Material wrapping the PPSSRTrace.bsl shader, used for tracing rays for screen space reflections. */
+	class SSRTraceMat : public RendererMaterial<SSRTraceMat>
+	{
+		RMAT_DEF("PPSSRTrace.bsl");
+
+	public:
+		SSRTraceMat();
+
+		/** 
+		 * Binds the GBuffer, scene color and HiZ parameters for the view and draws a screen quad to the destination.
+		 * 
+		 * @param[in]	view			Information about the view we're rendering from.
+		 * @param[in]	destination		Render target the results are written to.
+		 */
+		void execute(const RendererView& view, const SPtr<RenderTexture>& destination);
+
+	private:
+		SPtr<GpuParamBlockBuffer> mParamBuffer; // Buffer assigned to the "Input" parameter block
+		GBufferParams mGBufferParams; // Bound to the view's render targets on every execute()
+		GpuParamTexture mSceneColorTexture; // Handle to the "gSceneColor" fragment program texture
+	};
+
 	/**
 	/**
 	 * Renders post-processing effects for the provided render target.
 	 * Renders post-processing effects for the provided render target.
 	 *
 	 *

+ 64 - 2
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -1062,8 +1062,8 @@ namespace bs { namespace ct
 		size = 1 << numMips;
 		size = 1 << numMips;
 
 
 		// Note: Use the 32-bit buffer here as 16-bit causes too much banding (most of the scene gets assigned 4-5 different
 		// Note: Use the 32-bit buffer here as 16-bit causes too much banding (most of the scene gets assigned 4-5 different
-		// depth values). Perhaps if the depth was linealized before generation, or the far plane distance reduced, 16-bit
-		// would work, but for now sticking with 32-bit.
+		// depth values). 
+		//  - When I add UNORM 16-bit format I should be able to switch to that
 		return POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT32_R, size, size, TU_RENDERTARGET, 1, false, 1, numMips);
 		return POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT32_R, size, size, TU_RENDERTARGET, 1, false, 1, numMips);
 	}
 	}
 
 
@@ -1593,6 +1593,68 @@ namespace bs { namespace ct
 		return Texture::create(pixelData);
 		return Texture::create(pixelData);
 	}
 	}
 
 
+	SSRTraceParamDef gSSRTraceParamDef;
+
+	SSRTraceMat::SSRTraceMat()
+		:mGBufferParams(mMaterial, mParamsSet)
+	{
+		// Create the buffer backing the "Input" parameter block and assign it to the material parameters
+		mParamBuffer = gSSRTraceParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Input", mParamBuffer);
+
+		// Retrieve the handle used for assigning the scene color texture in execute()
+		mParamsSet->getGpuParams()->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSceneColor", mSceneColorTexture);
+	}
+
+	void SSRTraceMat::_initDefines(ShaderDefines& defines)
+	{
+		// Intentionally empty, no shader defines are set for this material
+	}
+
+	/**
+	 * Binds the GBuffer and resolved scene color of the view, uploads the HiZ buffer size, mip count and NDC -> HiZ UV
+	 * mapping to the "Input" parameter block, then draws a screen quad into the destination target.
+	 */
+	void SSRTraceMat::execute(const RendererView& view, const SPtr<RenderTexture>& destination)
+	{
+		const RendererViewProperties& viewProps = view.getProperties();
+		RenderTargets& renderTargets = *view.getRenderTargets();
+
+		mGBufferParams.bind(renderTargets);
+		mSceneColorTexture.set(renderTargets.get(RTT_ResolvedSceneColor));
+
+		SPtr<Texture> hiZ = renderTargets.get(RTT_HiZ);
+		const TextureProperties& hiZProps = hiZ->getProperties();
+		
+		Rect2I viewRect = viewProps.viewRect;
+
+		// Maps from NDC to UV [0, 1]
+		Vector4 ndcToUVMapping;
+		ndcToUVMapping.x = 0.5f;
+		ndcToUVMapping.y = -0.5f;
+		ndcToUVMapping.z = 0.5f;
+		ndcToUVMapping.w = 0.5f;
+
+		// Either of these flips the Y axis, but if they're both true they cancel out
+		RenderAPI& rapi = RenderAPI::instance();
+		const RenderAPIInfo& rapiInfo = rapi.getAPIInfo();
+		if (rapiInfo.isFlagSet(RenderAPIFeatureFlag::UVYAxisUp) ^ rapiInfo.isFlagSet(RenderAPIFeatureFlag::NDCYAxisDown))
+			ndcToUVMapping.y = -ndcToUVMapping.y;
+		
+		// Maps from [0, 1] to the area of HiZ where depth is stored in. The scale applies to the whole [0, 1] UV, so
+		// both the multiply (.xy) and the add (.zw) terms need to be scaled: (ndc * m + a) * s = ndc * (m * s) + (a * s)
+		const float hiZScaleX = static_cast<float>(viewRect.width) / hiZProps.getWidth();
+		const float hiZScaleY = static_cast<float>(viewRect.height) / hiZProps.getHeight();
+
+		ndcToUVMapping.x *= hiZScaleX;
+		ndcToUVMapping.y *= hiZScaleY;
+		ndcToUVMapping.z *= hiZScaleX;
+		ndcToUVMapping.w *= hiZScaleY;
+		
+		Vector2I bufferSize(hiZProps.getWidth(), hiZProps.getHeight());
+		gSSRTraceParamDef.gHiZSize.set(mParamBuffer, bufferSize);
+		gSSRTraceParamDef.gHiZNumMips.set(mParamBuffer, hiZProps.getNumMipmaps());
+		gSSRTraceParamDef.gHiZUVMapping.set(mParamBuffer, ndcToUVMapping);
+
+		SPtr<GpuParamBlockBuffer> perView = view.getPerViewBuffer();
+		mParamsSet->setParamBlockBuffer("PerCamera", perView);
+
+		rapi.setRenderTarget(destination);
+
+		gRendererUtility().setPass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+		gRendererUtility().drawScreenQuad();
+	}
+
 	void PostProcessing::postProcess(RendererView* viewInfo, const SPtr<RenderTargets>& renderTargets, float frameDelta)
 	void PostProcessing::postProcess(RendererView* viewInfo, const SPtr<RenderTargets>& renderTargets, float frameDelta)
 	{
 	{
 		auto& viewProps = viewInfo->getProperties();
 		auto& viewProps = viewInfo->getProperties();