Переглянути джерело

Gaussian depth of field effect is now functional
Fixed a bug in the DX11 cbuffer packing rules algorithm

BearishSun 8 роки тому
батько
коміт
fdfc56c8ac

+ 37 - 12
Data/Raw/Engine/Shaders/PPGaussianBlur.bsl

@@ -9,9 +9,9 @@ technique PPGaussianBlur
 		[internal]
 		[internal]
 		cbuffer Input
 		cbuffer Input
 		{
 		{
-			float2 gSampleOffsets[MAX_NUM_SAMPLES];
-			float gSampleWeights[MAX_NUM_SAMPLES];
-			uint gNumSamples;
+			float4 gSampleOffsets[(MAX_NUM_SAMPLES + 1) / 2];
+			float4 gSampleWeights[(MAX_NUM_SAMPLES + 3) / 4];
+			int gNumSamples;
 		}		
 		}		
 
 
 		SamplerState gInputSamp;
 		SamplerState gInputSamp;
@@ -22,25 +22,50 @@ technique PPGaussianBlur
 			// Note: Consider adding a version of this shader with unrolled loop for small number of samples
 			// Note: Consider adding a version of this shader with unrolled loop for small number of samples
 			float4 output = 0;
 			float4 output = 0;
 			
 			
-			uint sampleIdx = 0;
-			for(sampleIdx = 0; sampleIdx < (gNumSamples - 1); sampleIdx += 2)
+			int idx = 0;
+			for(; idx < (gNumSamples / 4); idx++)
 			{
 			{
 				{
 				{
-					float2 uv = input.uv0 + gSampleOffsets[sampleIdx];
-					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[sampleIdx + 0];
+					float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 0].xy;
+					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].x;
 				}
 				}
 				
 				
 				{
 				{
-					float2 uv = input.uv0 + gSampleOffsets[sampleIdx];
-					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[sampleIdx + 1];
+					float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 0].zw;
+					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].y;
+				}
+				
+				{
+					float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 1].xy;
+					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].z;
+				}
+				
+				{
+					float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 1].zw;
+					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].w;
 				}
 				}
 			}
 			}
 			
 			
+			int extraSamples = gNumSamples - idx * 4;
 			[branch]
 			[branch]
-			if(sampleIdx < gNumSamples)
+			if(extraSamples >= 1)
 			{
 			{
-				float2 uv = input.uv0 + gSampleOffsets[sampleIdx / 2].xy;
-				output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[sampleIdx + 0];
+				float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 0].xy;
+				output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].x;
+				
+				[branch]
+				if(extraSamples >= 2)
+				{
+					float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 0].zw;
+					output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].y;
+					
+					[branch]
+					if(extraSamples >= 3)
+					{
+						float2 uv = input.uv0 + gSampleOffsets[idx * 2 + 1].xy;
+						output += gInputTex.Sample(gInputSamp, uv) * gSampleWeights[idx].z;
+					}
+				}				
 			}
 			}
 			
 			
 			return output;
 			return output;

+ 2 - 2
Data/Raw/Engine/Shaders/PPGaussianDOFCombine.bsl

@@ -19,7 +19,7 @@ technique PPGaussianDOFCombine
 		float3 fsmain(VStoFS input) : SV_Target0
 		float3 fsmain(VStoFS input) : SV_Target0
 		{
 		{
 			float4 focusedColor = gFocusedTex.Sample(gColorSamp, input.uv0);
 			float4 focusedColor = gFocusedTex.Sample(gColorSamp, input.uv0);
-			float depth = convertFromDeviceZ(gDepthTex.Sample(gDepthSamp, input.uv0));
+			float depth = -convertFromDeviceZ(gDepthTex.Sample(gDepthSamp, input.uv0));
 			
 			
 			float4 nearColor = 0;
 			float4 nearColor = 0;
 			float4 farColor = 0;
 			float4 farColor = 0;
@@ -51,7 +51,7 @@ technique PPGaussianDOFCombine
 			// Foreground layer
 			// Foreground layer
 			//// Same type of blending as with the layer above
 			//// Same type of blending as with the layer above
 			float foregroundMask = calcNearMask(depth);
 			float foregroundMask = calcNearMask(depth);
-			foregroundMask = saturate(1.0f - foregroundMask * 5.0f);
+			foregroundMask = saturate(foregroundMask * 5.0f);
 			foregroundMask *= foregroundMask;
 			foregroundMask *= foregroundMask;
 			
 			
 			combined = lerp(combined, nearColor.rgb, foregroundMask);
 			combined = lerp(combined, nearColor.rgb, foregroundMask);

+ 3 - 1
Source/BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp

@@ -1403,7 +1403,9 @@ namespace bs { namespace ct
 				param.cpuMemOffset = block.blockSize;
 				param.cpuMemOffset = block.blockSize;
 				param.gpuMemOffset = 0;
 				param.gpuMemOffset = 0;
 
 
-				block.blockSize += size * param.arraySize;
+				// Last array element isn't rounded up to four component vectors
+				block.blockSize += size * (param.arraySize - 1);
+				block.blockSize += typeInfo.size / 4;
 			}
 			}
 			else
 			else
 			{
 			{

+ 2 - 2
Source/BansheeEngine/Source/BsStandardPostProcessSettings.cpp

@@ -66,8 +66,8 @@ namespace bs
 	}
 	}
 
 
 	DepthOfFieldSettings::DepthOfFieldSettings()
 	DepthOfFieldSettings::DepthOfFieldSettings()
-		: enabled(false), focalDistance(2.0f), focalRange(1.0f), nearTransitionRange(0.25f), farTransitionRange(0.25f)
-		, nearBlurAmount(16.0f), farBlurAmount(16.0f)
+		: enabled(false), focalDistance(0.75f), focalRange(0.75f), nearTransitionRange(0.25f), farTransitionRange(0.25f)
+		, nearBlurAmount(0.15f), farBlurAmount(0.15f)
 	{ }
 	{ }
 
 
 	RTTITypeBase* DepthOfFieldSettings::getRTTIStatic()
 	RTTITypeBase* DepthOfFieldSettings::getRTTIStatic()

+ 2 - 2
Source/RenderBeast/Include/BsPostProcessing.h

@@ -308,8 +308,8 @@ namespace bs { namespace ct
 	const int MAX_BLUR_SAMPLES = 128;
 	const int MAX_BLUR_SAMPLES = 128;
 
 
 	BS_PARAM_BLOCK_BEGIN(GaussianBlurParamDef)
 	BS_PARAM_BLOCK_BEGIN(GaussianBlurParamDef)
-		BS_PARAM_BLOCK_ENTRY_ARRAY(Vector2, gSampleOffsets, MAX_BLUR_SAMPLES)
-		BS_PARAM_BLOCK_ENTRY_ARRAY(float, gSampleWeights, MAX_BLUR_SAMPLES)
+		BS_PARAM_BLOCK_ENTRY_ARRAY(Vector4, gSampleOffsets, (MAX_BLUR_SAMPLES + 1) / 2)
+		BS_PARAM_BLOCK_ENTRY_ARRAY(Vector4, gSampleWeights, (MAX_BLUR_SAMPLES + 3) / 4)
 		BS_PARAM_BLOCK_ENTRY(int, gNumSamples)
 		BS_PARAM_BLOCK_ENTRY(int, gNumSamples)
 	BS_PARAM_BLOCK_END
 	BS_PARAM_BLOCK_END
 
 

+ 37 - 27
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -576,24 +576,53 @@ namespace bs { namespace ct
 			dstProps.getWidth(), dstProps.getHeight(), TU_RENDERTARGET);
 			dstProps.getWidth(), dstProps.getHeight(), TU_RENDERTARGET);
 		SPtr<PooledRenderTexture> tempTexture = GpuResourcePool::instance().get(tempTextureDesc);
 		SPtr<PooledRenderTexture> tempTexture = GpuResourcePool::instance().get(tempTextureDesc);
 
 
-		// Horizontal pass
+		auto updateParamBuffer = [&](Direction direction)
 		{
 		{
-			float kernelRadius = calcKernelRadius(source, filterSize, DirHorizontal);
+			float kernelRadius = calcKernelRadius(source, filterSize, direction);
 			UINT32 numSamples = calcStdDistribution(kernelRadius, sampleWeights, sampleOffsets);
 			UINT32 numSamples = calcStdDistribution(kernelRadius, sampleWeights, sampleOffsets);
 
 
-			for(UINT32 i = 0; i < numSamples; ++i)
+			for(UINT32 i = 0; i < (numSamples + 3) / 4; ++i)
+			{
+				UINT32 remainder = std::min(4U, numSamples - i * 4);
+
+				Vector4 weights;
+				for (UINT32 j = 0; j < remainder; ++j)
+					weights[j] = sampleWeights[i * 4 + j];
+
+				gGaussianBlurParamDef.gSampleWeights.set(mParamBuffer, weights, i);
+			}
+
+			UINT32 axis0 = direction == DirHorizontal ? 0 : 1;
+			UINT32 axis1 = (axis0 + 1) % 2;
+
+			for(UINT32 i = 0; i < (numSamples + 1) / 2; ++i)
 			{
 			{
-				gGaussianBlurParamDef.gSampleWeights.set(mParamBuffer, sampleWeights[i], i);
+				UINT32 remainder = std::min(2U, numSamples - i * 2);
+
+				Vector4 offset;
+				offset[axis0] = sampleOffsets[i * 2 + 0] * invTexSize[axis0];
+				offset[axis1] = 0.0f;
 
 
-				Vector2 offset;
-				offset.x = sampleOffsets[i] * invTexSize.x;
-				offset.y = 0.0f;
+				if(remainder == 2)
+				{
+					offset[axis0 + 2] = sampleOffsets[i * 2 + 1] * invTexSize[axis0];
+					offset[axis1 + 2] = 0.0f;
+				}
+				else
+				{
+					offset[axis0 + 2] = 0.0f;
+					offset[axis1 + 2] = 0.0f;
+				}
 
 
 				gGaussianBlurParamDef.gSampleOffsets.set(mParamBuffer, offset, i);
 				gGaussianBlurParamDef.gSampleOffsets.set(mParamBuffer, offset, i);
 			}
 			}
 
 
 			gGaussianBlurParamDef.gNumSamples.set(mParamBuffer, numSamples);
 			gGaussianBlurParamDef.gNumSamples.set(mParamBuffer, numSamples);
+		};
 
 
+		// Horizontal pass
+		{
+			updateParamBuffer(DirHorizontal);
 			mInputTexture.set(source);
 			mInputTexture.set(source);
 
 
 			RenderAPI& rapi = RenderAPI::instance();
 			RenderAPI& rapi = RenderAPI::instance();
@@ -606,22 +635,7 @@ namespace bs { namespace ct
 
 
 		// Vertical pass
 		// Vertical pass
 		{
 		{
-			float kernelRadius = calcKernelRadius(source, filterSize, DirVertical);
-			UINT32 numSamples = calcStdDistribution(kernelRadius, sampleWeights, sampleOffsets);
-
-			for(UINT32 i = 0; i < numSamples; ++i)
-			{
-				gGaussianBlurParamDef.gSampleWeights.set(mParamBuffer, sampleWeights[i], i);
-
-				Vector2 offset;
-				offset.x = 0.0f;
-				offset.y = sampleOffsets[i] * invTexSize.y;
-
-				gGaussianBlurParamDef.gSampleOffsets.set(mParamBuffer, offset, i);
-			}
-
-			gGaussianBlurParamDef.gNumSamples.set(mParamBuffer, numSamples);
-
+			updateParamBuffer(DirVertical);
 			mInputTexture.set(tempTexture->texture);
 			mInputTexture.set(tempTexture->texture);
 
 
 			RenderAPI& rapi = RenderAPI::instance();
 			RenderAPI& rapi = RenderAPI::instance();
@@ -915,10 +929,6 @@ namespace bs { namespace ct
 		}
 		}
 
 
 		separateMat->execute(sceneColor, sceneDepth, view, settings);
 		separateMat->execute(sceneColor, sceneDepth, view, settings);
-		separateMat->release();
-
-		// DEBUG ONLY
-		return;
 
 
 		SPtr<PooledRenderTexture> nearTex, farTex;
 		SPtr<PooledRenderTexture> nearTex, farTex;
 		if(near && far)
 		if(near && far)