Prechádzať zdrojové kódy

Tonemapping LUT creation now uses compute shaders since Vulkan doesn't support 3D texture rendering

BearishSun 9 rokov pred
rodič
commit
457cd2fc42

+ 2 - 2
Data/Raw/Engine/Includes/PPTonemapCommon.bslinc

@@ -4,7 +4,7 @@ Technique : base("PPTonemapCommon") =
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		Fragment =
+		Common =
 		{
 		{
 			static const float3x3 sRGBToXYZMatrix =
 			static const float3x3 sRGBToXYZMatrix =
 			{
 			{
@@ -121,7 +121,7 @@ Technique : base("PPTonemapCommon") =
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		Fragment =
+		Common =
 		{
 		{
 			const mat3x3 sRGBToXYZMatrix = mat3x3(
 			const mat3x3 sRGBToXYZMatrix = mat3x3(
 				vec3(0.4124564f, 0.2126729f, 0.0193339f),
 				vec3(0.4124564f, 0.2126729f, 0.0193339f),

+ 2 - 2
Data/Raw/Engine/Includes/PPWhiteBalance.bslinc

@@ -4,7 +4,7 @@ Technique : base("PPWhiteBalance") =
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		Fragment =
+		Compute =
 		{
 		{
 			/**
 			/**
 			 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.
 			 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.
@@ -216,7 +216,7 @@ Technique : base("PPWhiteBalance") =
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		Fragment =
+		Compute =
 		{
 		{
 			/**
 			/**
 			 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.
 			 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.

+ 14 - 29
Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl

@@ -1,4 +1,3 @@
-#include "$ENGINE$\VolumeRenderBase.bslinc"
 #include "$ENGINE$\PPTonemapCommon.bslinc"
 #include "$ENGINE$\PPTonemapCommon.bslinc"
 #include "$ENGINE$\PPWhiteBalance.bslinc"
 #include "$ENGINE$\PPWhiteBalance.bslinc"
 
 
@@ -8,7 +7,6 @@ Parameters =
 };
 };
 
 
 Technique
 Technique
- : inherits("VolumeRenderBase")
  : inherits("PPTonemapCommon")
  : inherits("PPTonemapCommon")
  : inherits("PPWhiteBalance") =
  : inherits("PPWhiteBalance") =
 {
 {
@@ -16,10 +14,7 @@ Technique
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		DepthWrite = false;
-		DepthRead = false;
-	
-		Fragment =
+		Compute =
 		{
 		{
 			cbuffer Input
 			cbuffer Input
 			{
 			{
@@ -84,18 +79,19 @@ Technique
 				return color;
 				return color;
 			}		
 			}		
 			
 			
-			float4 main(GStoFS input) : SV_Target0
+			RWTexture3D<float4> gOutputTex;
+			
+			[numthreads(8, 8, 1)]
+			void main(
+				uint3 dispatchThreadId : SV_DispatchThreadID,
+				uint threadIndex : SV_GroupIndex)
 			{
 			{
 				// Constants
 				// Constants
 				const float3x3 sRGBToACES2065Matrix = mul(XYZToACES2065Matrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
 				const float3x3 sRGBToACES2065Matrix = mul(XYZToACES2065Matrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
 				const float3x3 sRGBToACEScgMatrix = mul(XYZToACEScgMatrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
 				const float3x3 sRGBToACEScgMatrix = mul(XYZToACEScgMatrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
 				const float3x3 ACEScgTosRGBMatrix = mul(XYZTosRGBMatrix, mul(D60ToD65Matrix, ACEScgToXYZMatrix));
 				const float3x3 ACEScgTosRGBMatrix = mul(XYZTosRGBMatrix, mul(D60ToD65Matrix, ACEScgToXYZMatrix));
 				
 				
-				// By default pixel centers will be sampled, but we want to encode the entire range, so
-				// offset the sampling by half a pixel, and extend the entire range by one pixel.
-				float2 uv = input.uv0 - (0.5f / LUT_SIZE);
-				float3 logColor = float3(uv * LUT_SIZE / (float)(LUT_SIZE - 1), input.layerIdx / (float)(LUT_SIZE - 1));
-				
+				float3 logColor = float3(dispatchThreadId.xyz / (float)(LUT_SIZE - 1));
 				float3 linearColor = LogToLinearColor(logColor);
 				float3 linearColor = LogToLinearColor(logColor);
 				
 				
 				linearColor = WhiteBalance(linearColor);
 				linearColor = WhiteBalance(linearColor);
@@ -119,14 +115,13 @@ Technique
 					gammaColor = pow(gammaColor, 1.0f/2.2f);
 					gammaColor = pow(gammaColor, 1.0f/2.2f);
 				
 				
 				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
 				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
-				return float4(gammaColor, 0.0f);
+				gOutputTex[dispatchThreadId] = float4(gammaColor, 1.0f);	
 			}	
 			}	
 		};
 		};
 	};
 	};
 };
 };
 
 
 Technique
 Technique
- : inherits("VolumeRenderBase")
  : inherits("PPTonemapCommon")
  : inherits("PPTonemapCommon")
  : inherits("PPWhiteBalance") =
  : inherits("PPWhiteBalance") =
 {
 {
@@ -134,10 +129,7 @@ Technique
 	
 	
 	Pass =
 	Pass =
 	{
 	{
-		DepthWrite = false;
-		DepthRead = false;
-	
-		Fragment =
+		Compute =
 		{
 		{
 			layout(binding = 1) uniform Input
 			layout(binding = 1) uniform Input
 			{
 			{
@@ -208,12 +200,8 @@ Technique
 				result = color;
 				result = color;
 			}		
 			}		
 			
 			
-			in GStoFS
-			{
-				layout(location = 0) vec2 uv0;
-			} FSInput;
-			
-			layout(location = 0) out vec4 fragColor;
+			layout (local_size_x = 8, local_size_y = 8) in;
+			layout(binding = 2, rgba8) uniform image3D gOutputTex;
 			
 			
 			void main()
 			void main()
 			{
 			{
@@ -222,10 +210,7 @@ Technique
 				const mat3x3 sRGBToACEScgMatrix = XYZToACEScgMatrix * (D65ToD60Matrix * sRGBToXYZMatrix);
 				const mat3x3 sRGBToACEScgMatrix = XYZToACEScgMatrix * (D65ToD60Matrix * sRGBToXYZMatrix);
 				const mat3x3 ACEScgTosRGBMatrix = XYZTosRGBMatrix * (D60ToD65Matrix * ACEScgToXYZMatrix);
 				const mat3x3 ACEScgTosRGBMatrix = XYZTosRGBMatrix * (D60ToD65Matrix * ACEScgToXYZMatrix);
 				
 				
-				// By default pixel centers will be sampled, but we want to encode the entire range, so
-				// offset the sampling by half a pixel, and extend the entire range by one pixel.
-				vec2 uv = FSInput.uv0 - (0.5f / LUT_SIZE);
-				vec3 logColor = vec3(uv * LUT_SIZE / float(LUT_SIZE - 1), gl_Layer / float(LUT_SIZE - 1));
+				vec3 logColor = vec3(gl_GlobalInvocationID.xyz / float(LUT_SIZE - 1));
 				
 				
 				vec3 linearColor;
 				vec3 linearColor;
 				LogToLinearColor(logColor, linearColor);
 				LogToLinearColor(logColor, linearColor);
@@ -251,7 +236,7 @@ Technique
 					gammaColor = pow(gammaColor, vec3(1.0f/2.2f));
 					gammaColor = pow(gammaColor, vec3(1.0f/2.2f));
 				
 				
 				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
 				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
-				fragColor = vec4(gammaColor, 0.0f);
+				imageStore(gOutputTex, ivec3(gl_GlobalInvocationID.xyz), vec4(gammaColor, 1.0f));
 			}	
 			}	
 		};
 		};
 	};
 	};

+ 4 - 3
Source/BansheeD3D11RenderAPI/Include/BsD3D11Mappings.h

@@ -90,9 +90,10 @@ namespace bs
 
 
 		/**
 		/**
 		 * Converts engine pixel format to DX11 pixel format. Some formats depend on whether hardware gamma is used or not,
 		 * Converts engine pixel format to DX11 pixel format. Some formats depend on whether hardware gamma is used or not,
-		 * in which case set the @p hwGamma parameter as needed.
+		 * in which case set the @p hwGamma parameter as needed. Most formats also depend on @p usage, as specified by
+		 * TextureUsage flags.
 		 */
 		 */
-		static DXGI_FORMAT getPF(PixelFormat format, bool hwGamma);
+		static DXGI_FORMAT getPF(PixelFormat format, bool hwGamma, int usage);
 		
 		
 		/** Converts engine GPU buffer format to DX11 GPU buffer format. */
 		/** Converts engine GPU buffer format to DX11 GPU buffer format. */
 		static DXGI_FORMAT getBF(GpuBufferFormat format);
 		static DXGI_FORMAT getBF(GpuBufferFormat format);
@@ -119,7 +120,7 @@ namespace bs
 		static bool isDynamic(GpuBufferUsage mUsage);
 		static bool isDynamic(GpuBufferUsage mUsage);
 
 
 		/**	Finds the closest pixel format that DX11 supports. */
 		/**	Finds the closest pixel format that DX11 supports. */
-		static PixelFormat getClosestSupportedPF(PixelFormat format, bool hwGamma);
+		static PixelFormat getClosestSupportedPF(PixelFormat format, bool hwGamma, int usage);
 
 
 		/**
 		/**
 		 * Returns size in bytes of a pixel surface of the specified size and format, while using DX11 allocation rules for
 		 * Returns size in bytes of a pixel surface of the specified size and format, while using DX11 allocation rules for

+ 2 - 0
Source/BansheeD3D11RenderAPI/Include/BsD3D11RenderAPI.h

@@ -211,6 +211,8 @@ namespace bs
 		D3D11InputLayoutManager* mIAManager;
 		D3D11InputLayoutManager* mIAManager;
 
 
 		std::pair<SPtr<TextureCore>, SPtr<TextureView>> mBoundUAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
 		std::pair<SPtr<TextureCore>, SPtr<TextureView>> mBoundUAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
+		bool mPSUAVsBound;
+		bool mCSUAVsBound;
 
 
 		UINT32 mStencilRef;
 		UINT32 mStencilRef;
 		Rect2 mViewportNorm;
 		Rect2 mViewportNorm;

+ 72 - 11
Source/BansheeD3D11RenderAPI/Source/BsD3D11Mappings.cpp

@@ -600,8 +600,11 @@ namespace bs
 		}
 		}
 	}
 	}
 
 
-	DXGI_FORMAT D3D11Mappings::getPF(PixelFormat pf, bool gamma)
+	DXGI_FORMAT D3D11Mappings::getPF(PixelFormat pf, bool gamma, int usage)
 	{
 	{
+		int isSampledTexture = ((usage & TU_RENDERTARGET) != 0) && ((usage & TU_DEPTHSTENCIL) != 0) &&
+			((usage & TU_LOADSTORE) != 0);
+
 		switch(pf)
 		switch(pf)
 		{
 		{
 		case PF_R8:
 		case PF_R8:
@@ -623,12 +626,19 @@ namespace bs
 				return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
 				return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
 			return DXGI_FORMAT_R8G8B8A8_UNORM;
 			return DXGI_FORMAT_R8G8B8A8_UNORM;
 		case PF_B8G8R8A8:
 		case PF_B8G8R8A8:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if (gamma)
 			if (gamma)
 				return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
 				return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
 			return DXGI_FORMAT_B8G8R8A8_UNORM;
 			return DXGI_FORMAT_B8G8R8A8_UNORM;
 		case PF_B8G8R8X8:
 		case PF_B8G8R8X8:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if (gamma)
 			if (gamma)
 				return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB;
 				return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB;
+
 			return DXGI_FORMAT_B8G8R8X8_UNORM;
 			return DXGI_FORMAT_B8G8R8X8_UNORM;
 		case PF_FLOAT16_R:
 		case PF_FLOAT16_R:
 			return DXGI_FORMAT_R16_FLOAT;
 			return DXGI_FORMAT_R16_FLOAT;
@@ -643,40 +653,76 @@ namespace bs
 		case PF_FLOAT32_RG:
 		case PF_FLOAT32_RG:
 			return DXGI_FORMAT_R32G32_FLOAT;
 			return DXGI_FORMAT_R32G32_FLOAT;
 		case PF_FLOAT32_RGB:
 		case PF_FLOAT32_RGB:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_R32G32B32_FLOAT;
 			return DXGI_FORMAT_R32G32B32_FLOAT;
 		case PF_FLOAT32_RGBA:
 		case PF_FLOAT32_RGBA:
 			return DXGI_FORMAT_R32G32B32A32_FLOAT;
 			return DXGI_FORMAT_R32G32B32A32_FLOAT;
 		case PF_BC1:
 		case PF_BC1:
 		case PF_BC1a:
 		case PF_BC1a:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if(gamma)
 			if(gamma)
 				return DXGI_FORMAT_BC1_UNORM_SRGB;
 				return DXGI_FORMAT_BC1_UNORM_SRGB;
 			return DXGI_FORMAT_BC1_UNORM;
 			return DXGI_FORMAT_BC1_UNORM;
 		case PF_BC2:
 		case PF_BC2:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if (gamma)
 			if (gamma)
 				return DXGI_FORMAT_BC2_UNORM_SRGB;
 				return DXGI_FORMAT_BC2_UNORM_SRGB;
 			return DXGI_FORMAT_BC2_UNORM;
 			return DXGI_FORMAT_BC2_UNORM;
 		case PF_BC3:
 		case PF_BC3:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if (gamma)
 			if (gamma)
 				return DXGI_FORMAT_BC3_UNORM_SRGB;
 				return DXGI_FORMAT_BC3_UNORM_SRGB;
 			return DXGI_FORMAT_BC3_UNORM;
 			return DXGI_FORMAT_BC3_UNORM;
 		case PF_BC4:
 		case PF_BC4:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_BC4_UNORM;
 			return DXGI_FORMAT_BC4_UNORM;
 		case PF_BC5:
 		case PF_BC5:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_BC5_UNORM;
 			return DXGI_FORMAT_BC5_UNORM;
 		case PF_BC6H:
 		case PF_BC6H:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_BC6H_UF16;
 			return DXGI_FORMAT_BC6H_UF16;
 		case PF_BC7:
 		case PF_BC7:
+			if (!isSampledTexture)
+				return DXGI_FORMAT_UNKNOWN;
+
 			if (gamma)
 			if (gamma)
 				return DXGI_FORMAT_BC7_UNORM_SRGB;
 				return DXGI_FORMAT_BC7_UNORM_SRGB;
 			else
 			else
 				return DXGI_FORMAT_BC7_UNORM;
 				return DXGI_FORMAT_BC7_UNORM;
 		case PF_D32_S8X24:
 		case PF_D32_S8X24:
+			if ((usage & TU_DEPTHSTENCIL) == 0)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
 			return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
 		case PF_D24S8:
 		case PF_D24S8:
+			if ((usage & TU_DEPTHSTENCIL) == 0)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_D24_UNORM_S8_UINT;
 			return DXGI_FORMAT_D24_UNORM_S8_UINT;
 		case PF_D32:
 		case PF_D32:
+			if ((usage & TU_DEPTHSTENCIL) == 0)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_D32_FLOAT;
 			return DXGI_FORMAT_D32_FLOAT;
 		case PF_D16:
 		case PF_D16:
+			if ((usage & TU_DEPTHSTENCIL) == 0)
+				return DXGI_FORMAT_UNKNOWN;
+
 			return DXGI_FORMAT_D16_UNORM;
 			return DXGI_FORMAT_D16_UNORM;
 		case PF_FLOAT_R11G11B10:
 		case PF_FLOAT_R11G11B10:
 			return DXGI_FORMAT_R11G11B10_FLOAT;
 			return DXGI_FORMAT_R11G11B10_FLOAT;
@@ -772,31 +818,46 @@ namespace bs
 		}
 		}
 	}
 	}
 
 
-	PixelFormat D3D11Mappings::getClosestSupportedPF(PixelFormat pf, bool hwGamma)
+	PixelFormat D3D11Mappings::getClosestSupportedPF(PixelFormat pf, bool hwGamma, int usage)
 	{
 	{
-		if (getPF(pf, hwGamma) != DXGI_FORMAT_UNKNOWN)
+		if (getPF(pf, hwGamma, usage) != DXGI_FORMAT_UNKNOWN)
 		{
 		{
 			return pf;
 			return pf;
 		}
 		}
+
 		switch(pf)
 		switch(pf)
 		{
 		{
+		case PF_BC4:
+			return PF_R8;
+		case PF_BC5:
+			return PF_R8G8;
 		case PF_FLOAT16_RGB:
 		case PF_FLOAT16_RGB:
+		case PF_BC6H:
 			return PF_FLOAT16_RGBA;
 			return PF_FLOAT16_RGBA;
+		case PF_FLOAT32_RGB:
+			return PF_FLOAT32_RGBA;
 		case PF_R8G8B8:
 		case PF_R8G8B8:
-			return PF_R8G8B8A8;
 		case PF_B8G8R8:
 		case PF_B8G8R8:
-			return PF_R8G8B8A8;
+		case PF_X8R8G8B8:
 		case PF_A8R8G8B8:
 		case PF_A8R8G8B8:
+		case PF_BC1:
+		case PF_BC1a:
+		case PF_BC2:
+		case PF_BC3:
+		case PF_UNKNOWN:
 			return PF_R8G8B8A8;
 			return PF_R8G8B8A8;
 		case PF_A8B8G8R8:
 		case PF_A8B8G8R8:
-			return PF_B8G8R8A8;
-		case PF_X8R8G8B8:
-			return PF_R8G8B8A8;
 		case PF_X8B8G8R8:
 		case PF_X8B8G8R8:
-			return PF_B8G8R8X8;
 		case PF_R8G8B8X8:
 		case PF_R8G8B8X8:
-			return PF_B8G8R8X8;
-		case PF_UNKNOWN:
+		case PF_B8G8R8X8:
+			return PF_B8G8R8A8;
+		case PF_D32_S8X24:
+			return PF_FLOAT32_RG;
+		case PF_D24S8:
+		case PF_D32:
+			return PF_FLOAT32_R;
+		case PF_D16:
+			return PF_FLOAT16_R;
 		default:
 		default:
 			return PF_R8G8B8A8;
 			return PF_R8G8B8A8;
 		}
 		}

+ 40 - 12
Source/BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp

@@ -36,8 +36,8 @@ namespace bs
 {
 {
 	D3D11RenderAPI::D3D11RenderAPI()
 	D3D11RenderAPI::D3D11RenderAPI()
 		: mDXGIFactory(nullptr), mDevice(nullptr), mDriverList(nullptr), mActiveD3DDriver(nullptr)
 		: mDXGIFactory(nullptr), mDevice(nullptr), mDriverList(nullptr), mActiveD3DDriver(nullptr)
-		, mFeatureLevel(D3D_FEATURE_LEVEL_11_0), mHLSLFactory(nullptr), mIAManager(nullptr), mStencilRef(0)
-		, mActiveDrawOp(DOT_TRIANGLE_LIST), mViewportNorm(0.0f, 0.0f, 1.0f, 1.0f)
+		, mFeatureLevel(D3D_FEATURE_LEVEL_11_0), mHLSLFactory(nullptr), mIAManager(nullptr), mPSUAVsBound(false)
+		, mCSUAVsBound(false), mStencilRef(0), mActiveDrawOp(DOT_TRIANGLE_LIST), mViewportNorm(0.0f, 0.0f, 1.0f, 1.0f)
 	{ }
 	{ }
 
 
 	D3D11RenderAPI::~D3D11RenderAPI()
 	D3D11RenderAPI::~D3D11RenderAPI()
@@ -342,6 +342,32 @@ namespace bs
 		{
 		{
 			THROW_IF_NOT_CORE_THREAD;
 			THROW_IF_NOT_CORE_THREAD;
 
 
+			ID3D11DeviceContext* context = mDevice->getImmediateContext();
+
+			// Clear any previously bound UAVs (otherwise shaders attempting to read resources viewed by those views will
+			// be unable to do so)
+			if (mPSUAVsBound || mCSUAVsBound)
+			{
+				ID3D11UnorderedAccessView* emptyUAVs[D3D11_PS_CS_UAV_REGISTER_COUNT];
+				bs_zero_out(emptyUAVs);
+
+				if(mPSUAVsBound)
+				{
+					context->OMSetRenderTargetsAndUnorderedAccessViews(
+						D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 0, 
+						D3D11_PS_CS_UAV_REGISTER_COUNT, emptyUAVs, nullptr);
+
+					mPSUAVsBound = false;
+				}
+
+				if(mCSUAVsBound)
+				{
+					context->CSSetUnorderedAccessViews(0, D3D11_PS_CS_UAV_REGISTER_COUNT, emptyUAVs, nullptr);
+
+					mCSUAVsBound = false;
+				}
+			}
+
 			bs_frame_mark();
 			bs_frame_mark();
 			{
 			{
 				FrameVector<ID3D11ShaderResourceView*> srvs(8);
 				FrameVector<ID3D11ShaderResourceView*> srvs(8);
@@ -476,8 +502,6 @@ namespace bs
 					}
 					}
 				};
 				};
 
 
-				ID3D11DeviceContext* context = mDevice->getImmediateContext();
-
 				UINT32 numSRVs = 0;
 				UINT32 numSRVs = 0;
 				UINT32 numUAVs = 0;
 				UINT32 numUAVs = 0;
 				UINT32 numConstBuffers = 0;
 				UINT32 numConstBuffers = 0;
@@ -485,19 +509,12 @@ namespace bs
 
 
 				populateViews(GPT_VERTEX_PROGRAM);
 				populateViews(GPT_VERTEX_PROGRAM);
 				numSRVs = (UINT32)srvs.size();
 				numSRVs = (UINT32)srvs.size();
-				numUAVs = (UINT32)uavs.size();
 				numConstBuffers = (UINT32)constBuffers.size();
 				numConstBuffers = (UINT32)constBuffers.size();
 				numSamplers = (UINT32)samplers.size();
 				numSamplers = (UINT32)samplers.size();
 
 
 				if(numSRVs > 0)
 				if(numSRVs > 0)
 					context->VSSetShaderResources(0, numSRVs, srvs.data());
 					context->VSSetShaderResources(0, numSRVs, srvs.data());
 
 
-				if(numUAVs > 0)
-				{
-					context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr,
-						nullptr, 0, numUAVs, uavs.data(), nullptr);
-				}
-
 				if (numConstBuffers > 0)
 				if (numConstBuffers > 0)
 					context->VSSetConstantBuffers(0, numConstBuffers, constBuffers.data());
 					context->VSSetConstantBuffers(0, numConstBuffers, constBuffers.data());
 
 
@@ -506,12 +523,20 @@ namespace bs
 
 
 				populateViews(GPT_FRAGMENT_PROGRAM);
 				populateViews(GPT_FRAGMENT_PROGRAM);
 				numSRVs = (UINT32)srvs.size();
 				numSRVs = (UINT32)srvs.size();
+				numUAVs = (UINT32)uavs.size();
 				numConstBuffers = (UINT32)constBuffers.size();
 				numConstBuffers = (UINT32)constBuffers.size();
 				numSamplers = (UINT32)samplers.size();
 				numSamplers = (UINT32)samplers.size();
 
 
 				if (numSRVs > 0)
 				if (numSRVs > 0)
 					context->PSSetShaderResources(0, numSRVs, srvs.data());
 					context->PSSetShaderResources(0, numSRVs, srvs.data());
 
 
+				if (numUAVs > 0)
+				{
+					context->OMSetRenderTargetsAndUnorderedAccessViews(
+						D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 0, numUAVs, uavs.data(), nullptr);
+					mPSUAVsBound = true;
+				}
+
 				if (numConstBuffers > 0)
 				if (numConstBuffers > 0)
 					context->PSSetConstantBuffers(0, numConstBuffers, constBuffers.data());
 					context->PSSetConstantBuffers(0, numConstBuffers, constBuffers.data());
 
 
@@ -569,8 +594,11 @@ namespace bs
 				if (numSRVs > 0)
 				if (numSRVs > 0)
 					context->CSSetShaderResources(0, numSRVs, srvs.data());
 					context->CSSetShaderResources(0, numSRVs, srvs.data());
 
 
-				if(numUAVs > 0)
+				if (numUAVs > 0)
+				{
 					context->CSSetUnorderedAccessViews(0, numUAVs, uavs.data(), nullptr);
 					context->CSSetUnorderedAccessViews(0, numUAVs, uavs.data(), nullptr);
+					mCSUAVsBound = true;
+				}
 
 
 				if (numConstBuffers > 0)
 				if (numConstBuffers > 0)
 					context->CSSetConstantBuffers(0, numConstBuffers, constBuffers.data());
 					context->CSSetConstantBuffers(0, numConstBuffers, constBuffers.data());

+ 6 - 6
Source/BansheeD3D11RenderAPI/Source/BsD3D11Texture.cpp

@@ -242,7 +242,7 @@ namespace bs
 		UINT32 numMips = mProperties.getNumMipmaps();
 		UINT32 numMips = mProperties.getNumMipmaps();
 		PixelFormat format = mProperties.getFormat();
 		PixelFormat format = mProperties.getFormat();
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
-		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma);
+		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma, usage);
 		UINT32 numFaces = mProperties.getNumFaces();
 		UINT32 numFaces = mProperties.getNumFaces();
 
 
 		// We must have those defined here
 		// We must have those defined here
@@ -250,7 +250,7 @@ namespace bs
 
 
 		// Determine which D3D11 pixel format we'll use
 		// Determine which D3D11 pixel format we'll use
 		HRESULT hr;
 		HRESULT hr;
-		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma);
+		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma, usage);
 
 
 		if (format != D3D11Mappings::getPF(d3dPF))
 		if (format != D3D11Mappings::getPF(d3dPF))
 		{
 		{
@@ -360,7 +360,7 @@ namespace bs
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
 		UINT32 sampleCount = mProperties.getNumSamples();
 		UINT32 sampleCount = mProperties.getNumSamples();
 		TextureType texType = mProperties.getTextureType();
 		TextureType texType = mProperties.getTextureType();
-		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma);
+		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma, usage);
 		UINT32 numFaces = mProperties.getNumFaces();
 		UINT32 numFaces = mProperties.getNumFaces();
 
 
 		// TODO - Consider making this a parameter eventually
 		// TODO - Consider making this a parameter eventually
@@ -371,7 +371,7 @@ namespace bs
 
 
 		// Determine which D3D11 pixel format we'll use
 		// Determine which D3D11 pixel format we'll use
 		HRESULT hr;
 		HRESULT hr;
-		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma);
+		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma, usage);
 
 
 		if (format != D3D11Mappings::getPF(d3dPF))
 		if (format != D3D11Mappings::getPF(d3dPF))
 		{
 		{
@@ -506,7 +506,7 @@ namespace bs
 		UINT32 numMips = mProperties.getNumMipmaps();
 		UINT32 numMips = mProperties.getNumMipmaps();
 		PixelFormat format = mProperties.getFormat();
 		PixelFormat format = mProperties.getFormat();
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
 		bool hwGamma = mProperties.isHardwareGammaEnabled();
-		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma);
+		PixelFormat closestFormat = D3D11Mappings::getClosestSupportedPF(format, hwGamma, usage);
 
 
 		// TODO - Consider making this a parameter eventually
 		// TODO - Consider making this a parameter eventually
 		bool readableDepth = true;
 		bool readableDepth = true;
@@ -516,7 +516,7 @@ namespace bs
 
 
 		// Determine which D3D11 pixel format we'll use
 		// Determine which D3D11 pixel format we'll use
 		HRESULT hr;
 		HRESULT hr;
-		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma);
+		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(closestFormat, hwGamma, usage);
 		
 		
 		if (format != D3D11Mappings::getPF(d3dPF))
 		if (format != D3D11Mappings::getPF(d3dPF))
 		{
 		{

+ 2 - 1
Source/BansheeD3D11RenderAPI/Source/BsD3D11TextureManager.cpp

@@ -18,7 +18,8 @@ namespace bs
 	PixelFormat D3D11TextureManager::getNativeFormat(TextureType ttype, PixelFormat format, int usage, bool hwGamma)
 	PixelFormat D3D11TextureManager::getNativeFormat(TextureType ttype, PixelFormat format, int usage, bool hwGamma)
 	{
 	{
 		// Basic filtering
 		// Basic filtering
-		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(D3D11Mappings::getClosestSupportedPF(format, hwGamma), hwGamma);
+		DXGI_FORMAT d3dPF = D3D11Mappings::getPF(D3D11Mappings::getClosestSupportedPF(format, hwGamma, usage), 
+			hwGamma, usage);
 
 
 		return D3D11Mappings::getPF(d3dPF);
 		return D3D11Mappings::getPF(d3dPF);
 	}
 	}

+ 2 - 0
Source/RenderBeast/Include/BsPostProcessing.h

@@ -202,6 +202,8 @@ namespace bs
 	private:
 	private:
 		SPtr<GpuParamBlockBufferCore> mParamBuffer;
 		SPtr<GpuParamBlockBufferCore> mParamBuffer;
 		SPtr<GpuParamBlockBufferCore> mWhiteBalanceParamBuffer;
 		SPtr<GpuParamBlockBufferCore> mWhiteBalanceParamBuffer;
+
+		GpuParamLoadStoreTextureCore mOutputTex;
 	};
 	};
 
 
 	BS_PARAM_BLOCK_BEGIN(TonemappingParamDef)
 	BS_PARAM_BLOCK_BEGIN(TonemappingParamDef)

+ 11 - 7
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -296,6 +296,9 @@ namespace bs
 
 
 		mParamsSet->setParamBlockBuffer("Input", mParamBuffer);
 		mParamsSet->setParamBlockBuffer("Input", mParamBuffer);
 		mParamsSet->setParamBlockBuffer("WhiteBalanceInput", mWhiteBalanceParamBuffer);
 		mParamsSet->setParamBlockBuffer("WhiteBalanceInput", mWhiteBalanceParamBuffer);
+
+		SPtr<GpuParamsCore> params = mParamsSet->getGpuParams();
+		params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutputTex", mOutputTex);
 	}
 	}
 
 
 	void CreateTonemapLUTMat::_initDefines(ShaderDefines& defines)
 	void CreateTonemapLUTMat::_initDefines(ShaderDefines& defines)
@@ -339,18 +342,19 @@ namespace bs
 		gWhiteBalanceParamDef.gWhiteOffset.set(mWhiteBalanceParamBuffer, settings.whiteBalance.tint);
 		gWhiteBalanceParamDef.gWhiteOffset.set(mWhiteBalanceParamBuffer, settings.whiteBalance.tint);
 
 
 		// Set output
 		// Set output
-		POOLED_RENDER_TEXTURE_DESC outputDesc = POOLED_RENDER_TEXTURE_DESC::create3D(PF_B8G8R8X8, 
-			LUT_SIZE, LUT_SIZE, LUT_SIZE, TU_RENDERTARGET);
+		POOLED_RENDER_TEXTURE_DESC outputDesc = POOLED_RENDER_TEXTURE_DESC::create3D(PF_R8G8B8A8, 
+			LUT_SIZE, LUT_SIZE, LUT_SIZE, TU_LOADSTORE);
 
 
-		// Render
+		// Dispatch
 		ppInfo.colorLUT = RenderTexturePool::instance().get(outputDesc);
 		ppInfo.colorLUT = RenderTexturePool::instance().get(outputDesc);
 
 
-		RenderAPICore& rapi = RenderAPICore::instance();
-		rapi.setRenderTarget(ppInfo.colorLUT->renderTexture);
+		mOutputTex.set(ppInfo.colorLUT->texture);
 
 
-		gRendererUtility().setPass(mMaterial);
+		RenderAPICore& rapi = RenderAPICore::instance();
+		
+		gRendererUtility().setComputePass(mMaterial);
 		gRendererUtility().setPassParams(mParamsSet);
 		gRendererUtility().setPassParams(mParamsSet);
-		gRendererUtility().drawScreenQuad(LUT_SIZE);
+		rapi.dispatchCompute(LUT_SIZE / 8, LUT_SIZE / 8, LUT_SIZE);
 	}
 	}
 
 
 	void CreateTonemapLUTMat::release(PostProcessInfo& ppInfo)
 	void CreateTonemapLUTMat::release(PostProcessInfo& ppInfo)

+ 2 - 2
Source/RenderBeast/Source/BsRenderTargets.cpp

@@ -13,8 +13,8 @@ namespace bs
 		:mViewport(viewport), mNumSamples(numSamples), mHDR(hdr)
 		:mViewport(viewport), mNumSamples(numSamples), mHDR(hdr)
 	{
 	{
 		// Note: Consider customizable HDR format via options? e.g. smaller PF_FLOAT_R11G11B10 or larger 32-bit format
 		// Note: Consider customizable HDR format via options? e.g. smaller PF_FLOAT_R11G11B10 or larger 32-bit format
-		mSceneColorFormat = hdr ? PF_FLOAT16_RGBA : PF_B8G8R8A8;
-		mAlbedoFormat = PF_B8G8R8X8; // Note: Also consider customizable format (e.g. 16-bit float?)
+		mSceneColorFormat = hdr ? PF_FLOAT16_RGBA : PF_R8G8B8A8;
+		mAlbedoFormat = PF_R8G8B8A8; // Note: Also consider customizable format (e.g. 16-bit float?)
 		mNormalFormat = PF_UNORM_R10G10B10A2; // Note: Also consider customizable format (e.g. 16-bit float?)
 		mNormalFormat = PF_UNORM_R10G10B10A2; // Note: Also consider customizable format (e.g. 16-bit float?)
 	}
 	}