Переглянути джерело

Bulk of code for deferred rendering MSAA

BearishSun 8 роки тому
батько
коміт
8531a656fd
28 змінених файлів з 896 додано та 211 видалено
  1. 4 0
      Data/Raw/Engine/DataList.json
  2. 134 0
      Data/Raw/Engine/Shaders/FlatFramebufferToTexture.bsl
  3. 149 55
      Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl
  4. 37 36
      Source/BansheeCore/Include/BsRenderAPI.h
  5. 2 2
      Source/BansheeCore/Source/BsGpuParam.cpp
  6. 1 1
      Source/BansheeCore/Source/BsGpuParamsSet.cpp
  7. 1 1
      Source/BansheeCore/Source/BsMesh.cpp
  8. 1 1
      Source/BansheeCore/Source/BsMeshHeap.cpp
  9. 2 2
      Source/BansheeCore/Source/BsParamBlocks.cpp
  10. 1 1
      Source/BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp
  11. 1 1
      Source/BansheeEngine/Source/BsGUIManager.cpp
  12. 2 2
      Source/BansheeEngine/Source/BsRendererUtility.cpp
  13. 5 2
      Source/BansheeGLRenderAPI/Source/BsGLRenderAPI.cpp
  14. 5 1
      Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp
  15. 1 1
      Source/MBansheeEngine/Animation/Animation.cs
  16. 2 2
      Source/RenderBeast/CMakeSources.cmake
  17. 97 13
      Source/RenderBeast/Include/BsGpuResourcePool.h
  18. 64 11
      Source/RenderBeast/Include/BsLightRendering.h
  19. 4 4
      Source/RenderBeast/Include/BsPostProcessing.h
  20. 2 1
      Source/RenderBeast/Include/BsRenderBeast.h
  21. 18 0
      Source/RenderBeast/Include/BsRenderTargets.h
  22. 3 0
      Source/RenderBeast/Include/BsRendererCamera.h
  23. 110 8
      Source/RenderBeast/Source/BsGpuResourcePool.cpp
  24. 111 21
      Source/RenderBeast/Source/BsLightRendering.cpp
  25. 20 24
      Source/RenderBeast/Source/BsPostProcessing.cpp
  26. 67 17
      Source/RenderBeast/Source/BsRenderBeast.cpp
  27. 51 3
      Source/RenderBeast/Source/BsRenderTargets.cpp
  28. 1 1
      Source/RenderBeast/Source/BsRendererCamera.cpp

+ 4 - 0
Data/Raw/Engine/DataList.json

@@ -213,6 +213,10 @@
         {
         {
             "Path": "Transparent.bsl",
             "Path": "Transparent.bsl",
             "UUID": "10db2029-145c-424e-8399-2be48aa66680"
             "UUID": "10db2029-145c-424e-8399-2be48aa66680"
+        },
+        {
+            "Path": "FlatFramebufferToTexture.bsl",
+            "UUID": "c469c67d-6a43-4961-8e8c-d769f88546b7"
         }
         }
     ],
     ],
     "Skin": [
     "Skin": [

+ 134 - 0
Data/Raw/Engine/Shaders/FlatFramebufferToTexture.bsl

@@ -0,0 +1,134 @@
+Technique =
+{
+	Language = "HLSL11";
+	
+	Pass =
+	{
+		DepthRead = false;
+		DepthWrite = false;
+		
+		Target = 
+		{
+			Blend = true;
+			Color = { ONE, ONE, ADD };
+			WriteMask = RGB;
+		};		
+	
+		Common = 
+		{
+			struct VStoFS
+			{
+				float4 position : SV_POSITION;
+				float2 uv0 : TEXCOORD0;
+			};
+		};
+	
+		Vertex =
+		{
+			struct VertexInput
+			{
+				float2 screenPos : POSITION;
+				float2 uv0 : TEXCOORD0;
+			};
+			
+			VStoFS main(VertexInput input)
+			{
+				VStoFS output;
+			
+				output.position = float4(input.screenPos, 0, 1);
+				output.uv0 = input.uv0;
+
+				return output;
+			}			
+		};
+		
+		Fragment = 
+		{
+			cbuffer Params : register(b0)
+			{
+				uint2 gFramebufferSize;
+				uint gSampleCount;
+			}		
+		
+			Buffer<float4> gInput : register(t0);
+
+			uint getLinearAddress(uint2 coord, uint sampleIndex)
+			{
+				return (coord.y * gFramebufferSize.x + coord.x) * gSampleCount + sampleIndex;
+			}			
+
+			float4 main(VStoFS input, uint sampleIndex : SV_SampleIndex) : SV_Target0
+			{
+				int2 pixelPos = trunc(input.uv0);
+				uint sourceIdx = getLinearAddress(pixelPos, sampleIndex);
+
+				return gInput[sourceIdx];
+			}
+		};
+	};
+};
+
+Technique =
+{
+	Language = "GLSL";
+	
+	Pass =
+	{
+		DepthRead = false;
+		DepthWrite = false;
+		
+		Target = 
+		{
+			Blend = true;
+			Color = { ONE, ONE, ADD };
+			WriteMask = RGB;
+		};		
+		
+		Vertex =
+		{
+			layout(location = 0) in vec2 bs_position;
+			layout(location = 1) in vec2 bs_texcoord0;
+			
+			layout(location = 0) out vec2 texcoord0;
+		
+			out gl_PerVertex
+			{
+				vec4 gl_Position;
+			};
+		
+			void main()
+			{
+				gl_Position = vec4(bs_position, 0, 1);
+				texcoord0 = bs_texcoord0;
+			}		
+		};
+		
+		Fragment = 
+		{
+			layout(location = 0) in vec2 texcoord0;
+			layout(location = 0) out vec4 fragColor;
+		
+			layout(binding = 0) uniform Params 
+			{
+				uvec2 gFramebufferSize;
+				uint gSampleCount;
+			};		
+		
+			layout(binding = 1) uniform samplerBuffer gInput;
+
+			uint getLinearAddress(uvec2 coord, uint sampleIndex)
+			{
+				return (coord.y * gFramebufferSize.x + coord.x) * gSampleCount + sampleIndex;
+			}			
+
+			void main()
+			{
+				vec2 uv = trunc(texcoord0);
+				ivec2 pixelPos = ivec2(uv.x, uv.y);
+			
+				uint sourceIdx = getLinearAddress(pixelPos, gl_SampleID);
+				fragColor = texelFetch(gInput, int(sourceIdx));
+			}
+		};
+	};
+};

+ 149 - 55
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -109,9 +109,9 @@ Technique
 			
 			
 			SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
 			SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
 			{
 			{
-				float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0), sampleIndex);
-				float4 GBufferBData = gGBufferBTex.Load(int3(pixelPos, 0), sampleIndex);
-				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0), sampleIndex).r;
+				float4 GBufferAData = gGBufferATex.Load(pixelPos, sampleIndex);
+				float4 GBufferBData = gGBufferBTex.Load(pixelPos, sampleIndex);
+				float deviceZ = gDepthBufferTex.Load(pixelPos, sampleIndex).r;
 				
 				
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 			}
 			}
@@ -170,7 +170,7 @@ Technique
 				
 				
 				float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
 				float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
 				return float4(lightAccumulator * diffuse, alpha);
 				return float4(lightAccumulator * diffuse, alpha);
-			}			
+			}
 			
 			
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
 			void main(
 			void main(
@@ -342,8 +342,8 @@ Technique
 					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
 					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
 					writeBufferSample(pixelPos, 0, lighting);
 					writeBufferSample(pixelPos, 0, lighting);
 
 
-					bool needsPerSampleShading = needsPerSampleShading(pixelPos);
-					if(needsPerSampleShading)
+					bool doPerSampleShading = needsPerSampleShading(surfaceData);
+					if(doPerSampleShading)
 					{
 					{
 						[unroll]
 						[unroll]
 						for(uint i = 1; i < MSAA_COUNT; ++i)
 						for(uint i = 1; i < MSAA_COUNT; ++i)
@@ -352,6 +352,12 @@ Technique
 							writeBufferSample(pixelPos, i, lighting);
 							writeBufferSample(pixelPos, i, lighting);
 						}
 						}
 					}
 					}
+					else // Splat same information to all samples
+					{
+						[unroll]
+						for(uint i = 1; i < MSAA_COUNT; ++i)
+							writeBufferSample(pixelPos, i, lighting);
+					}
 					
 					
 					#else
 					#else
 					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
 					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
@@ -379,9 +385,15 @@ Technique
 		
 		
 			layout (local_size_x = TILE_SIZE, local_size_y = TILE_SIZE) in;
 			layout (local_size_x = TILE_SIZE, local_size_y = TILE_SIZE) in;
 		
 		
+			#if MSAA_COUNT > 1
+			layout(binding = 1) uniform sampler2DMS gGBufferATex;
+			layout(binding = 2) uniform sampler2DMS gGBufferBTex;
+			layout(binding = 3) uniform sampler2DMS gDepthBufferTex;
+			#else
 			layout(binding = 1) uniform sampler2D gGBufferATex;
 			layout(binding = 1) uniform sampler2D gGBufferATex;
 			layout(binding = 2) uniform sampler2D gGBufferBTex;
 			layout(binding = 2) uniform sampler2D gGBufferBTex;
 			layout(binding = 3) uniform sampler2D gDepthBufferTex;
 			layout(binding = 3) uniform sampler2D gDepthBufferTex;
+			#endif
 			
 			
 			SurfaceData decodeGBuffer(vec4 GBufferAData, vec4 GBufferBData, float deviceZ)
 			SurfaceData decodeGBuffer(vec4 GBufferAData, vec4 GBufferBData, float deviceZ)
 			{
 			{
@@ -394,7 +406,44 @@ Technique
 				surfaceData.depth = convertFromDeviceZ(deviceZ);
 				surfaceData.depth = convertFromDeviceZ(deviceZ);
 				
 				
 				return surfaceData;
 				return surfaceData;
-			}			
+			}
+			
+			#if MSAA_COUNT > 1
+			layout(binding = 5, rgba16f) uniform image2DMS gOutput;
+			
+			bool needsPerSampleShading(SurfaceData samples[MSAA_COUNT])
+			{
+				vec3 albedo = samples[0].albedo.xyz;
+				vec3 normal = samples[0].worldNormal.xyz;
+				float depth = samples[0].depth;
+
+				for(int i = 1; i < MSAA_COUNT; i++)
+				{
+					vec3 otherAlbedo = samples[i].albedo.xyz;
+					vec3 otherNormal = samples[i].worldNormal.xyz;
+					float otherDepth = samples[i].depth;
+
+					if(abs(depth - otherDepth) > 0.1f || abs(dot(abs(normal - otherNormal), vec3(1, 1, 1))) > 0.1f || abs(dot(albedo - otherAlbedo, vec3(1, 1, 1))) > 0.1f)
+					{
+						return true;
+					}
+				}
+				
+				return false;
+			}
+			
+			SurfaceData getGBufferData(ivec2 pixelPos, int sampleIndex)
+			{
+				vec4 GBufferAData = texelFetch(gGBufferATex, pixelPos, sampleIndex);
+				vec4 GBufferBData = texelFetch(gGBufferBTex, pixelPos, sampleIndex);
+				float deviceZ = texelFetch(gDepthBufferTex, pixelPos, sampleIndex).r;
+				
+				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
+			}
+			
+			#else
+			
+			layout(binding = 5, rgba16f) uniform image2D gOutput;
 			
 			
 			SurfaceData getGBufferData(ivec2 pixelPos)
 			SurfaceData getGBufferData(ivec2 pixelPos)
 			{
 			{
@@ -405,13 +454,13 @@ Technique
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 			}	
 			}	
 			
 			
+			#endif
+			
 			layout(std430, binding = 4) readonly buffer gLights
 			layout(std430, binding = 4) readonly buffer gLights
 			{
 			{
 				LightData[] gLightsData;
 				LightData[] gLightsData;
 			};
 			};
-						
-			layout(binding = 5, rgba16f) uniform image2D gOutput;
-			
+
 			layout(binding = 6, std140) uniform Params
 			layout(binding = 6, std140) uniform Params
 			{
 			{
 				// Offsets at which specific light types begin in gLights buffer
 				// Offsets at which specific light types begin in gLights buffer
@@ -427,12 +476,71 @@ Technique
 			shared uint sTotalNumLights;
 			shared uint sTotalNumLights;
             shared uint sLightIndices[MAX_LIGHTS];
             shared uint sLightIndices[MAX_LIGHTS];
 			
 			
+			vec4 getLighting(vec2 clipSpacePos, SurfaceData surfaceData)
+			{
+				// x, y are now in clip space, z, w are in view space
+				// We multiply them by a special inverse view-projection matrix, that had the projection entries that affect
+				// z, w eliminated (since they are already in view space)
+				// Note: Multiply by depth should be avoided if using orthographic projection
+				vec4 mixedSpacePos = vec4(clipSpacePos.xy * -surfaceData.depth, surfaceData.depth, 1);
+				vec4 worldPosition4D = gMatScreenToWorld * mixedSpacePos;
+				vec3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;	
+				
+				float alpha = 0.0f;
+				vec3 lightAccumulator = vec3(0, 0, 0);
+				if(surfaceData.worldNormal.w > 0.0f)
+				{
+					for(uint i = 0; i < gLightOffsets[0]; ++i)
+					{
+						LightData lightData = gLightsData[i];
+						lightAccumulator += getDirLightContibution(surfaceData, lightData);
+					}
+					
+                    for (uint i = 0; i < sNumLightsPerType[0]; ++i)
+                    {
+                        uint lightIdx = sLightIndices[i];
+						LightData lightData = gLightsData[lightIdx];
+                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, lightData);
+                    }
+
+					for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i)
+                    {
+                        uint lightIdx = sLightIndices[i];
+						LightData lightData = gLightsData[lightIdx];
+                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, lightData);
+                    }
+					
+					alpha = 1.0f;
+				}
+				
+				vec3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later				
+				return vec4(lightAccumulator * diffuse, alpha);
+			}
+			
 			void main()
 			void main()
 			{
 			{
 				uint threadIndex = gl_LocalInvocationID.y * TILE_SIZE + gl_LocalInvocationID.x;
 				uint threadIndex = gl_LocalInvocationID.y * TILE_SIZE + gl_LocalInvocationID.x;
 				ivec2 pixelPos = ivec2(gl_GlobalInvocationID.xy) + gViewportRectangle.xy;
 				ivec2 pixelPos = ivec2(gl_GlobalInvocationID.xy) + gViewportRectangle.xy;
-				SurfaceData surfaceData = getGBufferData(pixelPos);
 
 
+				// Get data for all samples, and determine per-pixel minimum and maximum depth values
+				SurfaceData surfaceData[MSAA_COUNT];
+				uint sampleMinZ = 0x7F7FFFFF;
+				uint sampleMaxZ = 0;
+
+				#if MSAA_COUNT > 1
+				for(int i = 0; i < MSAA_COUNT; ++i)
+				{
+					surfaceData[i] = getGBufferData(pixelPos, i);
+					
+					sampleMinZ = min(sampleMinZ, floatBitsToUint(-surfaceData[i].depth));
+					sampleMaxZ = max(sampleMaxZ, floatBitsToUint(-surfaceData[i].depth));
+				}
+				#else
+				surfaceData[0] = getGBufferData(pixelPos);
+				sampleMinZ = floatBitsToUint(-surfaceData[0].depth);
+				sampleMaxZ = floatBitsToUint(-surfaceData[0].depth);
+				#endif				
+				
 				// Set initial values
 				// Set initial values
 				if(threadIndex == 0)
 				if(threadIndex == 0)
 				{
 				{
@@ -446,8 +554,8 @@ Technique
 				groupMemoryBarrier();
 				groupMemoryBarrier();
 				barrier();
 				barrier();
 				
 				
-				atomicMin(sTileMinZ, floatBitsToUint(-surfaceData.depth));
-				atomicMax(sTileMaxZ, floatBitsToUint(-surfaceData.depth));
+				atomicMin(sTileMinZ, sampleMinZ);
+				atomicMax(sTileMaxZ, sampleMaxZ);
 				
 				
 				groupMemoryBarrier();
 				groupMemoryBarrier();
 				barrier();
 				barrier();
@@ -480,18 +588,7 @@ Technique
 				// Generate near/far frustum planes
 				// Generate near/far frustum planes
 				frustumPlanes[4] = vec4(0.0f, 0.0f, -1.0f, -minTileZ); 
 				frustumPlanes[4] = vec4(0.0f, 0.0f, -1.0f, -minTileZ); 
 				frustumPlanes[5] = vec4(0.0f, 0.0f, 1.0f, maxTileZ);
 				frustumPlanes[5] = vec4(0.0f, 0.0f, 1.0f, maxTileZ);
-				
-				vec2 screenUv = (vec2(gViewportRectangle.xy + pixelPos) + 0.5f) / vec2(gViewportRectangle.zw);
-				vec2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
-			
-				// x, y are now in clip space, z, w are in view space
-				// We multiply them by a special inverse view-projection matrix, that had the projection entries that effect
-				// z, w eliminated (since they are already in view space)
-				// Note: Multiply by depth should be avoided if using ortographic projection
-				vec4 mixedSpacePos = vec4(clipSpacePos.xy * -surfaceData.depth, surfaceData.depth, 1);
-				vec4 worldPosition4D = gMatScreenToWorld * mixedSpacePos;
-				vec3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
-			
+							
 				// Find radial & spot lights overlapping the tile
 				// Find radial & spot lights overlapping the tile
 				for(uint type = 0; type < 2; type++)
 				for(uint type = 0; type < 2; type++)
 				{
 				{
@@ -535,44 +632,41 @@ Technique
 				}
 				}
 
 
                 groupMemoryBarrier();
                 groupMemoryBarrier();
-				barrier();		
-			
-				float alpha = 0.0f;
-				vec3 lightAccumulator = vec3(0, 0, 0);
-				if(surfaceData.worldNormal.w > 0.0f)
-				{
-					for(uint i = 0; i < gLightOffsets[0]; ++i)
-					{
-						LightData lightData = gLightsData[i];
-						lightAccumulator += getDirLightContibution(surfaceData, lightData);
-					}
-					
-                    for (uint i = 0; i < sNumLightsPerType[0]; ++i)
-                    {
-                        uint lightIdx = sLightIndices[i];
-						LightData lightData = gLightsData[lightIdx];
-                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, lightData);
-                    }
+				barrier();	
 
 
-					for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i)
-                    {
-                        uint lightIdx = sLightIndices[i];
-						LightData lightData = gLightsData[lightIdx];
-                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, lightData);
-                    }
-					
-					alpha = 1.0f;
-				}
-				
-				vec3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
+				vec2 screenUv = (vec2(gViewportRectangle.xy + pixelPos) + 0.5f) / vec2(gViewportRectangle.zw);
+				vec2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
 				
 				
 				uvec2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
 				uvec2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
 
 
 				// Ignore pixels out of valid range
 				// Ignore pixels out of valid range
 				if (all(lessThan(gl_GlobalInvocationID.xy, viewportMax))) 
 				if (all(lessThan(gl_GlobalInvocationID.xy, viewportMax))) 
 				{
 				{
+					#if MSAA_COUNT > 1
+					vec4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+					imageStore(gOutput, pixelPos, 0, lighting);
+
+					bool doPerSampleShading = needsPerSampleShading(surfaceData);
+					if(doPerSampleShading)
+					{
+						for(int i = 1; i < MSAA_COUNT; ++i)
+						{
+							lighting = getLighting(clipSpacePos.xy, surfaceData[i]);
+							imageStore(gOutput, pixelPos, i, lighting);
+						}
+					}
+					else // Splat same information to all samples
+					{
+						for(int i = 1; i < MSAA_COUNT; ++i)
+							imageStore(gOutput, pixelPos, i, lighting);
+					}
+					
+					#else
+					vec4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+					
 					vec4 existingValue = imageLoad(gOutput, pixelPos);
 					vec4 existingValue = imageLoad(gOutput, pixelPos);
-					imageStore(gOutput, pixelPos, vec4(diffuse * lightAccumulator + existingValue.xyz, alpha));
+					imageStore(gOutput, pixelPos, vec4(existingValue.rgb + lighting.rgb, lighting.a));
+					#endif
 				}
 				}
 			}
 			}
 		};
 		};

+ 37 - 36
Source/BansheeCore/Include/BsRenderAPI.h

@@ -166,20 +166,46 @@ namespace bs
 		static const RenderAPIInfo& getAPIInfo();
 		static const RenderAPIInfo& getAPIInfo();
 	};
 	};
 
 
+	/** Feature flags that describe which render API specific features are enabled. */
+	enum class RenderAPIFeatureFlag
+	{
+		/** If set, vertex color order will be reversed before being sent to the shader. */
+		VertexColorFlip			= 1 << 0,
+		/** 
+		 * If set, the Y axis in texture (UV) coordinates is assumed to be pointing up, instead of down (which is the 
+		 * default). 
+		 */
+		UVYAxisUp				= 1 << 1,
+		/**
+		 * If set, the Y axis in normalized device coordinates (NDC) is assumed to be pointing down, instead of up (which
+		 * is the default).
+		 */
+		NDCYAxisDown			= 1 << 2,
+		/**
+		 * If set, the matrices used by shaders are in column major order, instead of in row major (which is the default).
+		 */
+		ColumnMajorMatrices		= 1 << 3,
+		/** 
+		 * If set, the render API has native support for multi-threaded command buffer generation. Otherwise it is 
+		 * emulated and using command buffers might not be beneficial. 
+		 */
+		MultiThreadedCB			= 1 << 4,
+		/** If set, the render API supports unordered stores to a texture with more than one sample. */
+		MSAAImageStores			= 1 << 5
+	};
+
+	typedef Flags<RenderAPIFeatureFlag> RenderAPIFeatures;
+	BS_FLAGS_OPERATORS(RenderAPIFeatureFlag)
+
 	/** Contains properties specific to a render API implementation. */
 	/** Contains properties specific to a render API implementation. */
 	class RenderAPIInfo
 	class RenderAPIInfo
 	{
 	{
 	public:
 	public:
 		RenderAPIInfo(float horzTexelOffset, float vertTexelOffset, float minDepth, float maxDepth, 
 		RenderAPIInfo(float horzTexelOffset, float vertTexelOffset, float minDepth, float maxDepth, 
-			VertexElementType vertexColorType, bool vertexColorFlip, bool uvYAxisUp, bool ndcYAxisDown, 
-				bool columnMajorMatrices, bool multiThreadedCB)
+			VertexElementType vertexColorType, RenderAPIFeatures featureFlags)
 			: mHorizontalTexelOffset(horzTexelOffset), mVerticalTexelOffset(vertTexelOffset), mMinDepth(minDepth)
 			: mHorizontalTexelOffset(horzTexelOffset), mVerticalTexelOffset(vertTexelOffset), mMinDepth(minDepth)
-			, mMaxDepth(maxDepth), mVertexColorType(vertexColorType), mVertexColorFlip(vertexColorFlip)
-			, mUVYAxisUp(uvYAxisUp), mNDCYAxisDown(ndcYAxisDown), mColumnMajorMatrices(columnMajorMatrices)
-			, mMultiThreadedCB(multiThreadedCB)
-		{
-			
-		}
+			, mMaxDepth(maxDepth), mVertexColorType(vertexColorType), mFeatureFlags(featureFlags)
+		{ }
 
 
 		/** Gets the native type used for vertex colors. */
 		/** Gets the native type used for vertex colors. */
 		VertexElementType getColorVertexElementType() const { return mVertexColorType; }
 		VertexElementType getColorVertexElementType() const { return mVertexColorType; }
@@ -196,29 +222,8 @@ namespace bs
 		/** Gets the maximum (farthest) depth value used by this render system. */
 		/** Gets the maximum (farthest) depth value used by this render system. */
 		float getMaximumDepthInputValue() const { return mMaxDepth; }
 		float getMaximumDepthInputValue() const { return mMaxDepth; }
 
 
-		/** Checks if vertex color needs to be flipped before sent to the shader. */
-		bool getVertexColorFlipRequired() const { return mVertexColorFlip; }
-
-		/** Checks whether GPU programs expect matrices in column major format. */
-		bool getGpuProgramHasColumnMajorMatrices() const { return mColumnMajorMatrices; }
-		
-		/** 
-		 * Returns true if Y axis in texture (UV) coordinates is pointing up, false if down. If axis is pointing up the axis
-		 * value at the top if 1 and at the bottom 0, otherwise reverse. 
-		 */
-		bool getUVYAxisUp() const { return mUVYAxisUp; }
-
-		/**
-		 * Returns true if the Y axis in NDC coordinates is pointing down, false if up. If axis is pointing down the value
-		 * at the top will -1 and at the bottom 1, otherwise reverse.
-		 */
-		bool getNDCYAxisDown() const { return mNDCYAxisDown; }
-
-		/**
-		 * Checks if the API supports native multi-threaded command buffer generation. On APIs that don't support it 
-		 * command buffers can still be used, but it will be more efficient to use the immediate rendering operations.
-		 */
-		bool getMultiThreadedCBGeneration() const { return mMultiThreadedCB; }
+		/** Checks if a specific feature flag is enabled. */
+		bool isFlagSet(RenderAPIFeatureFlag flag) const { return mFeatureFlags.isSet(flag); }
 
 
 	private:
 	private:
 		float mHorizontalTexelOffset = 0.0f;
 		float mHorizontalTexelOffset = 0.0f;
@@ -226,11 +231,7 @@ namespace bs
 		float mMinDepth = 0.0f;
 		float mMinDepth = 0.0f;
 		float mMaxDepth = 1.0f;
 		float mMaxDepth = 1.0f;
 		VertexElementType mVertexColorType = VET_COLOR_ABGR;
 		VertexElementType mVertexColorType = VET_COLOR_ABGR;
-		bool mVertexColorFlip = false;
-		bool mUVYAxisUp = true;
-		bool mNDCYAxisDown = false;
-		bool mColumnMajorMatrices = false;
-		bool mMultiThreadedCB = false;
+		RenderAPIFeatures mFeatureFlags;
 	};
 	};
 
 
 	/** @} */
 	/** @} */

+ 2 - 2
Source/BansheeCore/Source/BsGpuParam.cpp

@@ -43,7 +43,7 @@ namespace bs
 		UINT32 elementSizeBytes = mParamDesc->elementSize * sizeof(UINT32);
 		UINT32 elementSizeBytes = mParamDesc->elementSize * sizeof(UINT32);
 		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
 		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
 
 
-		bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
+		bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		{
 		{
 			T transposed = TransposePolicy<T>::transpose(value);
 			T transposed = TransposePolicy<T>::transpose(value);
@@ -86,7 +86,7 @@ namespace bs
 		T value;
 		T value;
 		paramBlock->read((mParamDesc->cpuMemOffset + arrayIdx * mParamDesc->arrayElementStride) * sizeof(UINT32), &value, sizeBytes);
 		paramBlock->read((mParamDesc->cpuMemOffset + arrayIdx * mParamDesc->arrayElementStride) * sizeof(UINT32), &value, sizeBytes);
 
 
-		bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
+		bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 			return TransposePolicy<T>::transpose(value);
 			return TransposePolicy<T>::transpose(value);
 		else
 		else

+ 1 - 1
Source/BansheeCore/Source/BsGpuParamsSet.cpp

@@ -897,7 +897,7 @@ namespace bs
 
 
 			UINT8* data = params->getData(materialParamInfo->index);
 			UINT8* data = params->getData(materialParamInfo->index);
 
 
-			bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
+			bool transposeMatrices = ct::RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
 			if (transposeMatrices)
 			if (transposeMatrices)
 			{
 			{
 				auto writeTransposed = [&](auto& temp)
 				auto writeTransposed = [&](auto& temp)

+ 1 - 1
Source/BansheeCore/Source/BsMesh.cpp

@@ -449,7 +449,7 @@ namespace bs
 				LOGERR("Vertex buffer values for stream \"" + toString(i) + "\" are being written out of valid range.");
 				LOGERR("Vertex buffer values for stream \"" + toString(i) + "\" are being written out of valid range.");
 			}
 			}
 
 
-			if (RenderAPI::instance().getAPIInfo().getVertexColorFlipRequired())
+			if (RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::VertexColorFlip))
 			{
 			{
 				UINT8* bufferCopy = (UINT8*)bs_alloc(bufferSize);
 				UINT8* bufferCopy = (UINT8*)bs_alloc(bufferSize);
 				memcpy(bufferCopy, srcVertBufferData, bufferSize); // TODO Low priority - Attempt to avoid this copy
 				memcpy(bufferCopy, srcVertBufferData, bufferSize); // TODO Low priority - Attempt to avoid this copy

+ 1 - 1
Source/BansheeCore/Source/BsMeshHeap.cpp

@@ -295,7 +295,7 @@ namespace bs
 			UINT8* vertDest = mCPUVertexData[i] + vertChunkStart * vertSize;
 			UINT8* vertDest = mCPUVertexData[i] + vertChunkStart * vertSize;
 			memcpy(vertDest, meshData->getStreamData(i), meshData->getNumVertices() * vertSize);
 			memcpy(vertDest, meshData->getStreamData(i), meshData->getNumVertices() * vertSize);
 
 
-			if (RenderAPI::instance().getAPIInfo().getVertexColorFlipRequired())
+			if (RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::VertexColorFlip))
 			{
 			{
 				UINT32 vertexStride = mVertexDesc->getVertexStride(i);
 				UINT32 vertexStride = mVertexDesc->getVertexStride(i);
 				for (INT32 semanticIdx = 0; semanticIdx < bs::VertexBuffer::MAX_SEMANTIC_IDX; semanticIdx++)
 				for (INT32 semanticIdx = 0; semanticIdx < bs::VertexBuffer::MAX_SEMANTIC_IDX; semanticIdx++)

+ 2 - 2
Source/BansheeCore/Source/BsParamBlocks.cpp

@@ -24,7 +24,7 @@ namespace bs { namespace ct
 		UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
 		UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
 		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
 		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
 
 
-		bool transposeMatrices = RenderAPI::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
+		bool transposeMatrices = RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		{
 		{
 			T transposed = TransposePolicy<T>::transpose(value);
 			T transposed = TransposePolicy<T>::transpose(value);
@@ -63,7 +63,7 @@ namespace bs { namespace ct
 		paramBlock->read((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), &value, 
 		paramBlock->read((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), &value, 
 			sizeBytes);
 			sizeBytes);
 
 
-		bool transposeMatrices = RenderAPI::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
+		bool transposeMatrices = RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
 			return TransposePolicy<T>::transpose(value);
 			return TransposePolicy<T>::transpose(value);
 		else
 		else

+ 1 - 1
Source/BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp

@@ -1344,7 +1344,7 @@ namespace bs { namespace ct
 
 
 	const RenderAPIInfo& D3D11RenderAPI::getAPIInfo() const
 	const RenderAPIInfo& D3D11RenderAPI::getAPIInfo() const
 	{
 	{
-		static RenderAPIInfo info(0.0f, 0.0f, 0.0f, 1.0f, VET_COLOR_ABGR, false, false, false, false, false);
+		static RenderAPIInfo info(0.0f, 0.0f, 0.0f, 1.0f, VET_COLOR_ABGR, RenderAPIFeatures());
 
 
 		return info;
 		return info;
 	}
 	}

+ 1 - 1
Source/BansheeEngine/Source/BsGUIManager.cpp

@@ -1762,7 +1762,7 @@ namespace bs
 
 
 		float invViewportWidth = 1.0f / (camera.getViewport()->getWidth() * 0.5f);
 		float invViewportWidth = 1.0f / (camera.getViewport()->getWidth() * 0.5f);
 		float invViewportHeight = 1.0f / (camera.getViewport()->getHeight() * 0.5f);
 		float invViewportHeight = 1.0f / (camera.getViewport()->getHeight() * 0.5f);
-		float viewflipYFlip = bs::RenderAPI::getAPIInfo().getNDCYAxisDown() ? -1.0f : 1.0f;
+		float viewflipYFlip = bs::RenderAPI::getAPIInfo().isFlagSet(RenderAPIFeatureFlag::NDCYAxisDown) ? -1.0f : 1.0f;
 
 
 		for (auto& entry : renderData)
 		for (auto& entry : renderData)
 		{
 		{

+ 2 - 2
Source/BansheeEngine/Source/BsRendererUtility.cpp

@@ -291,7 +291,7 @@ namespace bs { namespace ct
 		const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
 		const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
 		Vector3 vertices[4];
 		Vector3 vertices[4];
 
 
-		if (rapiInfo.getNDCYAxisDown())
+		if (rapiInfo.isFlagSet(RenderAPIFeatureFlag::NDCYAxisDown))
 		{
 		{
 			vertices[0] = Vector3(-1.0f, -1.0f, 0.0f);
 			vertices[0] = Vector3(-1.0f, -1.0f, 0.0f);
 			vertices[1] = Vector3(1.0f, -1.0f, 0.0f);
 			vertices[1] = Vector3(1.0f, -1.0f, 0.0f);
@@ -307,7 +307,7 @@ namespace bs { namespace ct
 		}
 		}
 
 
 		Vector2 uvs[4];
 		Vector2 uvs[4];
-		if (rapiInfo.getUVYAxisUp() ^ flipUV)
+		if (rapiInfo.isFlagSet(RenderAPIFeatureFlag::UVYAxisUp) ^ flipUV)
 		{
 		{
 			uvs[0] = Vector2(uv.x, uv.y + uv.height);
 			uvs[0] = Vector2(uv.x, uv.y + uv.height);
 			uvs[1] = Vector2(uv.x + uv.width, uv.y + uv.height);
 			uvs[1] = Vector2(uv.x + uv.width, uv.y + uv.height);

+ 5 - 2
Source/BansheeGLRenderAPI/Source/BsGLRenderAPI.cpp

@@ -2283,8 +2283,11 @@ namespace bs { namespace ct
 
 
 	const RenderAPIInfo& GLRenderAPI::getAPIInfo() const
 	const RenderAPIInfo& GLRenderAPI::getAPIInfo() const
 	{
 	{
-		static RenderAPIInfo info(0.0f, 0.0f, -1.0f, 1.0f, VET_COLOR_ABGR, false, true, false, true, false);
-
+		static RenderAPIInfo info(0.0f, 0.0f, -1.0f, 1.0f, VET_COLOR_ABGR,
+								  RenderAPIFeatureFlag::UVYAxisUp |
+								  RenderAPIFeatureFlag::ColumnMajorMatrices |
+								  RenderAPIFeatureFlag::MSAAImageStores);
+								  
 		return info;
 		return info;
 	}
 	}
 
 

+ 5 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

@@ -552,7 +552,11 @@ namespace bs { namespace ct
 
 
 	const RenderAPIInfo& VulkanRenderAPI::getAPIInfo() const
 	const RenderAPIInfo& VulkanRenderAPI::getAPIInfo() const
 	{
 	{
-		static RenderAPIInfo info(0.0f, 0.0f, 0.0f, 1.0f, VET_COLOR_ABGR, false, false, true, true, true);
+		static RenderAPIInfo info(0.0f, 0.0f, 0.0f, 1.0f, VET_COLOR_ABGR, 
+								  RenderAPIFeatureFlag::NDCYAxisDown |
+								  RenderAPIFeatureFlag::ColumnMajorMatrices |
+								  RenderAPIFeatureFlag::MultiThreadedCB |
+								  RenderAPIFeatureFlag::MSAAImageStores);
 
 
 		return info;
 		return info;
 	}
 	}

+ 1 - 1
Source/MBansheeEngine/Animation/Animation.cs

@@ -381,7 +381,7 @@ namespace BansheeEngine
         /// </summary>
         /// </summary>
         /// <param name="name">Name of the morph channel to modify the weight for. This depends on the mesh the animation
         /// <param name="name">Name of the morph channel to modify the weight for. This depends on the mesh the animation
         ///                    is currently animating.</param>
         ///                    is currently animating.</param>
-        /// <param name="weight">Weight that determines how much of the channel to apply to the mesh, in range[0, 1].
+        /// <param name="weight">Weight that determines how much of the channel to apply to the mesh, in range [0, 1].
         ///                     </param>
         ///                     </param>
         public void SetMorphChannelWeight(string name, float weight)
         public void SetMorphChannelWeight(string name, float weight)
         {
         {

+ 2 - 2
Source/RenderBeast/CMakeSources.cmake

@@ -1,5 +1,5 @@
 set(BS_RENDERBEAST_INC_NOFILTER
 set(BS_RENDERBEAST_INC_NOFILTER
-	"Include/BsRenderTexturePool.h"
+	"Include/BsGpuResourcePool.h"
 	"Include/BsRenderBeastOptions.h"
 	"Include/BsRenderBeastOptions.h"
 	"Include/BsSamplerOverrides.h"
 	"Include/BsSamplerOverrides.h"
 	"Include/BsRenderBeast.h"
 	"Include/BsRenderBeast.h"
@@ -16,7 +16,7 @@ set(BS_RENDERBEAST_INC_NOFILTER
 )
 )
 
 
 set(BS_RENDERBEAST_SRC_NOFILTER
 set(BS_RENDERBEAST_SRC_NOFILTER
-	"Source/BsRenderTexturePool.cpp"
+	"Source/BsGpuResourcePool.cpp"
 	"Source/BsSamplerOverrides.cpp"
 	"Source/BsSamplerOverrides.cpp"
 	"Source/BsRenderBeast.cpp"
 	"Source/BsRenderBeast.cpp"
 	"Source/BsRenderBeastFactory.cpp"
 	"Source/BsRenderBeastFactory.cpp"

+ 97 - 13
Source/RenderBeast/Include/BsRenderTexturePool.h → Source/RenderBeast/Include/BsGpuResourcePool.h

@@ -13,51 +13,89 @@ namespace bs { namespace ct
 	 *  @{
 	 *  @{
 	 */
 	 */
 
 
-	class RenderTexturePool;
+	class GpuResourcePool;
 	struct POOLED_RENDER_TEXTURE_DESC;
 	struct POOLED_RENDER_TEXTURE_DESC;
+	struct POOLED_STORAGE_BUFFER_DESC;
 
 
-	/**	Contains data about a single render texture in the texture pool. */
+	/**	Contains data about a single render texture in the GPU resource pool. */
 	struct PooledRenderTexture
 	struct PooledRenderTexture
 	{
 	{
-		PooledRenderTexture(RenderTexturePool* pool);
+		PooledRenderTexture(GpuResourcePool* pool);
 		~PooledRenderTexture();
 		~PooledRenderTexture();
 
 
 		SPtr<Texture> texture;
 		SPtr<Texture> texture;
 		SPtr<RenderTexture> renderTexture;
 		SPtr<RenderTexture> renderTexture;
 
 
 	private:
 	private:
-		friend class RenderTexturePool;
+		friend class GpuResourcePool;
 
 
-		RenderTexturePool* mPool;
+		GpuResourcePool* mPool;
 		bool mIsFree;
 		bool mIsFree;
 	};
 	};
 
 
-	/** Contains a pool of render textures meant to accommodate reuse of render textures of the same size and format. */
-	class RenderTexturePool : public Module<RenderTexturePool>
+	/**	Contains data about a single storage buffer in the GPU resource pool. */
+	struct PooledStorageBuffer
+	{
+		PooledStorageBuffer(GpuResourcePool* pool);
+		~PooledStorageBuffer();
+
+		SPtr<GpuBuffer> buffer;
+
+	private:
+		friend class GpuResourcePool;
+
+		GpuResourcePool* mPool;
+		bool mIsFree;
+	};
+
+	/** 
+	 * Contains a pool of textures and buffers meant to accommodate reuse of such resources for the main purpose of using
+	 * them as write targets on the GPU.
+	 */
+	class GpuResourcePool : public Module<GpuResourcePool>
 	{
 	{
 	public:
 	public:
-		~RenderTexturePool();
+		~GpuResourcePool();
 
 
 		/**
 		/**
 		 * Attempts to find the unused render texture with the specified parameters in the pool, or creates a new texture
 		 * Attempts to find the unused render texture with the specified parameters in the pool, or creates a new texture
-		 * otherwise. When done with the texture make sure to call release().
+		 * otherwise. When done with the texture make sure to call release(const SPtr<PooledRenderTexture>&).
 		 *
 		 *
 		 * @param[in]	desc		Descriptor structure that describes what kind of texture to retrieve.
 		 * @param[in]	desc		Descriptor structure that describes what kind of texture to retrieve.
 		 */
 		 */
 		SPtr<PooledRenderTexture> get(const POOLED_RENDER_TEXTURE_DESC& desc);
 		SPtr<PooledRenderTexture> get(const POOLED_RENDER_TEXTURE_DESC& desc);
 
 
 		/**
 		/**
-		 * Releases a texture previously allocated with get(). The texture is returned to the pool so that it may be reused
-		 * later.
+		 * Attempts to find the unused storage buffer with the specified parameters in the pool, or creates a new buffer
+		 * otherwise. When done with the buffer make sure to call release(const SPtr<PooledStorageBuffer>&).
+		 *
+		 * @param[in]	desc		Descriptor structure that describes what kind of buffer to retrieve.
+		 */
+		SPtr<PooledStorageBuffer> get(const POOLED_STORAGE_BUFFER_DESC& desc);
+
+		/**
+		 * Releases a texture previously allocated with get(const POOLED_RENDER_TEXTURE_DESC&). The texture is returned to
+		 * the pool so that it may be reused later.
 		 *			
 		 *			
 		 * @note	
 		 * @note	
 		 * The texture will be removed from the pool if the last reference to it is deleted. Normally you would call 
 		 * The texture will be removed from the pool if the last reference to it is deleted. Normally you would call 
-		 * release() but keep a reference if you plan on using it later on.
+		 * release(const SPtr<PooledRenderTexture>&) but keep a reference if you plan on using it later on.
 		 */
 		 */
 		void release(const SPtr<PooledRenderTexture>& texture);
 		void release(const SPtr<PooledRenderTexture>& texture);
 
 
+		/**
+		 * Releases a buffer previously allocated with get(const POOLED_STORAGE_BUFFER_DESC&). The buffer is returned to the
+		 * pool so that it may be reused later.
+		 *			
+		 * @note	
+		 * The buffer will be removed from the pool if the last reference to it is deleted. Normally you would call 
+		 * release(const SPtr<PooledStorageBuffer>&) but keep a reference if you plan on using it later on.
+		 */
+		void release(const SPtr<PooledStorageBuffer>& buffer);
+
 	private:
 	private:
 		friend struct PooledRenderTexture;
 		friend struct PooledRenderTexture;
+		friend struct PooledStorageBuffer;
 
 
 		/**	Registers a newly created render texture in the pool. */
 		/**	Registers a newly created render texture in the pool. */
 		void _registerTexture(const SPtr<PooledRenderTexture>& texture);
 		void _registerTexture(const SPtr<PooledRenderTexture>& texture);
@@ -65,6 +103,12 @@ namespace bs { namespace ct
 		/**	Unregisters a created render texture in the pool. */
 		/**	Unregisters a created render texture in the pool. */
 		void _unregisterTexture(PooledRenderTexture* texture);
 		void _unregisterTexture(PooledRenderTexture* texture);
 
 
+		/**	Registers a newly created storage buffer in the pool. */
+		void _registerBuffer(const SPtr<PooledStorageBuffer>& buffer);
+
+		/**	Unregisters a created storage buffer in the pool. */
+		void _unregisterBuffer(PooledStorageBuffer* buffer);
+
 		/**
 		/**
 		 * Checks does the provided texture match the parameters.
 		 * Checks does the provided texture match the parameters.
 		 * 
 		 * 
@@ -73,7 +117,16 @@ namespace bs { namespace ct
 		 */
 		 */
 		static bool matches(const SPtr<Texture>& texture, const POOLED_RENDER_TEXTURE_DESC& desc);
 		static bool matches(const SPtr<Texture>& texture, const POOLED_RENDER_TEXTURE_DESC& desc);
 
 
+		/**
+		 * Checks whether the provided buffer matches the parameters.
+		 * 
+		 * @param[in]	desc	Descriptor structure that describes what kind of buffer to match.
+		 * @return				True if the buffer matches the descriptor, false otherwise.
+		 */
+		static bool matches(const SPtr<GpuBuffer>& buffer, const POOLED_STORAGE_BUFFER_DESC& desc);
+
 		Map<PooledRenderTexture*, std::weak_ptr<PooledRenderTexture>> mTextures;
 		Map<PooledRenderTexture*, std::weak_ptr<PooledRenderTexture>> mTextures;
+		Map<PooledStorageBuffer*, std::weak_ptr<PooledStorageBuffer>> mBuffers;
 	};
 	};
 
 
 	/** Structure used for creating a new pooled render texture. */
 	/** Structure used for creating a new pooled render texture. */
@@ -122,7 +175,7 @@ namespace bs { namespace ct
 			INT32 usage = TU_STATIC);
 			INT32 usage = TU_STATIC);
 
 
 	private:
 	private:
-		friend class RenderTexturePool;
+		friend class GpuResourcePool;
 
 
 		UINT32 width;
 		UINT32 width;
 		UINT32 height;
 		UINT32 height;
@@ -134,5 +187,36 @@ namespace bs { namespace ct
 		bool hwGamma;
 		bool hwGamma;
 	};
 	};
 
 
+	/** Structure used for describing a pooled storage buffer. */
+	struct POOLED_STORAGE_BUFFER_DESC
+	{
+	public:
+		POOLED_STORAGE_BUFFER_DESC() {}
+
+		/**
+		 * Creates a descriptor for a storage buffer containing primitive data types.
+		 *
+		 * @param[in]	format		Format of individual buffer entries.
+		 * @param[in]	numElements	Number of elements in the buffer.
+		 */
+		static POOLED_STORAGE_BUFFER_DESC createStandard(GpuBufferFormat format, UINT32 numElements);
+
+		/**
+		 * Creates a descriptor for a storage buffer containing structures.
+		 *
+		 * @param[in]	elementSize		Size of a single structure in the buffer.
+		 * @param[in]	numElements		Number of elements in the buffer.
+		 */
+		static POOLED_STORAGE_BUFFER_DESC createStructured(UINT32 elementSize, UINT32 numElements);
+
+	private:
+		friend class GpuResourcePool;
+
+		GpuBufferType type;
+		GpuBufferFormat format;
+		UINT32 numElements;
+		UINT32 elementSize;
+	};
+
 	/** @} */
 	/** @} */
 }}
 }}

+ 64 - 11
Source/RenderBeast/Include/BsLightRendering.h

@@ -53,9 +53,6 @@ namespace bs { namespace ct
 		/** Returns a GPU bindable buffer containing information about every light. */
 		/** Returns a GPU bindable buffer containing information about every light. */
 		SPtr<GpuBuffer> getLightBuffer() const { return mLightBuffer; }
 		SPtr<GpuBuffer> getLightBuffer() const { return mLightBuffer; }
 
 
-		/** Returns a GPU bindable param buffer containing meta-data about light in the ligth buffer. */
-		SPtr<GpuParamBlockBuffer> getParamBuffer() const { return mParamBuffer; }
-
 		/** Returns the number of directional lights in the lights buffer. */
 		/** Returns the number of directional lights in the lights buffer. */
 		UINT32 getNumDirLights() const { return mNumLights[0]; }
 		UINT32 getNumDirLights() const { return mNumLights[0]; }
 
 
@@ -66,7 +63,6 @@ namespace bs { namespace ct
 		UINT32 getNumSpotLights() const { return mNumLights[2]; }
 		UINT32 getNumSpotLights() const { return mNumLights[2]; }
 
 
 	private:
 	private:
-		SPtr<GpuParamBlockBuffer> mParamBuffer;
 		SPtr<GpuBuffer> mLightBuffer;
 		SPtr<GpuBuffer> mLightBuffer;
 
 
 		UINT32 mNumLights[3];
 		UINT32 mNumLights[3];
@@ -74,32 +70,89 @@ namespace bs { namespace ct
 
 
 	BS_PARAM_BLOCK_BEGIN(TiledLightingParamDef)
 	BS_PARAM_BLOCK_BEGIN(TiledLightingParamDef)
 		BS_PARAM_BLOCK_ENTRY(Vector3I, gLightOffsets)
 		BS_PARAM_BLOCK_ENTRY(Vector3I, gLightOffsets)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gFramebufferSize)
 	BS_PARAM_BLOCK_END
 	BS_PARAM_BLOCK_END
 
 
 	extern TiledLightingParamDef gTiledLightingParamDef;
 	extern TiledLightingParamDef gTiledLightingParamDef;
 
 
-	/** Shader that performs a lighting pass over data stored in the Gbuffer. */
-	class TiledDeferredLightingMat : public RendererMaterial<TiledDeferredLightingMat>
+	/** Functionality common to all versions of TTiledDeferredLightingMat<MSAA_COUNT>. */
+	class TiledDeferredLighting
 	{
 	{
-		RMAT_DEF("TiledDeferredLighting.bsl");
-
 	public:
 	public:
-		TiledDeferredLightingMat();
+		TiledDeferredLighting(const SPtr<Material>& material, const SPtr<GpuParamsSet>& paramsSet, UINT32 sampleCount);
 
 
 		/** Binds the material for rendering, sets up parameters and executes it. */
 		/** Binds the material for rendering, sets up parameters and executes it. */
 		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera);
 		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera);
 
 
 		/** Binds all the active lights. */
 		/** Binds all the active lights. */
 		void setLights(const GPULightData& lightData);
 		void setLights(const GPULightData& lightData);
+
+		static const UINT32 TILE_SIZE;
 	private:
 	private:
+		UINT32 mSampleCount;
+		SPtr<Material> mMaterial;
+		SPtr<GpuParamsSet> mParamsSet;
+
 		GpuParamTexture mGBufferA;
 		GpuParamTexture mGBufferA;
 		GpuParamTexture mGBufferB;
 		GpuParamTexture mGBufferB;
 		GpuParamTexture mGBufferDepth;
 		GpuParamTexture mGBufferDepth;
 
 
 		GpuParamBuffer mLightBufferParam;
 		GpuParamBuffer mLightBufferParam;
-		GpuParamLoadStoreTexture mOutputParam;
+		GpuParamLoadStoreTexture mOutputTextureParam;
+		GpuParamBuffer mOutputBufferParam;
 
 
-		static const UINT32 TILE_SIZE;
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+	};
+
+	/** Interface implemented by all versions of TTiledDeferredLightingMat<MSAA_COUNT>. */
+	class ITiledDeferredLightingMat
+	{
+	public:
+		virtual ~ITiledDeferredLightingMat() {}
+
+		/** Binds the material for rendering, sets up parameters and executes it. */
+		virtual void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera) = 0;
+
+		/** Binds all the active lights. */
+		virtual void setLights(const GPULightData& lightData) = 0;
+	};
+
+	/** Shader that performs a lighting pass over data stored in the Gbuffer. */
+	template<int MSAA_COUNT>
+	class TTiledDeferredLightingMat : public ITiledDeferredLightingMat, public RendererMaterial<TTiledDeferredLightingMat<MSAA_COUNT>>
+	{
+		RMAT_DEF("TiledDeferredLighting.bsl");
+
+	public:
+		TTiledDeferredLightingMat();
+
+		/** Binds the material for rendering, sets up parameters and executes it. */
+		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera) override;
+
+		/** Binds all the active lights. */
+		void setLights(const GPULightData& lightData) override;
+	private:
+		TiledDeferredLighting mInternal;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(FlatFramebufferToTextureParamDef)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gFramebufferSize)
+		BS_PARAM_BLOCK_ENTRY(INT32, gSampleCount)
+	BS_PARAM_BLOCK_END
+
+	/** Shader that copies a flattened framebuffer into a multisampled texture. */
+	class FlatFramebufferToTextureMat : public RendererMaterial<FlatFramebufferToTextureMat>
+	{
+		RMAT_DEF("FlatFramebufferToTexture.bsl");
+
+	public:
+		FlatFramebufferToTextureMat();
+
+		/** Binds the material for rendering, sets up parameters and executes it. */
+		void execute(const SPtr<GpuBuffer>& flatFramebuffer, const SPtr<Texture>& target);
+	private:
+		GpuParamBuffer mInputParam;
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
 	};
 	};
 
 
 	/** @} */
 	/** @} */

+ 4 - 4
Source/RenderBeast/Include/BsPostProcessing.h

@@ -5,7 +5,7 @@
 #include "BsRenderBeastPrerequisites.h"
 #include "BsRenderBeastPrerequisites.h"
 #include "BsRendererMaterial.h"
 #include "BsRendererMaterial.h"
 #include "BsParamBlocks.h"
 #include "BsParamBlocks.h"
-#include "BsRenderTexturePool.h"
+#include "BsGpuResourcePool.h"
 #include "BsStandardPostProcessSettings.h"
 #include "BsStandardPostProcessSettings.h"
 
 
 namespace bs { namespace ct
 namespace bs { namespace ct
@@ -43,7 +43,7 @@ namespace bs { namespace ct
 		DownsampleMat();
 		DownsampleMat();
 
 
 		/** Renders the post-process effect with the provided parameters. */
 		/** Renders the post-process effect with the provided parameters. */
-		void execute(const SPtr<RenderTexture>& target, PostProcessInfo& ppInfo);
+		void execute(const SPtr<Texture>& target, PostProcessInfo& ppInfo);
 
 
 		/** Releases the output render target. */
 		/** Releases the output render target. */
 		void release(PostProcessInfo& ppInfo);
 		void release(PostProcessInfo& ppInfo);
@@ -223,7 +223,7 @@ namespace bs { namespace ct
 		TonemappingMat();
 		TonemappingMat();
 
 
 		/** Executes the post-process effect with the provided parameters. */
 		/** Executes the post-process effect with the provided parameters. */
-		void execute(const SPtr<RenderTexture>& sceneColor, const SPtr<RenderTarget>& outputRT, const Rect2& outputRect,
+		void execute(const SPtr<Texture>& sceneColor, const SPtr<RenderTarget>& outputRT, const Rect2& outputRect,
 			PostProcessInfo& ppInfo);
 			PostProcessInfo& ppInfo);
 
 
 	private:
 	private:
@@ -247,7 +247,7 @@ namespace bs { namespace ct
 		 * view's final output render target. Once the method exits, final render target is guaranteed to be currently
 		 * view's final output render target. Once the method exits, final render target is guaranteed to be currently
 		 * bound for rendering. 
 		 * bound for rendering. 
 		 */
 		 */
-		void postProcess(RendererCamera* viewInfo, const SPtr<RenderTexture>& sceneColor, float frameDelta);
+		void postProcess(RendererCamera* viewInfo, const SPtr<Texture>& sceneColor, float frameDelta);
 		
 		
 	private:
 	private:
 		DownsampleMat mDownsample;
 		DownsampleMat mDownsample;

+ 2 - 1
Source/RenderBeast/Include/BsRenderBeast.h

@@ -218,7 +218,8 @@ namespace bs
 		SPtr<RenderBeastOptions> mCoreOptions;
 		SPtr<RenderBeastOptions> mCoreOptions;
 
 
 		DefaultMaterial* mDefaultMaterial;
 		DefaultMaterial* mDefaultMaterial;
-		TiledDeferredLightingMat* mTiledDeferredLightingMat;
+		ITiledDeferredLightingMat* mTiledDeferredLightingMats[4];
+		FlatFramebufferToTextureMat* mFlatFramebufferToTextureMat;
 		SkyboxMat* mSkyboxMat;
 		SkyboxMat* mSkyboxMat;
 
 
 		GPULightData* mGPULightData;
 		GPULightData* mGPULightData;

+ 18 - 0
Source/RenderBeast/Include/BsRenderTargets.h

@@ -64,6 +64,21 @@ namespace bs { namespace ct
 		/**	Returns the depth texture of the gbuffer as a bindable texture. */
 		/**	Returns the depth texture of the gbuffer as a bindable texture. */
 		SPtr<Texture> getTextureDepth() const;
 		SPtr<Texture> getTextureDepth() const;
 
 
+		/** 
+		 * Returns a scene color texture with a single sample per pixel. If no multisampling is used, this is the same as
+		 * getSceneColor().
+		 */
+		SPtr<Texture> getSceneColorNonMSAA() const;
+
+		/** Returns a render target that can be used for rendering to the texture returned by getSceneColorNonMSAA(). */
+		SPtr<RenderTexture> getSceneColorNonMSAART() const;
+
+		/** 
+		 * Returns a buffer that is meant to be used for rendering when MSAA is used, since writes to multisampled textures
+		 * aren't supported on all render backends.
+		 */
+		SPtr<GpuBuffer> getFlattenedSceneColorBuffer() const;
+
 		/**	Checks if the targets support HDR rendering. */
 		/**	Checks if the targets support HDR rendering. */
 		bool getHDR() const { return mHDR; }
 		bool getHDR() const { return mHDR; }
 
 
@@ -86,6 +101,9 @@ namespace bs { namespace ct
 		SPtr<PooledRenderTexture> mNormalTex;
 		SPtr<PooledRenderTexture> mNormalTex;
 		SPtr<PooledRenderTexture> mDepthTex;
 		SPtr<PooledRenderTexture> mDepthTex;
 
 
+		SPtr<PooledRenderTexture> mSceneColorNonMSAATex;
+		SPtr<PooledStorageBuffer> mFlattenedSceneColorBuffer;
+
 		SPtr<RenderTexture> mGBufferRT;
 		SPtr<RenderTexture> mGBufferRT;
 		SPtr<RenderTexture> mSceneColorRT;
 		SPtr<RenderTexture> mSceneColorRT;
 
 

+ 3 - 0
Source/RenderBeast/Include/BsRendererCamera.h

@@ -162,6 +162,9 @@ namespace bs { namespace ct
 		/** Returns true if the resulting render target should be flipped vertically. */
 		/** Returns true if the resulting render target should be flipped vertically. */
 		bool getFlipView() const { return mViewDesc.flipView; }
 		bool getFlipView() const { return mViewDesc.flipView; }
 
 
+		/** Returns the number of samples per pixel to render. */
+		UINT32 getNumSamples() const { return mViewDesc.target.numSamples; }
+
 		/** Returns the scene camera this object is based of. This can be null for manually constructed renderer cameras. */
 		/** Returns the scene camera this object is based of. This can be null for manually constructed renderer cameras. */
 		const Camera* getSceneCamera() const { return mViewDesc.sceneCamera; }
 		const Camera* getSceneCamera() const { return mViewDesc.sceneCamera; }
 
 

+ 110 - 8
Source/RenderBeast/Source/BsRenderTexturePool.cpp → Source/RenderBeast/Source/BsGpuResourcePool.cpp

@@ -1,13 +1,14 @@
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
-#include "BsRenderTexturePool.h"
+#include "BsGpuResourcePool.h"
 #include "BsRenderTexture.h"
 #include "BsRenderTexture.h"
 #include "BsTexture.h"
 #include "BsTexture.h"
+#include "BsGpuBuffer.h"
 #include "BsTextureManager.h"
 #include "BsTextureManager.h"
 
 
 namespace bs { namespace ct
 namespace bs { namespace ct
 {
 {
-	PooledRenderTexture::PooledRenderTexture(RenderTexturePool* pool)
+	PooledRenderTexture::PooledRenderTexture(GpuResourcePool* pool)
 		:mPool(pool), mIsFree(false)
 		:mPool(pool), mIsFree(false)
 	{ }
 	{ }
 
 
@@ -17,13 +18,26 @@ namespace bs { namespace ct
 			mPool->_unregisterTexture(this);
 			mPool->_unregisterTexture(this);
 	}
 	}
 
 
-	RenderTexturePool::~RenderTexturePool()
+	PooledStorageBuffer::PooledStorageBuffer(GpuResourcePool* pool)
+		:mPool(pool), mIsFree(false)
+	{ }
+
+	PooledStorageBuffer::~PooledStorageBuffer()
+	{
+		if (mPool != nullptr)
+			mPool->_unregisterBuffer(this);
+	}
+
+	GpuResourcePool::~GpuResourcePool()
 	{
 	{
 		for (auto& texture : mTextures)
 		for (auto& texture : mTextures)
 			texture.second.lock()->mPool = nullptr;
 			texture.second.lock()->mPool = nullptr;
+
+		for (auto& buffer : mBuffers)
+			buffer.second.lock()->mPool = nullptr;
 	}
 	}
 
 
-	SPtr<PooledRenderTexture> RenderTexturePool::get(const POOLED_RENDER_TEXTURE_DESC& desc)
+	SPtr<PooledRenderTexture> GpuResourcePool::get(const POOLED_RENDER_TEXTURE_DESC& desc)
 	{
 	{
 		for (auto& texturePair : mTextures)
 		for (auto& texturePair : mTextures)
 		{
 		{
@@ -83,13 +97,53 @@ namespace bs { namespace ct
 		return newTextureData;
 		return newTextureData;
 	}
 	}
 
 
-	void RenderTexturePool::release(const SPtr<PooledRenderTexture>& texture)
+	SPtr<PooledStorageBuffer> GpuResourcePool::get(const POOLED_STORAGE_BUFFER_DESC& desc)
+	{
+		for (auto& bufferPair : mBuffers)
+		{
+			SPtr<PooledStorageBuffer> bufferData = bufferPair.second.lock();
+
+			if (!bufferData->mIsFree)
+				continue;
+
+			if (bufferData->buffer == nullptr)
+				continue;
+
+			if (matches(bufferData->buffer, desc))
+			{
+				bufferData->mIsFree = false;
+				return bufferData;
+			}
+		}
+
+		SPtr<PooledStorageBuffer> newBufferData = bs_shared_ptr_new<PooledStorageBuffer>(this);
+		_registerBuffer(newBufferData);
+
+		GPU_BUFFER_DESC bufferDesc;
+		bufferDesc.type = desc.type;
+		bufferDesc.elementSize = desc.elementSize;
+		bufferDesc.elementCount = desc.numElements;
+		bufferDesc.format = desc.format;
+		bufferDesc.randomGpuWrite = true;
+
+		newBufferData->buffer = GpuBuffer::create(bufferDesc);
+
+		return newBufferData;
+	}
+
+	void GpuResourcePool::release(const SPtr<PooledRenderTexture>& texture)
 	{
 	{
 		auto iterFind = mTextures.find(texture.get());
 		auto iterFind = mTextures.find(texture.get());
 		iterFind->second.lock()->mIsFree = true;
 		iterFind->second.lock()->mIsFree = true;
 	}
 	}
 
 
-	bool RenderTexturePool::matches(const SPtr<Texture>& texture, const POOLED_RENDER_TEXTURE_DESC& desc)
+	void GpuResourcePool::release(const SPtr<PooledStorageBuffer>& buffer)
+	{
+		auto iterFind = mBuffers.find(buffer.get());
+		iterFind->second.lock()->mIsFree = true;
+	}
+
+	bool GpuResourcePool::matches(const SPtr<Texture>& texture, const POOLED_RENDER_TEXTURE_DESC& desc)
 	{
 	{
 		const TextureProperties& texProps = texture->getProperties();
 		const TextureProperties& texProps = texture->getProperties();
 
 
@@ -111,16 +165,42 @@ namespace bs { namespace ct
 		return match;
 		return match;
 	}
 	}
 
 
-	void RenderTexturePool::_registerTexture(const SPtr<PooledRenderTexture>& texture)
+	bool GpuResourcePool::matches(const SPtr<GpuBuffer>& buffer, const POOLED_STORAGE_BUFFER_DESC& desc)
+	{
+		const GpuBufferProperties& props = buffer->getProperties();
+
+		bool match = props.getType() == desc.type && props.getElementCount() == desc.numElements;
+		if(match)
+		{
+			if (desc.type == GBT_STANDARD)
+				match = props.getFormat() == desc.format;
+			else // Structured
+				match = props.getElementSize() == desc.elementSize;
+		}
+
+		return match;
+	}
+
+	void GpuResourcePool::_registerTexture(const SPtr<PooledRenderTexture>& texture)
 	{
 	{
 		mTextures.insert(std::make_pair(texture.get(), texture));
 		mTextures.insert(std::make_pair(texture.get(), texture));
 	}
 	}
 
 
-	void RenderTexturePool::_unregisterTexture(PooledRenderTexture* texture)
+	void GpuResourcePool::_unregisterTexture(PooledRenderTexture* texture)
 	{
 	{
 		mTextures.erase(texture);
 		mTextures.erase(texture);
 	}
 	}
 
 
+	void GpuResourcePool::_registerBuffer(const SPtr<PooledStorageBuffer>& buffer)
+	{
+		mBuffers.insert(std::make_pair(buffer.get(), buffer));
+	}
+
+	void GpuResourcePool::_unregisterBuffer(PooledStorageBuffer* buffer)
+	{
+		mBuffers.erase(buffer);
+	}
+
 	POOLED_RENDER_TEXTURE_DESC POOLED_RENDER_TEXTURE_DESC::create2D(PixelFormat format, UINT32 width, UINT32 height,
 	POOLED_RENDER_TEXTURE_DESC POOLED_RENDER_TEXTURE_DESC::create2D(PixelFormat format, UINT32 width, UINT32 height,
 		INT32 usage, UINT32 samples, bool hwGamma)
 		INT32 usage, UINT32 samples, bool hwGamma)
 	{
 	{
@@ -168,4 +248,26 @@ namespace bs { namespace ct
 
 
 		return desc;
 		return desc;
 	}
 	}
+
+	POOLED_STORAGE_BUFFER_DESC POOLED_STORAGE_BUFFER_DESC::createStandard(GpuBufferFormat format, UINT32 numElements)
+	{
+		POOLED_STORAGE_BUFFER_DESC desc;
+		desc.type = GBT_STANDARD;
+		desc.format = format;
+		desc.numElements = numElements;
+		desc.elementSize = 0;
+
+		return desc;
+	}
+
+	POOLED_STORAGE_BUFFER_DESC POOLED_STORAGE_BUFFER_DESC::createStructured(UINT32 elementSize, UINT32 numElements)
+	{
+		POOLED_STORAGE_BUFFER_DESC desc;
+		desc.type = GBT_STRUCTURED;
+		desc.format = BF_UNKNOWN;
+		desc.numElements = numElements;
+		desc.elementSize = elementSize;
+
+		return desc;
+	}
 }}
 }}

+ 111 - 21
Source/RenderBeast/Source/BsLightRendering.cpp

@@ -40,9 +40,7 @@ namespace bs { namespace ct
 
 
 	GPULightData::GPULightData()
 	GPULightData::GPULightData()
 		:mNumLights {}
 		:mNumLights {}
-	{
-		mParamBuffer = gTiledLightingParamDef.createBuffer();
-	}
+	{ }
 
 
 	void GPULightData::setLights(const Vector<LightData>& lightData, UINT32 numDirLights, UINT32 numRadialLights,
 	void GPULightData::setLights(const Vector<LightData>& lightData, UINT32 numDirLights, UINT32 numRadialLights,
 				   UINT32 numSpotLights)
 				   UINT32 numSpotLights)
@@ -82,14 +80,13 @@ namespace bs { namespace ct
 
 
 		if (size > 0)
 		if (size > 0)
 			mLightBuffer->writeData(0, size, lightData.data(), BWT_DISCARD);
 			mLightBuffer->writeData(0, size, lightData.data(), BWT_DISCARD);
-
-		gTiledLightingParamDef.gLightOffsets.set(mParamBuffer, lightOffsets);
-		mParamBuffer->flushToGPU();
 	}
 	}
 
 
-	const UINT32 TiledDeferredLightingMat::TILE_SIZE = 16;
+	const UINT32 TiledDeferredLighting::TILE_SIZE = 16;
 
 
-	TiledDeferredLightingMat::TiledDeferredLightingMat()
+	TiledDeferredLighting::TiledDeferredLighting(const SPtr<Material>& material, const SPtr<GpuParamsSet>& paramsSet, 
+												 UINT32 sampleCount)
+		:mSampleCount(sampleCount), mMaterial(material), mParamsSet(paramsSet)
 	{
 	{
 		SPtr<GpuParams> params = mParamsSet->getGpuParams();
 		SPtr<GpuParams> params = mParamsSet->getGpuParams();
 
 
@@ -105,28 +102,46 @@ namespace bs { namespace ct
 		}
 		}
 
 
 		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gLights", mLightBufferParam);
 		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gLights", mLightBufferParam);
-		params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputParam);
-	}
 
 
-	void TiledDeferredLightingMat::_initDefines(ShaderDefines& defines)
-	{
-		defines.set("TILE_SIZE", TILE_SIZE);
-		defines.set("MSAA_COUNT", 1);
+		if(params->hasLoadStoreTexture(GPT_COMPUTE_PROGRAM, "gOutput"))
+			params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputTextureParam);
+
+		if (params->hasBuffer(GPT_COMPUTE_PROGRAM, "gOutput"))
+			params->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputBufferParam);
+
+		mParamBuffer = gTiledLightingParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer, true);
 	}
 	}
 
 
-	void TiledDeferredLightingMat::execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera)
+	void TiledDeferredLighting::execute(const SPtr<RenderTargets>& gbuffer,
+										const SPtr<GpuParamBlockBuffer>& perCamera)
 	{
 	{
+		Vector2I framebufferSize;
+		framebufferSize[0] = gbuffer->getWidth();
+		framebufferSize[1] = gbuffer->getHeight();
+		gTiledLightingParamDef.gFramebufferSize.set(mParamBuffer, framebufferSize);
+
+		mParamBuffer->flushToGPU();
+
 		mGBufferA.set(gbuffer->getTextureA());
 		mGBufferA.set(gbuffer->getTextureA());
 		mGBufferB.set(gbuffer->getTextureB());
 		mGBufferB.set(gbuffer->getTextureB());
 		mGBufferDepth.set(gbuffer->getTextureDepth());
 		mGBufferDepth.set(gbuffer->getTextureDepth());
 
 
 		mParamsSet->setParamBlockBuffer("PerCamera", perCamera, true);
 		mParamsSet->setParamBlockBuffer("PerCamera", perCamera, true);
 
 
-		SPtr<Texture> sceneColorTex = gbuffer->getSceneColor();
-		mOutputParam.set(sceneColorTex);
+		if (mSampleCount > 1)
+		{
+			SPtr<GpuBuffer> sceneColorBuffer = gbuffer->getFlattenedSceneColorBuffer();
+			mOutputBufferParam.set(sceneColorBuffer);
+		}
+		else
+		{
+			SPtr<Texture> sceneColorTex = gbuffer->getSceneColor();
+			mOutputTextureParam.set(sceneColorTex);
+		}
 
 
-		UINT32 width = sceneColorTex->getProperties().getWidth();
-		UINT32 height = sceneColorTex->getProperties().getHeight();
+		UINT32 width = gbuffer->getWidth();
+		UINT32 height = gbuffer->getHeight();
 
 
 		UINT32 numTilesX = (UINT32)Math::ceilToInt(width / (float)TILE_SIZE);
 		UINT32 numTilesX = (UINT32)Math::ceilToInt(width / (float)TILE_SIZE);
 		UINT32 numTilesY = (UINT32)Math::ceilToInt(height / (float)TILE_SIZE);
 		UINT32 numTilesY = (UINT32)Math::ceilToInt(height / (float)TILE_SIZE);
@@ -137,9 +152,84 @@ namespace bs { namespace ct
 		RenderAPI::instance().dispatchCompute(numTilesX, numTilesY);
 		RenderAPI::instance().dispatchCompute(numTilesX, numTilesY);
 	}
 	}
 
 
-	void TiledDeferredLightingMat::setLights(const GPULightData& lightData)
+	void TiledDeferredLighting::setLights(const GPULightData& lightData)
 	{
 	{
 		mLightBufferParam.set(lightData.getLightBuffer());
 		mLightBufferParam.set(lightData.getLightBuffer());
-		mParamsSet->setParamBlockBuffer("Params", lightData.getParamBuffer(), true);
+
+		Vector3I lightOffsets;
+		lightOffsets[0] = lightData.getNumDirLights();
+		lightOffsets[1] = lightOffsets[0] + lightData.getNumRadialLights();
+		lightOffsets[2] = lightOffsets[1] + lightData.getNumSpotLights();
+
+		gTiledLightingParamDef.gLightOffsets.set(mParamBuffer, lightOffsets);
 	}
 	}
+
+	// Constructs the shared lighting implementation from this material's material and parameter set, passing the
+	// template's MSAA sample count along.
+	template<int MSAA_COUNT>
+	TTiledDeferredLightingMat<MSAA_COUNT>::TTiledDeferredLightingMat()
+		: mInternal(mMaterial, mParamsSet, MSAA_COUNT)
+	{ }
+
+	// Registers the shader defines for this variation: the tile size and the MSAA sample count.
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::_initDefines(ShaderDefines& defines)
+	{
+		// Tile size comes from the shared implementation
+		defines.set("TILE_SIZE", TiledDeferredLighting::TILE_SIZE);
+
+		// Sample count is fixed per template instantiation
+		defines.set("MSAA_COUNT", MSAA_COUNT);
+	}
+
+	// Forwards the lighting pass to the shared implementation.
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::execute(
+		const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera)
+	{
+		this->mInternal.execute(gbuffer, perCamera);
+	}
+
+	// Forwards the light data to the shared implementation.
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::setLights(const GPULightData& lightData)
+	{
+		this->mInternal.setLights(lightData);
+	}
+
+	template class TTiledDeferredLightingMat<1>;
+	template class TTiledDeferredLightingMat<2>;
+	template class TTiledDeferredLightingMat<4>;
+	template class TTiledDeferredLightingMat<8>;
+
+	FlatFramebufferToTextureParamDef gFlatFramebufferToTextureParamDef;
+
+	// Looks up the "gInput" buffer parameter of the fragment program and creates the buffer backing the "Params"
+	// parameter block.
+	FlatFramebufferToTextureMat::FlatFramebufferToTextureMat()
+	{
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+		params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gInput", mInputParam);
+
+		// The buffer must be created from this material's own parameter definition, because execute() writes
+		// gFramebufferSize and gSampleCount into it through gFlatFramebufferToTextureParamDef. Creating it from
+		// gTiledLightingParamDef would give it the tiled lighting block's layout instead.
+		mParamBuffer = gFlatFramebufferToTextureParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer, true);
+	}
+
+	// Intentionally empty: this material sets no shader defines.
+	void FlatFramebufferToTextureMat::_initDefines(ShaderDefines& defines)
+	{ }
+
+	// Uploads the target texture's dimensions and sample count to the parameter block, binds the flattened
+	// framebuffer as input and draws a screen quad using the material's first pass.
+	void FlatFramebufferToTextureMat::execute(const SPtr<GpuBuffer>& flatFramebuffer, const SPtr<Texture>& target)
+	{
+		const TextureProperties& targetProps = target->getProperties();
+
+		// Parameter block: framebuffer size followed by the sample count
+		Vector2I targetSize;
+		targetSize[0] = targetProps.getWidth();
+		targetSize[1] = targetProps.getHeight();
+
+		gFlatFramebufferToTextureParamDef.gFramebufferSize.set(mParamBuffer, targetSize);
+		gFlatFramebufferToTextureParamDef.gSampleCount.set(mParamBuffer, targetProps.getNumSamples());
+		mParamBuffer->flushToGPU();
+
+		// Source data
+		mInputParam.set(flatFramebuffer);
+
+		// Draw
+		gRendererUtility().setPass(mMaterial, 0);
+		gRendererUtility().setPassParams(mParamsSet);
+		gRendererUtility().drawScreenQuad();
+	}
+
 }}
 }}

+ 20 - 24
Source/RenderBeast/Source/BsPostProcessing.cpp

@@ -2,7 +2,7 @@
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsPostProcessing.h"
 #include "BsPostProcessing.h"
 #include "BsRenderTexture.h"
 #include "BsRenderTexture.h"
-#include "BsRenderTexturePool.h"
+#include "BsGpuResourcePool.h"
 #include "BsRendererUtility.h"
 #include "BsRendererUtility.h"
 #include "BsTextureManager.h"
 #include "BsTextureManager.h"
 #include "BsCamera.h"
 #include "BsCamera.h"
@@ -26,27 +26,24 @@ namespace bs { namespace ct
 		// Do nothing
 		// Do nothing
 	}
 	}
 
 
-	void DownsampleMat::execute(const SPtr<RenderTexture>& target, PostProcessInfo& ppInfo)
+	void DownsampleMat::execute(const SPtr<Texture>& target, PostProcessInfo& ppInfo)
 	{
 	{
 		// Set parameters
 		// Set parameters
-		SPtr<Texture> colorTexture = target->getColorTexture(0);
-		mInputTexture.set(colorTexture);
+		mInputTexture.set(target);
 
 
-		const RenderTextureProperties& rtProps = target->getProperties();
+		const TextureProperties& rtProps = target->getProperties();
 		Vector2 invTextureSize(1.0f / rtProps.getWidth(), 1.0f / rtProps.getHeight());
 		Vector2 invTextureSize(1.0f / rtProps.getWidth(), 1.0f / rtProps.getHeight());
 
 
 		gDownsampleParamDef.gInvTexSize.set(mParamBuffer, invTextureSize);
 		gDownsampleParamDef.gInvTexSize.set(mParamBuffer, invTextureSize);
 
 
 		// Set output
 		// Set output
-		const TextureProperties& colorProps = colorTexture->getProperties();
+		UINT32 width = std::max(1, Math::ceilToInt(rtProps.getWidth() * 0.5f));
+		UINT32 height = std::max(1, Math::ceilToInt(rtProps.getHeight() * 0.5f));
 
 
-		UINT32 width = std::max(1, Math::ceilToInt(colorProps.getWidth() * 0.5f));
-		UINT32 height = std::max(1, Math::ceilToInt(colorProps.getHeight() * 0.5f));
-
-		mOutputDesc = POOLED_RENDER_TEXTURE_DESC::create2D(colorProps.getFormat(), width, height, TU_RENDERTARGET);
+		mOutputDesc = POOLED_RENDER_TEXTURE_DESC::create2D(rtProps.getFormat(), width, height, TU_RENDERTARGET);
 
 
 		// Render
 		// Render
-		ppInfo.downsampledSceneTex = RenderTexturePool::instance().get(mOutputDesc);
+		ppInfo.downsampledSceneTex = GpuResourcePool::instance().get(mOutputDesc);
 
 
 		RenderAPI& rapi = RenderAPI::instance();
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(ppInfo.downsampledSceneTex->renderTexture, true);
 		rapi.setRenderTarget(ppInfo.downsampledSceneTex->renderTexture, true);
@@ -62,7 +59,7 @@ namespace bs { namespace ct
 
 
 	void DownsampleMat::release(PostProcessInfo& ppInfo)
 	void DownsampleMat::release(PostProcessInfo& ppInfo)
 	{
 	{
-		RenderTexturePool::instance().release(ppInfo.downsampledSceneTex);
+		GpuResourcePool::instance().release(ppInfo.downsampledSceneTex);
 		mOutput = nullptr;
 		mOutput = nullptr;
 	}
 	}
 
 
@@ -108,7 +105,7 @@ namespace bs { namespace ct
 			TU_LOADSTORE);
 			TU_LOADSTORE);
 
 
 		// Dispatch
 		// Dispatch
-		ppInfo.histogramTex = RenderTexturePool::instance().get(mOutputDesc);
+		ppInfo.histogramTex = GpuResourcePool::instance().get(mOutputDesc);
 
 
 		mOutputTex.set(ppInfo.histogramTex->texture);
 		mOutputTex.set(ppInfo.histogramTex->texture);
 
 
@@ -122,7 +119,7 @@ namespace bs { namespace ct
 
 
 	void EyeAdaptHistogramMat::release(PostProcessInfo& ppInfo)
 	void EyeAdaptHistogramMat::release(PostProcessInfo& ppInfo)
 	{
 	{
-		RenderTexturePool::instance().release(ppInfo.histogramTex);
+		GpuResourcePool::instance().release(ppInfo.histogramTex);
 		mOutput = nullptr;
 		mOutput = nullptr;
 	}
 	}
 
 
@@ -193,7 +190,7 @@ namespace bs { namespace ct
 			TU_RENDERTARGET);
 			TU_RENDERTARGET);
 
 
 		// Render
 		// Render
-		ppInfo.histogramReduceTex = RenderTexturePool::instance().get(mOutputDesc);
+		ppInfo.histogramReduceTex = GpuResourcePool::instance().get(mOutputDesc);
 
 
 		RenderAPI& rapi = RenderAPI::instance();
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(ppInfo.histogramReduceTex->renderTexture, true);
 		rapi.setRenderTarget(ppInfo.histogramReduceTex->renderTexture, true);
@@ -211,7 +208,7 @@ namespace bs { namespace ct
 
 
 	void EyeAdaptHistogramReduceMat::release(PostProcessInfo& ppInfo)
 	void EyeAdaptHistogramReduceMat::release(PostProcessInfo& ppInfo)
 	{
 	{
-		RenderTexturePool::instance().release(ppInfo.histogramReduceTex);
+		GpuResourcePool::instance().release(ppInfo.histogramReduceTex);
 		mOutput = nullptr;
 		mOutput = nullptr;
 	}
 	}
 
 
@@ -236,8 +233,8 @@ namespace bs { namespace ct
 		if(!texturesInitialized)
 		if(!texturesInitialized)
 		{
 		{
 			POOLED_RENDER_TEXTURE_DESC outputDesc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT32_R, 1, 1, TU_RENDERTARGET);
 			POOLED_RENDER_TEXTURE_DESC outputDesc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT32_R, 1, 1, TU_RENDERTARGET);
-			ppInfo.eyeAdaptationTex[0] = RenderTexturePool::instance().get(outputDesc);
-			ppInfo.eyeAdaptationTex[1] = RenderTexturePool::instance().get(outputDesc);
+			ppInfo.eyeAdaptationTex[0] = GpuResourcePool::instance().get(outputDesc);
+			ppInfo.eyeAdaptationTex[1] = GpuResourcePool::instance().get(outputDesc);
 		}
 		}
 
 
 		ppInfo.lastEyeAdaptationTex = (ppInfo.lastEyeAdaptationTex + 1) % 2; // TODO - Do I really need two targets?
 		ppInfo.lastEyeAdaptationTex = (ppInfo.lastEyeAdaptationTex + 1) % 2; // TODO - Do I really need two targets?
@@ -347,7 +344,7 @@ namespace bs { namespace ct
 			LUT_SIZE, LUT_SIZE, LUT_SIZE, TU_LOADSTORE);
 			LUT_SIZE, LUT_SIZE, LUT_SIZE, TU_LOADSTORE);
 
 
 		// Dispatch
 		// Dispatch
-		ppInfo.colorLUT = RenderTexturePool::instance().get(outputDesc);
+		ppInfo.colorLUT = GpuResourcePool::instance().get(outputDesc);
 
 
 		mOutputTex.set(ppInfo.colorLUT->texture);
 		mOutputTex.set(ppInfo.colorLUT->texture);
 
 
@@ -360,7 +357,7 @@ namespace bs { namespace ct
 
 
 	void CreateTonemapLUTMat::release(PostProcessInfo& ppInfo)
 	void CreateTonemapLUTMat::release(PostProcessInfo& ppInfo)
 	{
 	{
-		RenderTexturePool::instance().release(ppInfo.colorLUT);
+		GpuResourcePool::instance().release(ppInfo.colorLUT);
 	}
 	}
 
 
 	TonemappingParamDef gTonemappingParamDef;
 	TonemappingParamDef gTonemappingParamDef;
@@ -392,15 +389,14 @@ namespace bs { namespace ct
 	}
 	}
 
 
 	template<bool GammaOnly, bool AutoExposure>
 	template<bool GammaOnly, bool AutoExposure>
-	void TonemappingMat<GammaOnly, AutoExposure>::execute(const SPtr<RenderTexture>& sceneColor, 
+	void TonemappingMat<GammaOnly, AutoExposure>::execute(const SPtr<Texture>& sceneColor, 
 		const SPtr<RenderTarget>& outputRT, const Rect2& outputRect, PostProcessInfo& ppInfo)
 		const SPtr<RenderTarget>& outputRT, const Rect2& outputRect, PostProcessInfo& ppInfo)
 	{
 	{
 		gTonemappingParamDef.gRawGamma.set(mParamBuffer, 1.0f / ppInfo.settings->gamma);
 		gTonemappingParamDef.gRawGamma.set(mParamBuffer, 1.0f / ppInfo.settings->gamma);
 		gTonemappingParamDef.gManualExposureScale.set(mParamBuffer, Math::pow(2.0f, ppInfo.settings->exposureScale));
 		gTonemappingParamDef.gManualExposureScale.set(mParamBuffer, Math::pow(2.0f, ppInfo.settings->exposureScale));
 
 
 		// Set parameters
 		// Set parameters
-		SPtr<Texture> colorTexture = sceneColor->getColorTexture(0);
-		mInputTex.set(colorTexture);
+		mInputTex.set(sceneColor);
 
 
 		SPtr<Texture> colorLUT;
 		SPtr<Texture> colorLUT;
 		if(ppInfo.colorLUT != nullptr)
 		if(ppInfo.colorLUT != nullptr)
@@ -430,7 +426,7 @@ namespace bs { namespace ct
 	template class TonemappingMat<true, false>;
 	template class TonemappingMat<true, false>;
 	template class TonemappingMat<false, false>;
 	template class TonemappingMat<false, false>;
 
 
-	void PostProcessing::postProcess(RendererCamera* viewInfo, const SPtr<RenderTexture>& sceneColor, float frameDelta)
+	void PostProcessing::postProcess(RendererCamera* viewInfo, const SPtr<Texture>& sceneColor, float frameDelta)
 	{
 	{
 		PostProcessInfo& ppInfo = viewInfo->getPPInfo();
 		PostProcessInfo& ppInfo = viewInfo->getPPInfo();
 		const StandardPostProcessSettings& settings = *ppInfo.settings;
 		const StandardPostProcessSettings& settings = *ppInfo.settings;

+ 67 - 17
Source/RenderBeast/Source/BsRenderBeast.cpp

@@ -23,7 +23,7 @@
 #include "BsRenderBeastOptions.h"
 #include "BsRenderBeastOptions.h"
 #include "BsSamplerOverrides.h"
 #include "BsSamplerOverrides.h"
 #include "BsLight.h"
 #include "BsLight.h"
-#include "BsRenderTexturePool.h"
+#include "BsGpuResourcePool.h"
 #include "BsRenderTargets.h"
 #include "BsRenderTargets.h"
 #include "BsRendererUtility.h"
 #include "BsRendererUtility.h"
 #include "BsAnimationManager.h"
 #include "BsAnimationManager.h"
@@ -40,9 +40,9 @@ using namespace std::placeholders;
 namespace bs { namespace ct
 namespace bs { namespace ct
 {
 {
 	RenderBeast::RenderBeast()
 	RenderBeast::RenderBeast()
-		: mDefaultMaterial(nullptr), mTiledDeferredLightingMat(nullptr), mSkyboxMat(nullptr), mGPULightData(nullptr)
-		, mLightGrid(nullptr), mObjectRenderer(nullptr), mOptions(bs_shared_ptr_new<RenderBeastOptions>())
-		, mOptionsDirty(true)
+		: mDefaultMaterial(nullptr), mTiledDeferredLightingMats(), mFlatFramebufferToTextureMat(nullptr)
+		, mSkyboxMat(nullptr), mGPULightData(nullptr), mLightGrid(nullptr), mObjectRenderer(nullptr)
+		, mOptions(bs_shared_ptr_new<RenderBeastOptions>()), mOptionsDirty(true)
 	{ }
 	{ }
 
 
 	const StringID& RenderBeast::getName() const
 	const StringID& RenderBeast::getName() const
@@ -74,13 +74,18 @@ namespace bs { namespace ct
 		mObjectRenderer = bs_new<ObjectRenderer>();
 		mObjectRenderer = bs_new<ObjectRenderer>();
 
 
 		mDefaultMaterial = bs_new<DefaultMaterial>();
 		mDefaultMaterial = bs_new<DefaultMaterial>();
-		mTiledDeferredLightingMat = bs_new<TiledDeferredLightingMat>();
 		mSkyboxMat = bs_new<SkyboxMat>();
 		mSkyboxMat = bs_new<SkyboxMat>();
+		mFlatFramebufferToTextureMat = bs_new<FlatFramebufferToTextureMat>();
+
+		mTiledDeferredLightingMats[0] = bs_new<TTiledDeferredLightingMat<1>>();
+		mTiledDeferredLightingMats[1] = bs_new<TTiledDeferredLightingMat<2>>();
+		mTiledDeferredLightingMats[2] = bs_new<TTiledDeferredLightingMat<4>>();
+		mTiledDeferredLightingMats[3] = bs_new<TTiledDeferredLightingMat<8>>();
 
 
 		mGPULightData = bs_new<GPULightData>();
 		mGPULightData = bs_new<GPULightData>();
 		mLightGrid = bs_new<LightGrid>();
 		mLightGrid = bs_new<LightGrid>();
 
 
-		RenderTexturePool::startUp();
+		GpuResourcePool::startUp();
 		PostProcessing::startUp();
 		PostProcessing::startUp();
 	}
 	}
 
 
@@ -101,13 +106,17 @@ namespace bs { namespace ct
 		mRenderableVisibility.clear();
 		mRenderableVisibility.clear();
 
 
 		PostProcessing::shutDown();
 		PostProcessing::shutDown();
-		RenderTexturePool::shutDown();
+		GpuResourcePool::shutDown();
 
 
 		bs_delete(mDefaultMaterial);
 		bs_delete(mDefaultMaterial);
-		bs_delete(mTiledDeferredLightingMat);
 		bs_delete(mSkyboxMat);
 		bs_delete(mSkyboxMat);
 		bs_delete(mGPULightData);
 		bs_delete(mGPULightData);
 		bs_delete(mLightGrid);
 		bs_delete(mLightGrid);
+		bs_delete(mFlatFramebufferToTextureMat);
+
+		UINT32 numDeferredMats = sizeof(mTiledDeferredLightingMats) / sizeof(mTiledDeferredLightingMats[0]);
+		for (UINT32 i = 0; i < numDeferredMats; i++)
+			bs_delete(mTiledDeferredLightingMats[i]);
 
 
 		RendererUtility::shutDown();
 		RendererUtility::shutDown();
 
 
@@ -745,8 +754,6 @@ namespace bs { namespace ct
 		mLightDataTemp.clear();
 		mLightDataTemp.clear();
 		mLightVisibilityTemp.clear();
 		mLightVisibilityTemp.clear();
 
 
-		mTiledDeferredLightingMat->setLights(*mGPULightData);
-
 		// Update various buffers required by each renderable
 		// Update various buffers required by each renderable
 		UINT32 numRenderables = (UINT32)mRenderables.size();
 		UINT32 numRenderables = (UINT32)mRenderables.size();
 		for (UINT32 i = 0; i < numRenderables; i++)
 		for (UINT32 i = 0; i < numRenderables; i++)
@@ -867,7 +874,39 @@ namespace bs { namespace ct
 		renderTargets->bindSceneColor(true);
 		renderTargets->bindSceneColor(true);
 
 
 		// Render light pass
 		// Render light pass
-		mTiledDeferredLightingMat->execute(renderTargets, perCameraBuffer);
+		ITiledDeferredLightingMat* lightingMat;
+
+		UINT32 numSamples = viewInfo->getNumSamples();
+		switch(numSamples)
+		{
+		case 0:
+		case 1:
+			lightingMat = mTiledDeferredLightingMats[0]; // No MSAA
+			break;
+		case 2:
+			lightingMat = mTiledDeferredLightingMats[1]; // 2X MSAA
+			break;
+		case 4:
+			lightingMat = mTiledDeferredLightingMats[2]; // 4X MSAA
+			break;
+		default:
+			lightingMat = mTiledDeferredLightingMats[3]; // 8X MSAA or higher
+			break;
+		}
+
+		lightingMat->setLights(*mGPULightData);
+		lightingMat->execute(renderTargets, perCameraBuffer);
+
+		const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
+		bool usingFlattenedFB = numSamples > 1 && !rapiInfo.isFlagSet(RenderAPIFeatureFlag::MSAAImageStores);
+
+		// If we're using flattened framebuffer for MSAA we need to copy its contents to the MSAA scene texture before
+		// continuing
+		if(usingFlattenedFB)
+		{
+			mFlatFramebufferToTextureMat->execute(renderTargets->getFlattenedSceneColorBuffer(), 
+												  renderTargets->getSceneColor());
+		}
 
 
 		// Render skybox (if any)
 		// Render skybox (if any)
 		SPtr<Texture> skyTexture = viewInfo->getSkybox();
 		SPtr<Texture> skyTexture = viewInfo->getSkybox();
@@ -908,22 +947,33 @@ namespace bs { namespace ct
 		}
 		}
 
 
 		// Post-processing and final resolve
 		// Post-processing and final resolve
+		RenderAPI& rapi = RenderAPI::instance();
+		Rect2 viewportArea = viewInfo->getViewportRect();
+
 		if (viewInfo->checkRunPostProcessing())
 		if (viewInfo->checkRunPostProcessing())
 		{
 		{
-			// TODO - If GBuffer has multiple samples, I should resolve them before post-processing
-			PostProcessing::instance().postProcess(viewInfo, renderTargets->getSceneColorRT(), frameDelta);
+			// If using MSAA, resolve into non-MSAA texture before post-processing
+			if(numSamples > 1)
+			{
+				rapi.setRenderTarget(renderTargets->getSceneColorNonMSAART());
+				rapi.setViewport(viewportArea);
+
+				SPtr<Texture> sceneColor = renderTargets->getSceneColor();
+				gRendererUtility().blit(sceneColor, Rect2I::EMPTY, viewInfo->getFlipView());
+			}
+
+			// Post-processing code also takes care of writing to the final output target
+			PostProcessing::instance().postProcess(viewInfo, renderTargets->getSceneColor(), frameDelta);
 		}
 		}
 		else
 		else
 		{
 		{
 			// Just copy from scene color to output if no post-processing
 			// Just copy from scene color to output if no post-processing
-			RenderAPI& rapi = RenderAPI::instance();
 			SPtr<RenderTarget> target = viewInfo->getFinalTarget();
 			SPtr<RenderTarget> target = viewInfo->getFinalTarget();
-			Rect2 viewportArea = viewInfo->getViewportRect();
 
 
 			rapi.setRenderTarget(target);
 			rapi.setRenderTarget(target);
 			rapi.setViewport(viewportArea);
 			rapi.setViewport(viewportArea);
 
 
-			SPtr<Texture> sceneColor = renderTargets->getSceneColorRT()->getColorTexture(0);
+			SPtr<Texture> sceneColor = renderTargets->getSceneColor();
 			gRendererUtility().blit(sceneColor, Rect2I::EMPTY, viewInfo->getFlipView());
 			gRendererUtility().blit(sceneColor, Rect2I::EMPTY, viewInfo->getFlipView());
 		}
 		}
 
 
@@ -1056,7 +1106,7 @@ namespace bs { namespace ct
 		viewDesc.visibleLayers = 0xFFFFFFFFFFFFFFFF;
 		viewDesc.visibleLayers = 0xFFFFFFFFFFFFFFFF;
 		viewDesc.nearPlane = 0.5f;
 		viewDesc.nearPlane = 0.5f;
 		viewDesc.farPlane = 1000.0f;
 		viewDesc.farPlane = 1000.0f;
-		viewDesc.flipView = RenderAPI::instance().getAPIInfo().getUVYAxisUp();
+		viewDesc.flipView = RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::UVYAxisUp);
 
 
 		viewDesc.viewOrigin = position;
 		viewDesc.viewOrigin = position;
 		viewDesc.projTransform = projTransform;
 		viewDesc.projTransform = projTransform;

+ 51 - 3
Source/RenderBeast/Source/BsRenderTargets.cpp

@@ -1,7 +1,7 @@
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsRenderTargets.h"
 #include "BsRenderTargets.h"
-#include "BsRenderTexturePool.h"
+#include "BsGpuResourcePool.h"
 #include "BsViewport.h"
 #include "BsViewport.h"
 #include "BsRenderAPI.h"
 #include "BsRenderAPI.h"
 #include "BsTextureManager.h"
 #include "BsTextureManager.h"
@@ -25,11 +25,15 @@ namespace bs { namespace ct
 
 
 	void RenderTargets::allocate()
 	void RenderTargets::allocate()
 	{
 	{
-		RenderTexturePool& texPool = RenderTexturePool::instance();
+		GpuResourcePool& texPool = GpuResourcePool::instance();
 
 
 		UINT32 width = mViewTarget.viewRect.width;
 		UINT32 width = mViewTarget.viewRect.width;
 		UINT32 height = mViewTarget.viewRect.height;
 		UINT32 height = mViewTarget.viewRect.height;
 
 
+		// Note: This class is keeping all these textures alive for too long (even after they are done for a frame). We
+		// could save on memory by deallocating and reallocating them every frame, but it remains to be seen how much of
+		// a performance impact that would have.
+
 		// Note: Albedo is allocated as SRGB, meaning when reading from textures during depth pass we decode from sRGB into linear,
 		// Note: Albedo is allocated as SRGB, meaning when reading from textures during depth pass we decode from sRGB into linear,
 		// then back into sRGB when writing to albedo, and back to linear when reading from albedo during light pass. This /might/ have
 		// then back into sRGB when writing to albedo, and back to linear when reading from albedo during light pass. This /might/ have
 		// a performance impact. In which case we could just use a higher precision albedo buffer, which can then store linear color
 		// a performance impact. In which case we could just use a higher precision albedo buffer, which can then store linear color
@@ -43,6 +47,23 @@ namespace bs { namespace ct
 		SPtr<PooledRenderTexture> newDepthRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_D32_S8X24, width, height, 
 		SPtr<PooledRenderTexture> newDepthRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_D32_S8X24, width, height, 
 			TU_DEPTHSTENCIL, mViewTarget.numSamples, false));
 			TU_DEPTHSTENCIL, mViewTarget.numSamples, false));
 
 
+		if(mViewTarget.numSamples > 1)
+		{
+			const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
+
+			// DX11/HLSL is unable to have a UAV for a multisampled texture, so we need to use a buffer instead and then
+			// perform a blit to the actual scene color
+			if (!rapiInfo.isFlagSet(RenderAPIFeatureFlag::MSAAImageStores))
+			{
+				UINT32 bufferNumElements = width * height * mViewTarget.numSamples;
+				mFlattenedSceneColorBuffer = texPool.get(POOLED_STORAGE_BUFFER_DESC::createStandard(BF_16X4F, bufferNumElements));
+			}
+
+			// Need a texture we'll resolve MSAA to before post-processing
+			mSceneColorNonMSAATex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width,
+																					 height, TU_RENDERTARGET, 1, false));
+		}
+
 		bool rebuildTargets = newColorRT != mSceneColorTex || newAlbedoRT != mAlbedoTex || newNormalRT != mNormalTex || newDepthRT != mDepthTex;
 		bool rebuildTargets = newColorRT != mSceneColorTex || newAlbedoRT != mAlbedoTex || newNormalRT != mNormalTex || newDepthRT != mDepthTex;
 
 
 		mSceneColorTex = newColorRT;
 		mSceneColorTex = newColorRT;
@@ -94,12 +115,18 @@ namespace bs { namespace ct
 		RenderAPI& rapi = RenderAPI::instance();
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(nullptr);
 		rapi.setRenderTarget(nullptr);
 
 
-		RenderTexturePool& texPool = RenderTexturePool::instance();
+		GpuResourcePool& texPool = GpuResourcePool::instance();
 
 
 		texPool.release(mSceneColorTex);
 		texPool.release(mSceneColorTex);
 		texPool.release(mAlbedoTex);
 		texPool.release(mAlbedoTex);
 		texPool.release(mNormalTex);
 		texPool.release(mNormalTex);
 		texPool.release(mDepthTex);
 		texPool.release(mDepthTex);
+
+		if(mSceneColorNonMSAATex != nullptr)
+			texPool.release(mSceneColorNonMSAATex);
+
+		if (mFlattenedSceneColorBuffer != nullptr)
+			texPool.release(mFlattenedSceneColorBuffer);
 	}
 	}
 
 
 	void RenderTargets::bindGBuffer()
 	void RenderTargets::bindGBuffer()
@@ -150,4 +177,25 @@ namespace bs { namespace ct
 	{
 	{
 		return mDepthTex->texture;
 		return mDepthTex->texture;
 	}
 	}
+
+	// Returns the non-MSAA scene color texture if one is set, or the regular scene color texture otherwise.
+	SPtr<Texture> RenderTargets::getSceneColorNonMSAA() const
+	{
+		// No separate non-MSAA texture, fall back to the scene color itself
+		if (mSceneColorNonMSAATex == nullptr)
+			return getSceneColor();
+
+		return mSceneColorNonMSAATex->texture;
+	}
+
+	// Returns the render texture of the non-MSAA scene color if one is set, or the scene color's render texture
+	// otherwise.
+	SPtr<RenderTexture> RenderTargets::getSceneColorNonMSAART() const
+	{
+		const bool hasNonMSAATarget = mSceneColorNonMSAATex != nullptr;
+
+		return hasNonMSAATarget ? mSceneColorNonMSAATex->renderTexture : mSceneColorTex->renderTexture;
+	}
+
+	// Returns the flattened scene color buffer, or null if none has been allocated.
+	SPtr<GpuBuffer> RenderTargets::getFlattenedSceneColorBuffer() const
+	{
+		// allocate() only creates this buffer for multisampled views on render APIs without MSAA image stores, so it
+		// can be missing. Return null in that case rather than dereferencing a null pointer.
+		if (mFlattenedSceneColorBuffer == nullptr)
+			return nullptr;
+
+		return mFlattenedSceneColorBuffer->buffer;
+	}
 }}
 }}

+ 1 - 1
Source/RenderBeast/Source/BsRendererCamera.cpp

@@ -331,7 +331,7 @@ namespace bs { namespace ct
 		clipToUVScaleOffset.w = viewRect.y / rtHeight + (halfHeight + rapiInfo.getVerticalTexelOffset()) / rtHeight;
 		clipToUVScaleOffset.w = viewRect.y / rtHeight + (halfHeight + rapiInfo.getVerticalTexelOffset()) / rtHeight;
 
 
 		// Either of these flips the Y axis, but if they're both true they cancel out
 		// Either of these flips the Y axis, but if they're both true they cancel out
-		if (rapiInfo.getUVYAxisUp() ^ rapiInfo.getNDCYAxisDown())
+		if (rapiInfo.isFlagSet(RenderAPIFeatureFlag::UVYAxisUp) ^ rapiInfo.isFlagSet(RenderAPIFeatureFlag::NDCYAxisDown))
 			clipToUVScaleOffset.y = -clipToUVScaleOffset.y;
 			clipToUVScaleOffset.y = -clipToUVScaleOffset.y;
 
 
 		gPerCameraParamDef.gClipToUVScaleOffset.set(mParamBuffer, clipToUVScaleOffset);
 		gPerCameraParamDef.gClipToUVScaleOffset.set(mParamBuffer, clipToUVScaleOffset);