Explorar el Código

Tiled deferred renderer now handles reflection probes
Split deferred direct lighting and image based lighting into two separate shaders to reduce load on shared memory
Reorganized render target allocation so it's cleaner and targets can be released as soon as they're no longer needed

BearishSun hace 8 años
padre
commit
a4a191ca37

+ 4 - 0
Data/Raw/Engine/DataList.json

@@ -245,6 +245,10 @@
         {
             "Path": "IrradianceReduceSH.bsl",
             "UUID": "aa40f2be-00e4-4322-a4bf-e435528f1e6e"
+        },
+        {
+            "Path": "TiledDeferredImageBasedLighting.bsl",
+            "UUID": "6029db14-107f-43df-9a33-7105c56aa0fd"
         }
     ],
     "Skin": [

+ 7 - 6
Data/Raw/Engine/Includes/ImageBasedLighting.bslinc

@@ -36,7 +36,8 @@ Technique : base("ImageBasedLighting") =
 
 			#ifdef USE_COMPUTE_INDICES
 				groupshared uint gReflectionProbeIndices[MAX_PROBES];
-			#else
+			#endif
+			#ifdef USE_LIGHT_GRID_INDICES
 				Buffer<uint> gReflectionProbeIndices;
 			#endif
 			
@@ -54,7 +55,7 @@ Technique : base("ImageBasedLighting") =
 				return gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, dir, 0).rgb * gSkyBrightness;
 			}
 			
-			float3 getSphereReflectionContribution(float normalizedDistance)
+			float getSphereReflectionContribution(float normalizedDistance)
 			{			
 				// If closer than 60% to the probe radius, then full contribution is used.
 				// For the other 40% we smoothstep and return contribution lower than 1 so other
@@ -87,7 +88,7 @@ Technique : base("ImageBasedLighting") =
 				return lookupDir;
 			}
 			
-			float3 getDistBoxToPoint(float3 pt, float3 extents)
+			float getDistBoxToPoint(float3 pt, float3 extents)
 			{
 				float3 d = max(max(-extents - pt, 0), pt - extents);
 				return length(d);
@@ -131,7 +132,7 @@ Technique : base("ImageBasedLighting") =
 				//   return t * t * (3.0 - 2.0 * t);
 				
 				float t = saturate(3.3333 - 3.3333 * normalizedDistance);
-				return t * t * (3.0 - 2.0 * t);
+				contribution = t * t * (3.0 - 2.0 * t);
 				
 				return lookupDir;
 			}
@@ -193,7 +194,7 @@ Technique : base("ImageBasedLighting") =
 				#endif
 			}
 			
-			float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData)
+			float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint probeOffset, uint numProbes)
 			{
 				// See C++ code for generation of gPreintegratedEnvBRDF to see why this code works as is
 				float3 N = surfaceData.worldNormal.xyz;
@@ -202,7 +203,7 @@ Technique : base("ImageBasedLighting") =
 				// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
 				// For more customizability allow the user to provide separate albedo/specular colors for both types.
 				float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
-				float3 radiance = gatherReflectionRadiance(worldPos, R, surfaceData.roughness, specularColor, 0, 0);
+				float3 radiance = gatherReflectionRadiance(worldPos, R, surfaceData.roughness, specularColor, probeOffset, numProbes);
 				
 				float2 envBRDF = gPreintegratedEnvBRDF.SampleLevel(gPreintegratedEnvBRDFSamp, float2(NoV, surfaceData.roughness), 0).rg;
 				

+ 1 - 6
Data/Raw/Engine/Includes/LightGridCommon.bslinc

@@ -17,12 +17,7 @@ Technique : base("LightGridCommon") =
 				uint gMaxNumLightsPerCell;
 				uint2 gGridPixelSize;
 			}
-			
-			float convertToNDCZ(float viewZ)
-			{
-				return -gNDCZToWorldZ.y + (gNDCZToWorldZ.x / viewZ);
-			}
-			
+						
 			float calcViewZFromCellZ(uint cellZ)
 			{
 				// We don't want to subdivide depth uniformly because XY sizes will be much

+ 3 - 2
Data/Raw/Engine/Includes/LightingCommon.bslinc

@@ -199,13 +199,14 @@ Technique
 			
 			#ifdef USE_COMPUTE_INDICES
 				groupshared uint gLightIndices[MAX_LIGHTS];
-			#else
+			#endif
+			#ifdef USE_LIGHT_GRID_INDICES
 				Buffer<uint> gLightIndices;
 			#endif
 			
 			float4 getDirectLighting(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint4 lightOffsets)
 			{
-				float3 N = surfaceData.worldNormal;
+				float3 N = surfaceData.worldNormal.xyz;
 				float roughness2 = max(surfaceData.roughness, 0.08f);
 				roughness2 *= roughness2;
 				

+ 6 - 0
Data/Raw/Engine/Includes/PerCameraData.bslinc

@@ -66,6 +66,12 @@ Technique : base("PerCameraData") =
 			{
 				return (1.0f / (deviceZ + gDeviceZToWorldZ.y)) * gDeviceZToWorldZ.x;
 			}
+			
+			/** Maps a view space Z value to NDC space using the gNDCZToWorldZ constants. */
+			float convertToNDCZ(float viewZ)
+			{
+				float scaledInvZ = gNDCZToWorldZ.x / viewZ;
+				return scaledInvZ - gNDCZToWorldZ.y;
+			}
 		};
 	};
 };

+ 1 - 0
Data/Raw/Engine/Shaders/LightGridLLCreation.bsl

@@ -1,4 +1,5 @@
 #include "$ENGINE$\PerCameraData.bslinc"
+#define USE_LIGHT_GRID_INDICES
 #include "$ENGINE$\LightingCommon.bslinc"
 #include "$ENGINE$\LightGridCommon.bslinc"
 

+ 291 - 0
Data/Raw/Engine/Shaders/TiledDeferredImageBasedLighting.bsl

@@ -0,0 +1,291 @@
+#include "$ENGINE$\GBufferInput.bslinc"
+#include "$ENGINE$\PerCameraData.bslinc"
+#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#define USE_COMPUTE_INDICES
+#include "$ENGINE$\LightingCommon.bslinc"
+#include "$ENGINE$\ImageBasedLighting.bslinc"
+
+Technique 
+  : inherits("GBufferInput")
+  : inherits("PerCameraData")
+  : inherits("LightingCommon")
+  : inherits("ReflectionCubemapCommon")
+  : inherits("ImageBasedLighting") =
+{
+	Language = "HLSL11";
+	
+	Pass =
+	{
+		Compute = 
+		{			
+			cbuffer Params : register(b0)
+			{
+				uint2 gFramebufferSize;
+			}
+		
+			#if MSAA_COUNT > 1
+			Buffer<float4> gInColor;
+			RWBuffer<float4> gOutput;
+			
+			/** Flattens a pixel coordinate and MSAA sample index into an element index of the per-sample buffers. */
+			uint getLinearAddress(uint2 coord, uint sampleIndex)
+			{
+				uint pixelIndex = coord.y * gFramebufferSize.x + coord.x;
+				return pixelIndex * MSAA_COUNT + sampleIndex;
+			}
+			
+			/** Stores @p color into the output buffer slot belonging to the given pixel and sample. */
+			void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
+			{
+				gOutput[getLinearAddress(coord, sampleIndex)] = color;
+			}
+			
+			/** Fetches the input color stored for the given pixel and sample. */
+			float4 readInColorSample(uint2 coord, uint sampleIndex)
+			{
+				return gInColor[getLinearAddress(coord, sampleIndex)];
+			}
+
+			#else
+			Texture2D<float4> gInColor;
+			RWTexture2D<float4>	gOutput;
+			#endif
+						
+			groupshared uint sTileMinZ;
+			groupshared uint sTileMaxZ;
+			/**
+			 * Determines the smallest and largest negated depth over all samples of all pixels handled by the thread
+			 * group, by reducing into the sTileMinZ / sTileMaxZ group shared variables with atomics. The function
+			 * contains group-wide barriers, so it has to be reached by every thread of the group.
+			 *
+			 * @param	threadIndex		Flattened index of the calling thread within its group. Thread 0 resets the
+			 *							shared bounds.
+			 * @param	surfaceData		GBuffer data for every MSAA sample of the calling thread's pixel.
+			 * @param	minTileZ		Receives the minimum of -depth over the group.
+			 * @param	maxTileZ		Receives the maximum of -depth over the group.
+			 */
+			void getTileZBounds(uint threadIndex, SurfaceData surfaceData[MSAA_COUNT], out float minTileZ, out float maxTileZ)
+			{
+				// Note: To improve performance perhaps:
+				//  - Use halfZ (split depth range into two regions for better culling)
+				//  - Use parallel reduction instead of atomics
+			
+				uint sampleMinZ = 0x7F7FFFFF; // Bit pattern of the largest finite 32-bit float
+				uint sampleMaxZ = 0;
+
+				#if MSAA_COUNT > 1
+				[unroll]
+				for(uint i = 0; i < MSAA_COUNT; ++i)
+				{
+					sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth)); // Float bits are compared as integers
+					sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth)); // NOTE(review): assumes -depth >= 0 - confirm
+				}
+				#else
+				sampleMinZ = asuint(-surfaceData[0].depth);
+				sampleMaxZ = asuint(-surfaceData[0].depth);
+				#endif
+
+				// Set initial values (a single thread resets the shared bounds before anyone accumulates into them)
+				if(threadIndex == 0)
+				{
+					sTileMinZ = 0x7F7FFFFF;
+					sTileMaxZ = 0;
+				}
+				
+				GroupMemoryBarrierWithGroupSync();
+				
+				// Determine minimum and maximum depth values for a tile			
+				InterlockedMin(sTileMinZ, sampleMinZ);
+				InterlockedMax(sTileMaxZ, sampleMaxZ);
+				
+				GroupMemoryBarrierWithGroupSync();
+				
+			    minTileZ = asfloat(sTileMinZ);
+				maxTileZ = asfloat(sTileMaxZ);
+			}
+			/**
+			 * Builds a view space axis aligned bounding box for a screen tile. XY bounds come from unprojecting four
+			 * tile corners through gMatInvProj, while the Z bounds are taken directly from the provided values.
+			 *
+			 * @param	tileId		XY index of the tile, in units of TILE_SIZE pixels.
+			 * @param	viewZMin	Value stored as the Z component of the box minimum.
+			 * @param	viewZMax	Value stored as the Z component of the box maximum.
+			 * @param	center		Receives the center of the box.
+			 * @param	extent		Receives the half-size of the box.
+			 */
+			void calcTileAABB(uint2 tileId, float viewZMin, float viewZMax, out float3 center, out float3 extent)
+			{
+				// Convert tile XY coordinates to NDC coordinates (using the centers of the tile's corner pixels)
+				float2 uvTopLeft = (tileId * TILE_SIZE + 0.5f) / gFramebufferSize;
+				float2 uvBottomRight = ((tileId + uint2(1, 1)) * TILE_SIZE - 0.5f) / gFramebufferSize;
+			
+				float3 ndcMin;
+				float3 ndcMax;
+			
+				ndcMin.xy = uvTopLeft * 2.0f - float2(1.0f, 1.0f);
+				ndcMax.xy = uvBottomRight * 2.0f - float2(1.0f, 1.0f);
+			
+				// Flip Y depending on render API, depending if Y in NDC is facing up or down
+				// (We negate the value because we want NDC with Y flipped, so origin is top left)
+				float flipY = -sign(gMatProj[1][1]);
+				ndcMin.y *= flipY;
+				ndcMax.y *= flipY;
+			
+				// Camera is looking along negative z, therefore min in view space is max in NDC
+				ndcMin.z = convertToNDCZ(viewZMax);
+				ndcMax.z = convertToNDCZ(viewZMin);
+			
+				float4 corner[5];
+				// Far
+				corner[0] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMax.z, 1.0f));
+				corner[1] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMax.z, 1.0f));
+				corner[2] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMax.z, 1.0f));
+				corner[3] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMax.z, 1.0f));
+				
+				// Near (only one point, as the far away face is guaranteed to be larger in XY extents)
+				corner[4] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMin.z, 1.0f)); // NOTE(review): never read by the min/max loop below
+			
+				[unroll]
+				for(uint i = 0; i < 5; ++i)
+					corner[i].xy /= corner[i].w; // Perspective divide, only XY is needed
+			
+				float3 viewMin = float3(corner[0].xy, viewZMin);
+				float3 viewMax = float3(corner[0].xy, viewZMax);
+				
+				[unroll]
+				for(uint i = 1; i < 4; ++i) // Only the four corners of the first face contribute to the XY bounds
+				{
+					viewMin.xy = min(viewMin.xy, corner[i].xy);
+					viewMax.xy = max(viewMax.xy, corner[i].xy);
+				}
+				
+				extent = (viewMax - viewMin) * 0.5f;
+				center = viewMin + extent;
+			}
+			
+			/**
+			 * Tests a sphere against an axis aligned box given by its center and half-extents. Returns true when the
+			 * squared distance from the sphere center to the box is smaller than the squared radius.
+			 */
+			bool intersectSphereBox(float3 sCenter, float sRadius, float3 bCenter, float3 bExtents)
+			{
+				// Per-axis distance from the sphere center to the box surface, zero on axes where the center is inside
+				float3 centerDelta = abs(bCenter - sCenter);
+				float3 outsideDist = max(0, centerDelta - bExtents);
+				
+				float distSq = dot(outsideDist, outsideDist);
+				return distSq < sRadius * sRadius;
+			}
+			/**
+			 * Evaluates sky indirect diffuse and image based specular lighting for one surface sample and adds both to
+			 * the color already stored for that sample in gInColor.
+			 *
+			 * @param	pixelPos		Pixel to read the existing color from.
+			 * @param	sampleIdx		MSAA sample to read the existing color from (only used when MSAA_COUNT > 1).
+			 * @param	clipSpacePos	XY position of the pixel in clip space.
+			 * @param	surfaceData		GBuffer data of the sample being lit.
+			 * @param	probeOffset		Forwarded unchanged to getImageBasedSpecular().
+			 * @param	numProbes		Forwarded unchanged to getImageBasedSpecular().
+			 * @return					Existing color with the indirect lighting added to its RGB channels.
+			 */
+			float4 getLighting(uint2 pixelPos, uint sampleIdx, float2 clipSpacePos, SurfaceData surfaceData, uint probeOffset, uint numProbes)
+			{
+				// x, y are now in clip space, z, w are in view space
+				// We multiply them by a special inverse view-projection matrix, that had the projection entries that affect
+				// z, w eliminated (since they are already in view space)
+				// Note: Multiply by depth should be avoided if using orthographic projection
+				float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
+				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
+				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
+				
+				float3 V = normalize(gViewOrigin - worldPosition);
+				float3 N = surfaceData.worldNormal.xyz;
+				float3 R = 2 * dot(V, N) * N - V; // View vector reflected around the normal
+				float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
+				
+				float4 existingColor;
+				#if MSAA_COUNT > 1
+				existingColor = readInColorSample(pixelPos, sampleIdx);
+				#else
+				existingColor = gInColor.Load(int3(pixelPos.xy, 0));
+				#endif				
+				
+				float3 indirectDiffuse = getSkyIndirectDiffuse(N) * surfaceData.albedo.rgb;
+				float3 imageBasedSpecular = getImageBasedSpecular(worldPosition, V, specR, surfaceData, probeOffset, numProbes);
+
+				float4 totalLighting = existingColor; // Alpha of the existing color is passed through untouched
+				totalLighting.rgb += indirectDiffuse;
+				totalLighting.rgb += imageBasedSpecular;
+				
+				return totalLighting;				
+			}
+						
+			groupshared uint gUnsortedProbeIndices[MAX_PROBES];
+			groupshared uint sNumProbes;
+			
+			/**
+			 * Compute shader entry point. Each thread group handles one TILE_SIZE x TILE_SIZE screen tile:
+			 *  1. Every thread loads the GBuffer data of its pixel and the group reduces it into tile depth bounds.
+			 *  2. The threads cooperatively cull the reflection probes against the tile's view space AABB.
+			 *  3. The surviving probe indices are sorted by their original index into gReflectionProbeIndices.
+			 *  4. Every thread inside the viewport adds the indirect lighting to the existing color of its pixel.
+			 *
+			 * @param	groupId				Index of the thread group, i.e. of the tile.
+			 * @param	groupThreadId		Index of the thread within its group.
+			 * @param	dispatchThreadId	Global thread index, equal to the pixel position relative to the viewport origin.
+			 */
+			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
+			void main(
+				uint3 groupId : SV_GroupID,
+				uint3 groupThreadId : SV_GroupThreadID,
+				uint3 dispatchThreadId : SV_DispatchThreadID)
+			{
+				const uint numThreads = TILE_SIZE * TILE_SIZE;
+				
+				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
+				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
+				
+				// Get data for all samples
+				SurfaceData surfaceData[MSAA_COUNT];
+				
+				#if MSAA_COUNT > 1
+				[unroll]
+				for(uint i = 0; i < MSAA_COUNT; ++i)
+					surfaceData[i] = getGBufferData(pixelPos, i);
+				#else
+				surfaceData[0] = getGBufferData(pixelPos);
+				#endif
+
+				// Set initial values. The barriers inside getTileZBounds() guarantee the reset is visible to the entire
+				// group before any thread starts appending probes.
+				if(threadIndex == 0)
+					sNumProbes = 0;
+				
+				// Determine per-pixel minimum and maximum depth values
+				float minTileZ, maxTileZ;
+				getTileZBounds(threadIndex, surfaceData, minTileZ, maxTileZ);
+				
+				// Create AABB for the current tile
+				float3 center, extent;
+				calcTileAABB(groupId.xy, minTileZ, maxTileZ, center, extent);
+				
+				// Find probes overlapping the tile. Every thread tests its own strided subset of the probes, so each probe
+				// is tested (and appended) exactly once per group. The index is bounded by MAX_PROBES because that is the
+				// capacity of both gUnsortedProbeIndices and gReflectionProbeIndices.
+				for (uint i = threadIndex; i < gNumProbes && i < MAX_PROBES; i += numThreads)
+				{
+					float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
+					float probeRadius = gReflectionProbes[i].radius;
+				
+					if(intersectSphereBox(probePosition.xyz, probeRadius, center, extent))
+					{
+						uint idx;
+						InterlockedAdd(sNumProbes, 1U, idx);
+						gUnsortedProbeIndices[idx] = i;
+					}
+				}
+
+				GroupMemoryBarrierWithGroupSync();
+
+				// Sort based on original indices. Using parallel enumeration sort (n^2) - could be faster.
+				// Indices are unique, so the number of smaller entries is the final position of each entry.
+				for (uint i = threadIndex; i < sNumProbes; i += numThreads)
+				{
+					uint idx = gUnsortedProbeIndices[i];
+					uint smallerCount = 0;
+
+					for (uint j = 0; j < sNumProbes; j++) 
+					{
+						uint otherIdx = gUnsortedProbeIndices[j];
+						if (otherIdx < idx)
+							smallerCount++;
+					}
+
+					gReflectionProbeIndices[smallerCount] = idx;
+				}
+				
+				GroupMemoryBarrierWithGroupSync();
+				
+				// Only the probes that survived culling are valid entries of gReflectionProbeIndices
+				uint numTileProbes = sNumProbes;
+				
+				// Generate world position
+				// NOTE(review): pixelPos already contains the viewport offset, so it is applied twice here whenever the
+				// viewport doesn't start at the origin - confirm against how gClipToUVScaleOffset is set up.
+				float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
+				float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+			
+				uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
+
+				// Ignore pixels out of valid range. pixelPos (not dispatchThreadId) is compared because viewportMax
+				// includes the viewport offset.
+				if (all(pixelPos < viewportMax))
+				{
+					#if MSAA_COUNT > 1
+					float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, numTileProbes);
+					writeBufferSample(pixelPos, 0, lighting);
+
+					bool doPerSampleShading = needsPerSampleShading(surfaceData);
+					if(doPerSampleShading)
+					{
+						[unroll]
+						for(uint i = 1; i < MSAA_COUNT; ++i)
+						{
+							lighting = getLighting(pixelPos, i, clipSpacePos.xy, surfaceData[i], 0, numTileProbes);
+							writeBufferSample(pixelPos, i, lighting);
+						}
+					}
+					else // Splat same information to all samples
+					{
+						[unroll]
+						for(uint i = 1; i < MSAA_COUNT; ++i)
+							writeBufferSample(pixelPos, i, lighting);
+					}
+					
+					#else
+					float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, numTileProbes);
+					gOutput[pixelPos] = lighting;
+					#endif
+				}
+			}
+		};
+	};
+};

+ 7 - 9
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -72,15 +72,7 @@ Technique
 				float3 R = 2 * dot(V, N) * N - V;
 				float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
 				
-				float4 directLighting = getDirectLighting(worldPosition, V, specR, surfaceData, lightOffsets);
-				float3 indirectDiffuse = getSkyIndirectDiffuse(surfaceData.worldNormal) * surfaceData.albedo;
-				float3 imageBasedSpecular = getImageBasedSpecular(worldPosition, V, specR, surfaceData);
-
-				float4 totalLighting = directLighting;
-				totalLighting.rgb += indirectDiffuse;
-				totalLighting.rgb += imageBasedSpecular;
-				
-				return totalLighting;				
+				return getDirectLighting(worldPosition, V, specR, surfaceData, lightOffsets);				
 			}
 			
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
@@ -92,6 +84,12 @@ Technique
 				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
 				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
 				
+				// Note: To improve performance perhaps:
+				//  - Use halfZ (split depth range into two regions for better culling)
+				//  - Use parallel reduction instead of atomics
+				//  - Use AABB instead of frustum (no false positives)
+				//   - Increase tile size to 32x32 to amortize the cost of AABB calc (2x if using halfZ)
+				
 				// Get data for all samples, and determine per-pixel minimum and maximum depth values
 				SurfaceData surfaceData[MSAA_COUNT];
 				uint sampleMinZ = 0x7F7FFFFF;

+ 3 - 2
Data/Raw/Engine/Shaders/Transparent.bsl

@@ -1,7 +1,8 @@
 #include "$ENGINE$\BasePass.bslinc"
-#include "$ENGINE$\LightingCommon.bslinc"
 #include "$ENGINE$\LightGridCommon.bslinc"
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#define USE_LIGHT_GRID_INDICES
+#include "$ENGINE$\LightingCommon.bslinc"
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 #include "$ENGINE$\Surface.bslinc"
 
@@ -94,7 +95,7 @@ Technique
 				
 				float4 directLighting = getDirectLighting(input.worldPosition, V, specR, surfaceData, lightOffsets);
 				float3 indirectDiffuse = getSkyIndirectDiffuse(surfaceData.worldNormal) * surfaceData.albedo;
-				float3 imageBasedSpecular = getImageBasedSpecular(input.worldPosition, V, specR, surfaceData);
+				float3 imageBasedSpecular = getImageBasedSpecular(input.worldPosition, V, specR, surfaceData, 0, 0);
 
 				float3 totalLighting = directLighting.rgb;
 				totalLighting.rgb += indirectDiffuse;

+ 8 - 2
Source/BansheeEngine/Source/BsLightProbeCache.cpp

@@ -107,7 +107,10 @@ namespace bs { namespace ct
 	{
 		auto iterFind = mProbeInfos.find(uuid);
 		if (iterFind != mProbeInfos.end())
-			return iterFind->second.radiance.dirty;
+		{
+			if(iterFind->second.radiance.texture != nullptr)
+				return iterFind->second.radiance.dirty;
+		}
 
 		return true;
 	}
@@ -116,7 +119,10 @@ namespace bs { namespace ct
 	{
 		auto iterFind = mProbeInfos.find(uuid);
 		if (iterFind != mProbeInfos.end())
-			return iterFind->second.irradiance.dirty;
+		{
+			if(iterFind->second.irradiance.texture != nullptr)
+				return iterFind->second.irradiance.dirty;
+		}
 
 		return true;
 	}

+ 2 - 2
Source/RenderBeast/CMakeSources.cmake

@@ -12,7 +12,7 @@ set(BS_RENDERBEAST_INC_NOFILTER
 	"Include/BsRendererCamera.h"
 	"Include/BsRendererObject.h"
 	"Include/BsLightGrid.h"
-	"Include/BsReflectionProbeSampling.h"
+	"Include/BsImageBasedLighting.h"
 )
 
 set(BS_RENDERBEAST_SRC_NOFILTER
@@ -28,7 +28,7 @@ set(BS_RENDERBEAST_SRC_NOFILTER
 	"Source/BsRendererCamera.cpp"
 	"Source/BsRendererObject.cpp"
 	"Source/BsLightGrid.cpp"
-	"Source/BsReflectionProbeSampling.cpp"
+	"Source/BsImageBasedLighting.cpp"
 )
 
 source_group("Header Files" FILES ${BS_RENDERBEAST_INC_NOFILTER})

+ 199 - 0
Source/RenderBeast/Include/BsImageBasedLighting.h

@@ -0,0 +1,199 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#pragma once
+
+#include "BsRenderBeastPrerequisites.h"
+#include "BsRendererMaterial.h"
+#include "BsParamBlocks.h"
+
+namespace bs { namespace ct
+{
+	/** @addtogroup RenderBeast
+	 *  @{
+	 */
+
+	/**
+	 * Information about a single reflection probe, as seen by the lighting shader.
+	 *
+	 * NOTE(review): presumably the member order and types must match the shader-side probe structure, since this
+	 * data is uploaded to a GPU buffer - confirm before reordering or adding fields.
+	 */
+	struct ReflProbeData
+	{
+		Vector3 position;
+		float radius;
+		Vector3 boxExtents;
+		Matrix4 invBoxTransform;
+		float transitionDistance;
+		UINT32 cubemapIdx;
+		UINT32 type;
+	};
+
+	/** Contains GPU buffers used by the renderer to manipulate reflection probes. */
+	class GPUReflProbeData
+	{
+	public:
+		GPUReflProbeData();
+
+		/**
+		 * Updates the internal buffers with a new set of probes.
+		 *
+		 * @param[in]	probeData	Per-probe information to place in the buffer.
+		 * @param[in]	numProbes	Number of probes. NOTE(review): relationship to probeData.size() isn't visible here,
+		 *							presumably it may be smaller - confirm against the implementation.
+		 */
+		void setProbes(const Vector<ReflProbeData>& probeData, UINT32 numProbes);
+
+		/** Returns a GPU bindable buffer containing information about every reflection probe. */
+		SPtr<GpuBuffer> getProbeBuffer() const { return mProbeBuffer; }
+
+		/** Returns the number of reflection probes in the probe buffer. */
+		UINT32 getNumProbes() const { return mNumProbes; }
+
+	private:
+		SPtr<GpuBuffer> mProbeBuffer;
+
+		UINT32 mNumProbes;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(ReflProbeParamsParamDef)
+		BS_PARAM_BLOCK_ENTRY(INT32, gReflCubemapNumMips)
+		BS_PARAM_BLOCK_ENTRY(INT32, gNumProbes)
+		BS_PARAM_BLOCK_ENTRY(INT32, gSkyCubemapAvailable)
+		BS_PARAM_BLOCK_ENTRY(INT32, gSkyCubemapNumMips)
+		BS_PARAM_BLOCK_ENTRY(float, gSkyBrightness)
+	BS_PARAM_BLOCK_END
+
+	extern ReflProbeParamsParamDef gReflProbeParamsParamDef;
+
+	/**
+	 * Renderer information specific to a single reflection probe. All members are public and the four state flags
+	 * are declared as single-bit bit-fields.
+	 */
+	class RendererReflectionProbe
+	{
+	public:
+		RendererReflectionProbe(ReflectionProbe* probe);
+
+		/** Populates the structure with reflection probe parameters. */
+		void getParameters(ReflProbeData& output) const;
+
+		ReflectionProbe* probe; // NOTE(review): raw pointer, presumably non-owning - confirm lifetime with the caller
+		UINT32 arrayIdx;
+		SPtr<Texture> texture;
+		bool customTexture : 1;
+		bool textureDirty : 1;
+		bool arrayDirty : 1;
+		bool errorFlagged : 1;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(TiledImageBasedLightingParamDef)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gFramebufferSize)
+	BS_PARAM_BLOCK_END
+
+	extern TiledImageBasedLightingParamDef gTiledImageBasedLightingParamDef;
+
+	/** Functionality common to all versions of TTiledDeferredImageBasedLightingMat<T>. */
+	class TiledDeferredImageBasedLighting
+	{
+	public:
+		TiledDeferredImageBasedLighting(const SPtr<Material>& material, const SPtr<GpuParamsSet>& paramsSet, 
+			UINT32 sampleCount);
+
+		/** Binds the material for rendering, sets up parameters and executes it. */
+		void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+					 const SPtr<Texture>& preintegratedGF);
+
+		/** Binds all the active reflection probes. */
+		void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps);
+
+		/** Binds the sky reflection & irradiance textures. Set textures to null if not available. */
+		void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness);
+
+		/**
+		 * Generates a 2D 2-channel texture containing pre-integrated G and F factors of the microfacet BRDF. This is an
+		 * approximation used for image based lighting, so we can avoid sampling environment maps for each light. Works in
+		 * tandem with the importance sampled reflection cubemaps.
+		 * 
+		 * (u, v) = (NoV, roughness)
+		 * (r, g) = (scale, bias)
+		 */
+		static SPtr<Texture> generatePreintegratedEnvBRDF();
+
+		/** NOTE(review): presumably has to match the TILE_SIZE define the compute shader is compiled with - confirm. */
+		static const UINT32 TILE_SIZE;
+	private:
+		UINT32 mSampleCount;
+		SPtr<Material> mMaterial;
+		SPtr<GpuParamsSet> mParamsSet;
+
+		GpuParamTexture mGBufferA;
+		GpuParamTexture mGBufferB;
+		GpuParamTexture mGBufferC;
+		GpuParamTexture mGBufferDepth;
+
+		GpuParamTexture mInColorTextureParam;
+		GpuParamBuffer mInColorBufferParam;
+
+		GpuParamTexture mSkyReflectionsParam;
+		GpuParamTexture mSkyIrradianceParam;
+		GpuParamTexture mReflectionProbeCubemapsParam;
+		GpuParamTexture mPreintegratedEnvBRDFParam;
+		GpuParamBuffer mReflectionProbesParam;
+
+		GpuParamLoadStoreTexture mOutputTextureParam;
+		GpuParamBuffer mOutputBufferParam;
+
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		SPtr<GpuParamBlockBuffer> mReflectionsParamBuffer;
+		SPtr<SamplerState> mReflectionSamplerState;
+	};
+
+	/** Interface implemented by all versions of TTiledDeferredImageBasedLightingMat<T>. */
+	class ITiledDeferredImageBasedLightingMat
+	{
+	public:
+		virtual ~ITiledDeferredImageBasedLightingMat() {}
+
+		/** @copydoc TiledDeferredImageBasedLighting::execute() */
+		virtual void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+			const SPtr<Texture>& preintegratedGF) = 0;
+
+		/** @copydoc TiledDeferredImageBasedLighting::setReflectionProbes() */
+		virtual void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps) = 0;
+
+		/** @copydoc TiledDeferredImageBasedLighting::setSky() */
+		virtual void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness) = 0;
+	};
+
+	/**
+	 * Material wrapping the TiledDeferredImageBasedLighting.bsl compute shader, which adds image based lighting on
+	 * top of an already lit scene color using data stored in the GBuffer.
+	 *
+	 * @tparam	MSAA_COUNT				Number of samples per pixel the shader variation handles.
+	 * @tparam	CapturingReflections	Selects the variation used while rendering reflection probes themselves.
+	 */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	class TTiledDeferredImageBasedLightingMat : public ITiledDeferredImageBasedLightingMat, 
+		public RendererMaterial<TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>>
+	{
+		RMAT_DEF("TiledDeferredImageBasedLighting.bsl");
+
+	public:
+		TTiledDeferredImageBasedLightingMat();
+
+		/** @copydoc ITiledDeferredImageBasedLightingMat::execute() */
+		void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+			const SPtr<Texture>& preintegratedGF) override;
+
+		/** @copydoc ITiledDeferredImageBasedLightingMat::setReflectionProbes() */
+		void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps) override;
+
+		/** @copydoc ITiledDeferredImageBasedLightingMat::setSky() */
+		void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness) override;
+	private:
+		TiledDeferredImageBasedLighting mInternal; // Shared implementation that the overrides presumably forward to
+	};
+
+	/** Contains instances for all types of tile deferred image based lighting materials. */
+	class TiledDeferredImageBasedLightingMaterials
+	{
+	public:
+		TiledDeferredImageBasedLightingMaterials();
+		~TiledDeferredImageBasedLightingMaterials();
+
+		/**
+		* Returns a version of the tile-deferred image based lighting material that matches the parameters.
+		*
+		* @param[in]   msaa						Number of samples per pixel.
+		* @param[in]   capturingReflections		If true reflection probes will not be evaluated and the material's
+		*										specular color will be returned instead. Useful when rendering reflection
+		*										probes.
+		*/
+		ITiledDeferredImageBasedLightingMat* get(UINT32 msaa, bool capturingReflections);
+
+	private:
+		ITiledDeferredImageBasedLightingMat* mInstances[8]; // NOTE(review): presumably 4 MSAA variations x 2 capture modes - confirm
+	};
+
+	/** @} */
+}}

+ 9 - 51
Source/RenderBeast/Include/BsLightRendering.h

@@ -5,7 +5,6 @@
 #include "BsRenderBeastPrerequisites.h"
 #include "BsRendererMaterial.h"
 #include "BsParamBlocks.h"
-#include "BsReflectionProbeSampling.h"
 
 namespace bs { namespace ct
 {
@@ -85,28 +84,11 @@ namespace bs { namespace ct
 		TiledDeferredLighting(const SPtr<Material>& material, const SPtr<GpuParamsSet>& paramsSet, UINT32 sampleCount);
 
 		/** Binds the material for rendering, sets up parameters and executes it. */
-		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera, 
-			const SPtr<Texture>& preintegratedGF, bool noLighting);
+		void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera, bool noLighting);
 
 		/** Binds all the active lights. */
 		void setLights(const GPULightData& lightData);
 
-		/** Binds all the active reflection probes. */
-		void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps);
-
-		/** Binds the sky reflection & irradiance textures. Set textures to null if not available. */
-		void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness);
-
-		/** 
-		 * Generates a 2D 2-channel texture containing a pre-integrated G and F factors of the microfactet BRDF. This is an
-		 * approximation used for image based lighting, so we can avoid sampling environment maps for each light. Works in
-		 * tandem with the importance sampled reflection cubemaps.
-		 * 
-		 * (u, v) = (NoV, roughness) 
-		 * (r, g) = (scale, bias)
-		 */
-		static SPtr<Texture> generatePreintegratedEnvBRDF();
-
 		static const UINT32 TILE_SIZE;
 	private:
 		UINT32 mSampleCount;
@@ -118,21 +100,12 @@ namespace bs { namespace ct
 		GpuParamTexture mGBufferC;
 		GpuParamTexture mGBufferDepth;
 
-		GpuParamTexture mSkyReflectionsParam;
-		GpuParamTexture mSkyIrradianceParam;
-		GpuParamTexture mReflectionProbeCubemapsParam;
-		GpuParamTexture mPreintegratedEnvBRDFParam;
-		GpuParamBuffer mReflectionProbesParam;
-
 		Vector3I mLightOffsets;
 		GpuParamBuffer mLightBufferParam;
 		GpuParamLoadStoreTexture mOutputTextureParam;
 		GpuParamBuffer mOutputBufferParam;
 
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
-		SPtr<GpuParamBlockBuffer> mReflectionsParamBuffer;
-
-		SPtr<SamplerState> mReflectionSamplerState;
 	};
 
 	/** Interface implemented by all versions of TTiledDeferredLightingMat<T>. */
@@ -142,22 +115,16 @@ namespace bs { namespace ct
 		virtual ~ITiledDeferredLightingMat() {}
 
 		/** @copydoc TiledDeferredLighting::execute() */
-		virtual void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera, 
-			const SPtr<Texture>& preintegratedGF, bool noLighting) = 0;
+		virtual void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+			bool noLighting) = 0;
 
 		/** @copydoc TiledDeferredLighting::setLights() */
 		virtual void setLights(const GPULightData& lightData) = 0;
-
-		/** @copydoc TiledDeferredLighting::setReflectionProbes() */
-		virtual void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps) = 0;
-
-		/** @copydoc TiledDeferredLighting::setSky() */
-		virtual void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness) = 0;
 	};
 
 	/** Shader that performs a lighting pass over data stored in the Gbuffer. */
-	template<int MSAA_COUNT, bool CapturingReflections>
-	class TTiledDeferredLightingMat : public ITiledDeferredLightingMat, public RendererMaterial<TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>>
+	template<int MSAA_COUNT>
+	class TTiledDeferredLightingMat : public ITiledDeferredLightingMat, public RendererMaterial<TTiledDeferredLightingMat<MSAA_COUNT>>
 	{
 		RMAT_DEF("TiledDeferredLighting.bsl");
 
@@ -165,17 +132,11 @@ namespace bs { namespace ct
 		TTiledDeferredLightingMat();
 
 		/** @copydoc ITiledDeferredLightingMat::execute() */
-		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera, 
-			const SPtr<Texture>& preintegratedGF, bool noLighting) override;
+		void execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+			bool noLighting) override;
 
 		/** @copydoc ITiledDeferredLightingMat::setLights() */
 		void setLights(const GPULightData& lightData) override;
-
-		/** @copydoc ITiledDeferredLightingMat::setReflectionProbes() */
-		void setReflectionProbes(const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps) override;
-
-		/** @copydoc ITiledDeferredLightingMat::setSky() */
-		void setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, float brightness) override;
 	private:
 		TiledDeferredLighting mInternal;
 	};
@@ -191,14 +152,11 @@ namespace bs { namespace ct
 		 * Returns a version of the tile-deferred lighting material that matches the parameters.
 		 * 
 		 * @param[in]   msaa					Number of samples per pixel.
-		 * @param[in]   capturingReflections	If true reflection probes will not be evaluated and instead the material's
-		 *										specular color will be returned instead. Useful when rendering reflection
-		 *										probes.
 		 */
-		ITiledDeferredLightingMat* get(UINT32 msaa, bool capturingReflections);
+		ITiledDeferredLightingMat* get(UINT32 msaa);
 
 	private:
-		ITiledDeferredLightingMat* mInstances[8];
+		ITiledDeferredLightingMat* mInstances[4];
 	};
 
 	BS_PARAM_BLOCK_BEGIN(FlatFramebufferToTextureParamDef)

+ 0 - 77
Source/RenderBeast/Include/BsReflectionProbeSampling.h

@@ -1,77 +0,0 @@
-//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
-//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
-#pragma once
-
-#include "BsRenderBeastPrerequisites.h"
-#include "BsRendererMaterial.h"
-#include "BsParamBlocks.h"
-
-namespace bs { namespace ct
-{
-	/** @addtogroup RenderBeast
-	 *  @{
-	 */
-
-	/** Information about a single reflection probe, as seen by the lighting shader. */
-	struct ReflProbeData
-	{
-		Vector3 position;
-		float radius;
-		Vector3 boxExtents;
-		Matrix4 invBoxTransform;
-		float transitionDistance;
-		UINT32 cubemapIdx;
-		UINT32 type;
-	};
-
-	/** Contains GPU buffers used by the renderer to manipulate reflection probes. */
-	class GPUReflProbeData
-	{
-	public:
-		GPUReflProbeData();
-
-		/** Updates the internal buffers with a new set of probes. */
-		void setProbes(const Vector<ReflProbeData>& probeData, UINT32 numProbes);
-
-		/** Returns a GPU bindable buffer containing information about every reflection probe. */
-		SPtr<GpuBuffer> getProbeBuffer() const { return mProbeBuffer; }
-
-		/** Returns the number of reflection probes in the probe buffer. */
-		UINT32 getNumProbes() const { return mNumProbes; }
-
-	private:
-		SPtr<GpuBuffer> mProbeBuffer;
-
-		UINT32 mNumProbes;
-	};
-
-	BS_PARAM_BLOCK_BEGIN(ReflProbeParamsParamDef)
-		BS_PARAM_BLOCK_ENTRY(INT32, gReflCubemapNumMips)
-		BS_PARAM_BLOCK_ENTRY(INT32, gNumProbes)
-		BS_PARAM_BLOCK_ENTRY(INT32, gSkyCubemapAvailable)
-		BS_PARAM_BLOCK_ENTRY(INT32, gSkyCubemapNumMips)
-		BS_PARAM_BLOCK_ENTRY(float, gSkyBrightness)
-	BS_PARAM_BLOCK_END
-
-	extern ReflProbeParamsParamDef gReflProbeParamsParamDef;
-
-	/**	Renderer information specific to a single reflection probe. */
-	class RendererReflectionProbe
-	{
-	public:
-		RendererReflectionProbe(ReflectionProbe* probe);
-
-		/** Populates the structure with reflection probe parameters. */
-		void getParameters(ReflProbeData& output) const;
-
-		ReflectionProbe* probe;
-		UINT32 arrayIdx;
-		SPtr<Texture> texture;
-		bool customTexture : 1;
-		bool textureDirty : 1;
-		bool arrayDirty : 1;
-		bool errorFlagged : 1;
-	};
-
-	/** @} */
-}}

+ 35 - 19
Source/RenderBeast/Include/BsRenderBeast.h

@@ -8,7 +8,7 @@
 #include "BsSamplerOverrides.h"
 #include "BsRendererMaterial.h"
 #include "BsLightRendering.h"
-#include "BsReflectionProbeSampling.h"
+#include "BsImageBasedLighting.h"
 #include "BsObjectRendering.h"
 #include "BsPostProcessing.h"
 #include "BsRendererCamera.h"
@@ -224,48 +224,60 @@ namespace bs
 		void refreshSamplerOverrides(bool force = false);
 
 		// Core thread only fields
+
+		// Scene data
+		//// Cameras and render targets
 		Vector<RendererRenderTarget> mRenderTargets;
 		UnorderedMap<const Camera*, RendererCamera*> mCameras;
-		UnorderedMap<SamplerOverrideKey, MaterialSamplerOverrides*> mSamplerOverrides;
-
+		
+		//// Renderables
 		Vector<RendererObject*> mRenderables;
 		Vector<CullInfo> mRenderableCullInfos;
 		Vector<bool> mRenderableVisibility; // Transient
 
+		//// Lights
 		Vector<RendererLight> mDirectionalLights;
 		Vector<RendererLight> mRadialLights;
 		Vector<RendererLight> mSpotLights;
 		Vector<Sphere> mPointLightWorldBounds;
 		Vector<Sphere> mSpotLightWorldBounds;
 
+		//// Reflection probes
 		Vector<RendererReflectionProbe> mReflProbes;
 		Vector<Sphere> mReflProbeWorldBounds;
 		Vector<bool> mCubemapArrayUsedSlots;
 		SPtr<Texture> mReflCubemapArrayTex;
 
-		SPtr<RenderBeastOptions> mCoreOptions;
-
-		DefaultMaterial* mDefaultMaterial;
-		TiledDeferredLightingMaterials* mTiledDeferredLightingMats;
-		FlatFramebufferToTextureMat* mFlatFramebufferToTextureMat;
-		SkyboxMat<false>* mSkyboxMat;
-		SkyboxMat<true>* mSkyboxSolidColorMat;
-
-		Skybox* mSkybox;
+		//// Sky light
+		Skybox* mSkybox = nullptr;
 		SPtr<Texture> mSkyboxTexture;
 		SPtr<Texture> mSkyboxFilteredReflections;
 		SPtr<Texture> mSkyboxIrradiance;
 
+		// Materials & GPU data
+		//// Base pass
+		DefaultMaterial* mDefaultMaterial = nullptr;
+		ObjectRenderer* mObjectRenderer = nullptr;
+
+		//// Lighting
+		TiledDeferredLightingMaterials* mTiledDeferredLightingMats = nullptr;
+		LightGrid* mLightGrid = nullptr;
+		GPULightData* mGPULightData = nullptr;
+
+		//// Image based lighting
+		TiledDeferredImageBasedLightingMaterials* mTileDeferredImageBasedLightingMats = nullptr;
+		GPUReflProbeData* mGPUReflProbeData = nullptr;
 		SPtr<Texture> mPreintegratedEnvBRDF;
-		GPULightData* mGPULightData;
-		GPUReflProbeData* mGPUReflProbeData;
-		LightGrid* mLightGrid;
 
-		ObjectRenderer* mObjectRenderer;
+		//// Sky
+		// Null-initialized like every other raw material pointer in this class, so the pointers are never indeterminate
+		// before the materials get created.
+		SkyboxMat<false>* mSkyboxMat = nullptr;
+		SkyboxMat<true>* mSkyboxSolidColorMat = nullptr;
 
-		// Sim thread only fields
-		SPtr<RenderBeastOptions> mOptions;
-		bool mOptionsDirty;
+		//// Other
+		FlatFramebufferToTextureMat* mFlatFramebufferToTextureMat = nullptr;
+
+		SPtr<RenderBeastOptions> mCoreOptions;
+		UnorderedMap<SamplerOverrideKey, MaterialSamplerOverrides*> mSamplerOverrides;
 
 		// Helpers to avoid memory allocations
 		Vector<LightData> mLightDataTemp;
@@ -273,6 +285,10 @@ namespace bs
 
 		Vector<ReflProbeData> mReflProbeDataTemp;
 		Vector<bool> mReflProbeVisibilityTemp;
+
+		// Sim thread only fields
+		SPtr<RenderBeastOptions> mOptions;
+		bool mOptionsDirty = true;
 	};
 
 	/** @} */

+ 71 - 32
Source/RenderBeast/Include/BsRenderTargets.h

@@ -12,6 +12,20 @@ namespace bs { namespace ct
 	 *  @{
 	 */
 
+	/**
+	 * Types of render target textures that can be allocated by the RenderTargets manager. A value of this type is passed
+	 * to RenderTargets::allocate() and RenderTargets::release() to select what gets allocated or released.
+	 */
+	enum RenderTargetType
+	{
+		/** 
+		 * Buffer containing albedo, normals, metalness/roughness and other material data, populated during base pass and
+		 * used during lighting and other operations.
+		 */
+		RTT_GBuffer,
+		/** Buffer containing intermediate lighting information used during deferred lighting pass. */
+		RTT_LightAccumulation,
+		/** Buffer containing final scene color information. */
+		RTT_SceneColor
+	};
+
 	/**
 	 * Allocates and handles all the required render targets for rendering a scene from a specific view.
 	 *
@@ -20,67 +34,79 @@ namespace bs { namespace ct
 	class RenderTargets
 	{
 	public:
+		/** 
+		 * Prepares any internal data for rendering. Should be called at the beginning of each frame, before allocating,
+		 * retrieving or binding any textures. Must eventually be followed by cleanup().
+		 */
+		void prepare();
+
 		/**
-		 * Creates a new set of render targets. This will not actually allocate the internal render targets - this happens
-		 * the first time you call bind().
-		 *
-		 * @param[in]	view			Information about the view that the render targets will be used for. Determines size
-		 *								of the render targets, and the output color render target.
-		 * @param[in]	hdr				Should the render targets support high dynamic range rendering.
+		 * Cleans up any internal data after rendering. Should be called after done rendering for a frame. All allocations
+		 * must be released at this point and no further allocations or texture binds should be done until the next call
+		 * to prepare().
 		 */
-		static SPtr<RenderTargets> create(const RENDERER_VIEW_TARGET_DESC& view, bool hdr);
+		void cleanup();
+
+		/**	Returns the depth buffer as a bindable texture. */
+		SPtr<Texture> getSceneDepth() const;
 
 		/**
 		 * Allocates the textures for the render target type specified by @p type. Allocations are pooled so this is
 		 * generally a fast operation unless the size or other render target options changed. This must be called before
 		 * binding render targets.
 		 */
-		void allocate();
+		void allocate(RenderTargetType type);
 
 		/**
 		 * Deallocates textures of the render target type specified by @p type by returning them to the pool. This should
 		 * be done when the caller is done using the render targets, so that other systems might re-use them. This will not
 		 * release any memory unless all render targets pointing to those textures go out of scope.
 		 */
-		void release();
+		void release(RenderTargetType type);
 
 		/**	Binds the GBuffer render target for rendering. */
 		void bindGBuffer();
 
-		/**	Binds the scene color render target for rendering. */
-		void bindSceneColor(bool readOnlyDepthStencil);
-
-		/** Returns the scene color render target. */
-		SPtr<RenderTexture> getSceneColorRT() const { return mSceneColorRT; }
-
-		/** Returns the gbuffer texture that scene color is stored in. */
-		SPtr<Texture> getSceneColor() const;
-
 		/**	Returns the first color texture of the gbuffer as a bindable texture. */
-		SPtr<Texture> getTextureA() const;
+		SPtr<Texture> getGBufferA() const;
 
 		/**	Returns the second color texture of the gbuffer as a bindable texture. */
-		SPtr<Texture> getTextureB() const;
+		SPtr<Texture> getGBufferB() const;
 
 		/**	Returns the third color texture of the gbuffer as a bindable texture. */
-		SPtr<Texture> getTextureC() const;
+		SPtr<Texture> getGBufferC() const;
 
-		/**	Returns the depth texture of the gbuffer as a bindable texture. */
-		SPtr<Texture> getTextureDepth() const;
+		/**	Binds the scene color render target for rendering. */
+		void bindSceneColor(bool readOnlyDepthStencil);
 
 		/** 
-		 * Returns a scene color texture with a single-sample per pixel. If no multisampling is used, this is the same as
-		 * getSceneColor().
+		 * Returns the texture for storing the final scene color. If using MSAA see getSceneColorBuffer() instead. Only
+		 * available after bindSceneColor() has been called during the current frame.
+		 */
+		SPtr<Texture> getSceneColor() const;
+
+		/** 
+		 * Flattened, buffer version of the texture returned by getSceneColor(). Required when MSAA is used, since
+		 * random writes to multisampled textures aren't supported on all render backends.
 		 */
-		SPtr<Texture> getSceneColorNonMSAA() const;
+		SPtr<GpuBuffer> getSceneColorBuffer() const;
 
-		/** Returns a render target that can be used for rendering to the texture returned by getSceneColorNonMSAA(). */
-		SPtr<RenderTexture> getSceneColorNonMSAART() const;
+		/** Returns the texture used for storing intermediate lighting information. */
+		SPtr<Texture> getLightAccumulation() const;
+
+		/**
+		* Flattened, buffer version of the texture returned by getLightAccumulation(). Required when MSAA is used, since
+		* random writes to multisampled textures aren't supported on all render backends.
+		*/
+		SPtr<GpuBuffer> getLightAccumulationBuffer() const;
 
 		/** 
-		 * Returns a buffer that is meant to be used for rendering when MSAA is used, since writes to multisampled textures
-		 * aren't supported on all render backends.
+		 * Returns a scene color texture with a single-sample per pixel. If no multisampling is used, this is the same as
+		 * getSceneColor().
 		 */
-		SPtr<GpuBuffer> getFlattenedSceneColorBuffer() const;
+		SPtr<Texture> getResolvedSceneColor() const;
+
+		/** Returns a render target that can be used for rendering to the texture returned by getResolvedSceneColor(). */
+		SPtr<RenderTexture> getResolvedSceneColorRT() const;
 
 		/**	Checks if the targets support HDR rendering. */
 		bool getHDR() const { return mHDR; }
@@ -94,17 +120,30 @@ namespace bs { namespace ct
 		/** Gets the height of the targets, in pixels. */
 		UINT32 getHeight() const { return mHeight; }
 
+		/**
+		 * Creates a new set of render targets. Note that in order to actually use the render targets you need to call
+		 * allocate() for the relevant render target type first.
+		 *
+		 * @param[in]	view			Information about the view that the render targets will be used for. Determines size
+		 *								of the render targets, and the output color render target.
+		 * @param[in]	hdr				Should the render targets support high dynamic range rendering.
+		 */
+		static SPtr<RenderTargets> create(const RENDERER_VIEW_TARGET_DESC& view, bool hdr);
+
 	private:
 		RenderTargets(const RENDERER_VIEW_TARGET_DESC& view, bool hdr);
 
 		RENDERER_VIEW_TARGET_DESC mViewTarget;
 
-		SPtr<PooledRenderTexture> mSceneColorTex;
 		SPtr<PooledRenderTexture> mAlbedoTex;
 		SPtr<PooledRenderTexture> mNormalTex;
 		SPtr<PooledRenderTexture> mRoughMetalTex;
 		SPtr<PooledRenderTexture> mDepthTex;
 
+		SPtr<PooledRenderTexture> mLightAccumulationTex;
+		SPtr<PooledStorageBuffer> mFlattenedLightAccumulationBuffer;
+
+		SPtr<PooledRenderTexture> mSceneColorTex;
 		SPtr<PooledRenderTexture> mSceneColorNonMSAATex;
 		SPtr<PooledStorageBuffer> mFlattenedSceneColorBuffer;
 

+ 439 - 0
Source/RenderBeast/Source/BsImageBasedLighting.cpp

@@ -0,0 +1,439 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#include "BsImageBasedLighting.h"
+#include "BsMaterial.h"
+#include "BsShader.h"
+#include "BsGpuBuffer.h"
+#include "BsReflectionProbe.h"
+#include "BsLightProbeCache.h"
+#include "BsGpuParamsSet.h"
+#include "BsRenderTargets.h"
+#include "BsRenderBeast.h"
+#include "BsRendererUtility.h"
+
+namespace bs { namespace ct
+{
+	static const UINT32 BUFFER_INCREMENT = 16 * sizeof(ReflProbeData);
+
+	ReflProbeParamsParamDef gReflProbeParamsParamDef;
+	TiledImageBasedLightingParamDef gTiledImageBasedLightingParamDef;
+	/** Constructs an empty probe data set. The probe count starts at zero; the GPU buffer is created by setProbes(). */
+	GPUReflProbeData::GPUReflProbeData()
+		:mNumProbes(0)
+	{ }
+	/**
+	 * Records the probe count and uploads the probe data to the GPU probe buffer. The buffer is (re)created when it does
+	 * not exist yet or is too small for the data; its size is always a multiple of BUFFER_INCREMENT bytes and this method
+	 * never shrinks it.
+	 *
+	 * @param[in]	probeData	Probe data to upload.
+	 * @param[in]	numProbes	Number of entries to upload from the start of @p probeData. NOTE(review): not validated
+	 *							against probeData.size() here - presumably callers guarantee it is not larger; confirm.
+	 */
+	void GPUReflProbeData::setProbes(const Vector<ReflProbeData>& probeData, UINT32 numProbes)
+	{
+		mNumProbes = numProbes;
+
+		UINT32 size = numProbes * sizeof(ReflProbeData); // Size of the data to upload, in bytes
+		UINT32 curBufferSize;
+
+		if (mProbeBuffer != nullptr)
+			curBufferSize = mProbeBuffer->getSize();
+		else
+			curBufferSize = 0;
+
+		if (size > curBufferSize || curBufferSize == 0)
+		{
+			// Allocate at least one block even if no probes, to avoid issues with null buffers
+			UINT32 bufferSize = std::max(1, Math::ceilToInt(size / (float)BUFFER_INCREMENT)) * BUFFER_INCREMENT;
+
+			GPU_BUFFER_DESC bufferDesc;
+			bufferDesc.type = GBT_STRUCTURED;
+			bufferDesc.elementCount = bufferSize / sizeof(ReflProbeData);
+			bufferDesc.elementSize = sizeof(ReflProbeData);
+			bufferDesc.format = BF_UNKNOWN;
+
+			mProbeBuffer = GpuBuffer::create(bufferDesc);
+		}
+
+		if (size > 0) // Nothing to write when there are no probes
+			mProbeBuffer->writeData(0, size, probeData.data(), BWT_DISCARD);
+	}
+	/**
+	 * Initializes the renderer-side state for the provided reflection probe. The probe starts with no texture, with
+	 * arrayDirty set and errorFlagged cleared. Whether the texture is dirty is queried from LightProbeCache using the
+	 * probe's UUID.
+	 *
+	 * @param[in]	probe	Probe to track. Dereferenced immediately, so it must not be null.
+	 */
+	RendererReflectionProbe::RendererReflectionProbe(ReflectionProbe* probe)
+		:probe(probe)
+	{
+		arrayIdx = -1; // NOTE(review): presumably means "no array slot"; wraps to max value if arrayIdx is unsigned - confirm
+		texture = nullptr;
+		customTexture = probe->getCustomTexture() != nullptr;
+		textureDirty = LightProbeCache::instance().isRadianceDirty(probe->getUUID());
+		arrayDirty = true;
+		errorFlagged = false;
+	}
+	/**
+	 * Populates the structure with this probe's parameters.
+	 *
+	 * @param[out]	output	Receives the probe type (0 - sphere, 1 - box, 2 - any other type), position, box extents,
+	 *						radius, transition distance, cubemap index (arrayIdx) and the inverse box transform.
+	 */
+	void RendererReflectionProbe::getParameters(ReflProbeData& output) const
+	{
+		output.type = probe->getType() == ReflectionProbeType::Sphere ? 0 
+			: probe->getType() == ReflectionProbeType::Box ? 1 : 2;
+		
+		output.position = probe->getPosition();
+		output.boxExtents = probe->getExtents();
+
+		if (probe->getType() == ReflectionProbeType::Sphere)
+			output.radius = probe->getRadius();
+		else
+			output.radius = output.boxExtents.length(); // Non-sphere probes use the length of the extents vector
+
+		output.transitionDistance = probe->getTransitionDistance();
+		output.cubemapIdx = arrayIdx;
+		output.invBoxTransform.setInverseTRS(output.position, probe->getRotation(), output.boxExtents);
+	}
+
+	// Size of a tile along each axis, in pixels. Note: Using larger tiles than in tiled deferred lighting since we use
+	// AABB for intersections, which is more expensive to compute than frustums. This way we amortize the cost even though
+	// other parts of the shader might suffer due to increased thread group load.
+	const UINT32 TiledDeferredImageBasedLighting::TILE_SIZE = 32;
+	/**
+	 * Caches handles to all GPU parameters of the image based lighting compute program, creates and assigns the "Params"
+	 * and "ReflProbeParams" parameter block buffers, and assigns a sampler state with linear min/mag/mip filtering for the
+	 * sky and reflection probe cubemaps.
+	 *
+	 * @param[in]	material		Material whose shader is searched for texture parameters with GBuffer semantics.
+	 * @param[in]	paramsSet		Parameter set the handles are retrieved from and the parameter blocks are assigned to.
+	 * @param[in]	sampleCount		Number of samples per pixel. When larger than one, "gInColor" and "gOutput" are looked
+	 *								up as buffers, otherwise as a texture and a load-store texture.
+	 */
+	TiledDeferredImageBasedLighting::TiledDeferredImageBasedLighting(const SPtr<Material>& material, 
+		const SPtr<GpuParamsSet>& paramsSet, UINT32 sampleCount)
+		:mSampleCount(sampleCount), mMaterial(material), mParamsSet(paramsSet)
+	{
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+
+		auto& texParams = mMaterial->getShader()->getTextureParams();
+		for (auto& entry : texParams) // GBuffer textures are located by renderer semantic rather than by name
+		{
+			if (entry.second.rendererSemantic == RPS_GBufferA)
+				params->getTextureParam(GPT_COMPUTE_PROGRAM, entry.second.name, mGBufferA);
+			else if (entry.second.rendererSemantic == RPS_GBufferB)
+				params->getTextureParam(GPT_COMPUTE_PROGRAM, entry.second.name, mGBufferB);
+			else if (entry.second.rendererSemantic == RPS_GBufferC)
+				params->getTextureParam(GPT_COMPUTE_PROGRAM, entry.second.name, mGBufferC);
+			else if (entry.second.rendererSemantic == RPS_GBufferDepth)
+				params->getTextureParam(GPT_COMPUTE_PROGRAM, entry.second.name, mGBufferDepth);
+		}
+
+		if (mSampleCount > 1) // With MSAA the input and output are accessed as buffers instead of textures
+		{
+			params->getBufferParam(GPT_COMPUTE_PROGRAM, "gInColor", mInColorBufferParam);
+			params->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputBufferParam);
+		}
+		else
+		{
+			params->getTextureParam(GPT_COMPUTE_PROGRAM, "gInColor", mInColorTextureParam);
+			params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputTextureParam);
+		}
+
+		mParamBuffer = gTiledImageBasedLightingParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer, true);
+
+		// Sky
+		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gSkyReflectionTex", mSkyReflectionsParam);
+		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gSkyIrradianceTex", mSkyIrradianceParam);
+
+		// Reflections
+		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gReflProbeCubemaps", mReflectionProbeCubemapsParam);
+		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gPreintegratedEnvBRDF", mPreintegratedEnvBRDFParam);
+
+		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gReflectionProbes", mReflectionProbesParam);
+
+		mReflectionsParamBuffer = gReflProbeParamsParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("ReflProbeParams", mReflectionsParamBuffer);
+
+		SAMPLER_STATE_DESC reflSamplerDesc;
+		reflSamplerDesc.magFilter = FO_LINEAR;
+		reflSamplerDesc.minFilter = FO_LINEAR;
+		reflSamplerDesc.mipFilter = FO_LINEAR;
+
+		mReflectionSamplerState = SamplerState::create(reflSamplerDesc);
+
+		params->setSamplerState(GPT_COMPUTE_PROGRAM, "gSkyReflectionSamp", mReflectionSamplerState);
+		params->setSamplerState(GPT_COMPUTE_PROGRAM, "gReflProbeSamp", mReflectionSamplerState);
+	}
+	/**
+	 * Binds the GBuffer and depth textures, the light accumulation target as input and the scene color target as output,
+	 * then dispatches the compute pass over a grid of ceil(width / TILE_SIZE) x ceil(height / TILE_SIZE) groups.
+	 *
+	 * The "ReflProbeParams" buffer is flushed to the GPU here, so values written by setReflectionProbes() and setSky()
+	 * presumably need to be set before this call in order to be used by the dispatch.
+	 *
+	 * @param[in]	renderTargets		Provides the framebuffer size and all input/output textures or buffers.
+	 * @param[in]	perCamera			Parameter block buffer assigned to the "PerCamera" block.
+	 * @param[in]	preintegratedGF		Texture bound to the "gPreintegratedEnvBRDF" parameter.
+	 */
+	void TiledDeferredImageBasedLighting::execute(const SPtr<RenderTargets>& renderTargets,
+		const SPtr<GpuParamBlockBuffer>& perCamera, const SPtr<Texture>& preintegratedGF)
+	{
+		Vector2I framebufferSize;
+		framebufferSize[0] = renderTargets->getWidth();
+		framebufferSize[1] = renderTargets->getHeight();
+		gTiledImageBasedLightingParamDef.gFramebufferSize.set(mParamBuffer, framebufferSize);
+
+		mParamBuffer->flushToGPU();
+		mReflectionsParamBuffer->flushToGPU();
+
+		mGBufferA.set(renderTargets->getGBufferA());
+		mGBufferB.set(renderTargets->getGBufferB());
+		mGBufferC.set(renderTargets->getGBufferC());
+		mGBufferDepth.set(renderTargets->getSceneDepth());
+
+		mPreintegratedEnvBRDFParam.set(preintegratedGF);
+
+		mParamsSet->setParamBlockBuffer("PerCamera", perCamera, true);
+
+		if (mSampleCount > 1) // With MSAA the flattened buffer versions of the targets are used
+		{
+			mInColorBufferParam.set(renderTargets->getLightAccumulationBuffer());
+			mOutputBufferParam.set(renderTargets->getSceneColorBuffer());
+		}
+		else
+		{
+			mInColorTextureParam.set(renderTargets->getLightAccumulation());
+			mOutputTextureParam.set(renderTargets->getSceneColor());
+		}
+
+		UINT32 width = renderTargets->getWidth();
+		UINT32 height = renderTargets->getHeight();
+
+		UINT32 numTilesX = (UINT32)Math::ceilToInt(width / (float)TILE_SIZE); // Rounded up so partial tiles are covered
+		UINT32 numTilesY = (UINT32)Math::ceilToInt(height / (float)TILE_SIZE);
+
+		gRendererUtility().setComputePass(mMaterial, 0);
+		gRendererUtility().setPassParams(mParamsSet);
+
+		RenderAPI::instance().dispatchCompute(numTilesX, numTilesY);
+	}
+	/**
+	 * Binds the reflection probe buffer and cubemap texture, and writes the probe count and cubemap mip count to the
+	 * reflection parameter buffer. The buffer is not flushed to the GPU here (execute() does that).
+	 *
+	 * @param[in]	probeData			Provides the probe buffer and the number of probes.
+	 * @param[in]	reflectionCubemaps	Texture bound to "gReflProbeCubemaps". May be null, in which case the mip count
+	 *									is written as zero.
+	 */
+	void TiledDeferredImageBasedLighting::setReflectionProbes(const GPUReflProbeData& probeData,
+		const SPtr<Texture>& reflectionCubemaps)
+	{
+		mReflectionProbesParam.set(probeData.getProbeBuffer());
+		mReflectionProbeCubemapsParam.set(reflectionCubemaps);
+
+		gReflProbeParamsParamDef.gNumProbes.set(mReflectionsParamBuffer, probeData.getNumProbes());
+
+		UINT32 numMips = 0;
+		if (reflectionCubemaps != nullptr)
+			numMips = reflectionCubemaps->getProperties().getNumMipmaps() + 1; // +1, presumably to count the base level
+
+		gReflProbeParamsParamDef.gReflCubemapNumMips.set(mReflectionsParamBuffer, numMips);
+	}
+	/**
+	 * Binds the sky reflection and irradiance textures, and writes the sky mip count, availability flag and brightness to
+	 * the reflection parameter buffer. The buffer is not flushed to the GPU here (execute() does that).
+	 *
+	 * @param[in]	skyReflections	Texture bound to "gSkyReflectionTex". May be null, in which case the sky cubemap is
+	 *								flagged as unavailable and its mip count is written as zero.
+	 * @param[in]	skyIrradiance	Texture bound to "gSkyIrradianceTex".
+	 * @param[in]	brightness		Value written to the gSkyBrightness parameter.
+	 */
+	void TiledDeferredImageBasedLighting::setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance,
+		float brightness)
+	{
+		mSkyReflectionsParam.set(skyReflections);
+		mSkyIrradianceParam.set(skyIrradiance);
+
+		UINT32 skyReflectionsAvailable = 0;
+		UINT32 numMips = 0;
+		if (skyReflections != nullptr)
+		{
+			numMips = skyReflections->getProperties().getNumMipmaps() + 1; // +1, presumably to count the base level
+			skyReflectionsAvailable = 1;
+		}
+
+		gReflProbeParamsParamDef.gSkyCubemapNumMips.set(mReflectionsParamBuffer, numMips);
+		gReflProbeParamsParamDef.gSkyCubemapAvailable.set(mReflectionsParamBuffer, skyReflectionsAvailable);
+		gReflProbeParamsParamDef.gSkyBrightness.set(mReflectionsParamBuffer, brightness);
+	}
+
+	/**
+	 * Reverses the order of the 32 bits in @p bits and returns the result divided by 2^32, as a float. Used for
+	 * generating the second coordinate of the Hammersley sequence.
+	 */
+	float reverseBits(UINT32 bits)
+	{
+		bits = (bits << 16u) | (bits >> 16u); // Swap the 16-bit halves
+		bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); // Swap adjacent bits
+		bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); // Swap adjacent 2-bit pairs
+		bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); // Swap adjacent nibbles
+		bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); // Swap adjacent bytes
+
+		return (float)(double(bits) / (double)0x100000000LL); // Divide by 2^32 in double precision, then narrow
+	}
+	/**
+	 * Calculates the i-th point of a Hammersley sequence containing @p count points.
+	 *
+	 * @param[in]	i		Index of the point.
+	 * @param[in]	count	Total number of points in the sequence. Used as a divisor, so it must not be zero.
+	 * @param[out]	e0		First coordinate, i / count.
+	 * @param[out]	e1		Second coordinate, the bit-reversed index as returned by reverseBits().
+	 */
+	void hammersleySequence(UINT32 i, UINT32 count, float& e0, float& e1)
+	{
+		e0 = i / (float)count;
+		e1 = reverseBits(i);
+	}
+	/**
+	 * Converts a direction given in spherical coordinates into a Cartesian vector, with theta measured from the Z axis
+	 * and phi being the angle around it: (sinTheta * cos(phi), sinTheta * sin(phi), cosTheta).
+	 *
+	 * @param[in]	cosTheta	Cosine of the angle from the Z axis.
+	 * @param[in]	sinTheta	Sine of the angle from the Z axis.
+	 * @param[in]	phi			Angle around the Z axis, in radians.
+	 */
+	Vector3 sphericalToCartesian(float cosTheta, float sinTheta, float phi)
+	{
+		Vector3 output;
+		output.x = sinTheta * cos(phi);
+		output.y = sinTheta * sin(phi);
+		output.z = cosTheta;
+
+		return output;
+	}
+
+	// Generates an angle in spherical coordinates (output as cosTheta and phi), importance sampled for the specified
+	// roughness (provided raised to the fourth power), based on uniformly distributed random variables e0, e1 in [0, 1].
+	void importanceSampleGGX(float e0, float e1, float roughness4, float& cosTheta, float& phi)
+	{
+		// See GGXImportanceSample.nb for derivation (essentially, take base GGX, normalize it, generate PDF, split PDF into
+		// marginal probability for theta and conditional probability for phi. Plug those into the CDF, invert it.)
+		cosTheta = sqrt((1.0f - e0) / (1.0f + (roughness4 - 1.0f) * e0));
+		phi = 2.0f * Math::PI * e1;
+	}
+	/**
+	 * Evaluates the Smith GGX microfacet shadowing term, with the microfacet BRDF divisor already folded in.
+	 *
+	 * @param[in]	roughness4	Roughness raised to the fourth power.
+	 * @param[in]	NoV			Cosine of the angle between the normal and the view direction.
+	 * @param[in]	NoL			Cosine of the angle between the normal and the light direction.
+	 */
+	float calcMicrofacetShadowingSmithGGX(float roughness4, float NoV, float NoL)
+	{
+		// Note: See lighting shader for derivation. Includes microfacet BRDF divisor.
+		float g1V = NoV + sqrt(NoV * (NoV - NoV * roughness4) + roughness4);
+		float g1L = NoL + sqrt(NoL * (NoL - NoL * roughness4) + roughness4);
+		return 1.0f / (g1V * g1L);
+	}
+	/**
+	 * Generates a 128x32 two-channel 16-bit float lookup texture holding the pre-integrated environment BRDF (second part
+	 * of the split-sum approximation). The X axis maps to N.V and the Y axis to roughness, both sampled at texel centers.
+	 * The red channel receives the scale and the green channel the offset applied to F0, both clamped to [0, 1]. Each
+	 * texel is integrated on the CPU using 128 importance-sampled Hammersley points.
+	 *
+	 * @return	Newly created texture containing the lookup table.
+	 */
+	SPtr<Texture> TiledDeferredImageBasedLighting::generatePreintegratedEnvBRDF()
+	{
+		TEXTURE_DESC desc;
+		desc.type = TEX_TYPE_2D;
+		desc.format = PF_FLOAT16_RG;
+		desc.width = 128;
+		desc.height = 32;
+
+		SPtr<Texture> texture = Texture::create(desc);
+		PixelData pixelData = texture->lock(GBL_WRITE_ONLY_DISCARD);
+
+		for (UINT32 y = 0; y < desc.height; y++)
+		{
+			float roughness = (float)(y + 0.5f) / desc.height;
+			float m = roughness * roughness;
+			float m2 = m*m; // roughness^4, as expected by the GGX helper functions
+
+			for (UINT32 x = 0; x < desc.width; x++)
+			{
+				float NoV = (float)(x + 0.5f) / desc.width;
+
+				Vector3 V;
+				V.x = sqrt(1.0f - NoV * NoV); // sine
+				V.y = 0.0f;
+				V.z = NoV;
+
+				// These are the two integrals resulting from the second part of the split-sum approximation. Described in
+				// Epic's 2013 paper:
+				//    http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf
+				float scale = 0.0f;
+				float offset = 0.0f;
+
+				// We use the same importance sampling function we use for reflection cube importance sampling, only we
+				// sample G and F, instead of D factors of the microfacet BRDF. See GGXImportanceSample.nb for derivation.
+				constexpr UINT32 NumSamples = 128;
+				for (UINT32 i = 0; i < NumSamples; i++)
+				{
+					float e0, e1;
+					hammersleySequence(i, NumSamples, e0, e1);
+
+					float cosTheta, phi;
+					importanceSampleGGX(e0, e1, m2, cosTheta, phi);
+
+					float sinTheta = sqrt(1.0f - cosTheta * cosTheta);
+					Vector3 H = sphericalToCartesian(cosTheta, sinTheta, phi);
+					Vector3 L = 2.0f * Vector3::dot(V, H) * H - V; // View vector reflected around the half vector
+
+					float VoH = std::max(Vector3::dot(V, H), 0.0f);
+					float NoL = std::max(L.z, 0.0f); // N assumed (0, 0, 1)
+					float NoH = std::max(H.z, 0.0f); // N assumed (0, 0, 1)
+
+					// The second part of the split sum integral is itself split into two parts:
+					//   F0*I[G * (1 - (1 - v.h)^5) * cos(theta)] + I[G * (1 - v.h)^5 * cos(theta)] (F0 * scale + bias)
+
+					// We calculate the fresnel scale (1 - (1 - v.h)^5) and bias ((1 - v.h)^5) parts
+					float fc = pow(1.0f - VoH, 5.0f);
+					float fresnelScale = 1.0f - fc;
+					float fresnelOffset = fc;
+
+					// We calculate the G part
+					float G = calcMicrofacetShadowingSmithGGX(m2, NoV, NoL);
+
+					// When we factor out G and F, then divide D by PDF, this is what's left
+					// Note: This is based on PDF: D * NoH / (4 * VoH). (4 * VoH) factor comes from the Jacobian of the
+					// transformation from half vector to light vector
+					float pdfFactor = 4.0f * VoH / NoH;
+
+					if (NoL > 0.0f) // Samples with the light direction below the surface contribute nothing
+					{
+						scale += NoL * pdfFactor * G * fresnelScale;
+						offset += NoL * pdfFactor * G * fresnelOffset;
+					}
+				}
+
+				scale /= NumSamples;
+				offset /= NumSamples;
+
+				Color color;
+				color.r = Math::clamp01(scale);
+				color.g = Math::clamp01(offset);
+
+				pixelData.setColorAt(color, x, y);
+			}
+		}
+
+		texture->unlock();
+
+		return texture;
+	}
+	/** Constructs the wrapped implementation from this material's mMaterial and mParamsSet, using MSAA_COUNT samples. */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>::TTiledDeferredImageBasedLightingMat()
+		:mInternal(mMaterial, mParamsSet, MSAA_COUNT)
+	{
+
+	}
+	/**
+	 * Sets the shader defines for this material variation: TILE_SIZE from TiledDeferredImageBasedLighting::TILE_SIZE,
+	 * and MSAA_COUNT and CAPTURING_REFLECTIONS from the template parameters.
+	 *
+	 * @param[in, out]	defines		Set of defines to add the entries to.
+	 */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	void TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>::_initDefines(ShaderDefines& defines)
+	{
+		defines.set("TILE_SIZE", TiledDeferredImageBasedLighting::TILE_SIZE);
+		defines.set("MSAA_COUNT", MSAA_COUNT);
+		defines.set("CAPTURING_REFLECTIONS", CapturingReflections);
+	}
+	/** Forwards the call, with all arguments unchanged, to execute() of the wrapped mInternal implementation. */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	void TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>::execute(const SPtr<RenderTargets>& gbuffer,
+		const SPtr<GpuParamBlockBuffer>& perCamera, const SPtr<Texture>& preintegratedGF)
+	{
+		mInternal.execute(gbuffer, perCamera, preintegratedGF);
+	}
+	/** Forwards the call, with all arguments unchanged, to setReflectionProbes() of the wrapped mInternal implementation. */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	void TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>::setReflectionProbes(
+		const GPUReflProbeData& probeData, const SPtr<Texture>& reflectionCubemaps)
+	{
+		mInternal.setReflectionProbes(probeData, reflectionCubemaps);
+	}
+	/** Forwards the call, with all arguments unchanged, to setSky() of the wrapped mInternal implementation. */
+	template<int MSAA_COUNT, bool CapturingReflections>
+	void TTiledDeferredImageBasedLightingMat<MSAA_COUNT, CapturingReflections>::setSky(const SPtr<Texture>& skyReflections,
+		const SPtr<Texture>& skyIrradiance, float brightness)
+	{
+		mInternal.setSky(skyReflections, skyIrradiance, brightness);
+	}
+	/**
+	 * Creates one material instance for every combination of sample count (1, 2, 4, 8) and the capturing reflections
+	 * flag. Entries 0-3 have the flag disabled and entries 4-7 have it enabled; get() relies on this layout.
+	 */
+	TiledDeferredImageBasedLightingMaterials::TiledDeferredImageBasedLightingMaterials()
+	{
+		mInstances[0] = bs_new<TTiledDeferredImageBasedLightingMat<1, false>>();
+		mInstances[1] = bs_new<TTiledDeferredImageBasedLightingMat<2, false>>();
+		mInstances[2] = bs_new<TTiledDeferredImageBasedLightingMat<4, false>>();
+		mInstances[3] = bs_new<TTiledDeferredImageBasedLightingMat<8, false>>();
+
+		mInstances[4] = bs_new<TTiledDeferredImageBasedLightingMat<1, true>>();
+		mInstances[5] = bs_new<TTiledDeferredImageBasedLightingMat<2, true>>();
+		mInstances[6] = bs_new<TTiledDeferredImageBasedLightingMat<4, true>>();
+		mInstances[7] = bs_new<TTiledDeferredImageBasedLightingMat<8, true>>();
+	}
+	/** Destroys all eight material instances created by the constructor. */
+	TiledDeferredImageBasedLightingMaterials::~TiledDeferredImageBasedLightingMaterials()
+	{
+		for (UINT32 i = 0; i < 8; i++) // 8 must match the number of entries the constructor fills in
+			bs_delete(mInstances[i]);
+	}
+	/**
+	 * Returns the material variation that matches the provided parameters.
+	 *
+	 * @param[in]	msaa					Number of samples per pixel. Any value other than 1, 2 or 4 selects the
+	 *										8-sample variation.
+	 * @param[in]	capturingReflections	Selects between the variations created with the CapturingReflections template
+	 *										parameter disabled (false) or enabled (true).
+	 * @return								Material instance owned by this object.
+	 */
+	ITiledDeferredImageBasedLightingMat* TiledDeferredImageBasedLightingMaterials::get(UINT32 msaa, bool capturingReflections)
+	{
+		if (!capturingReflections)
+		{
+			if (msaa == 1)
+				return mInstances[0];
+			else if (msaa == 2)
+				return mInstances[1];
+			else if (msaa == 4)
+				return mInstances[2];
+			else
+				return mInstances[3];
+		}
+		else
+		{
+			if (msaa == 1)
+				return mInstances[4];
+			else if (msaa == 2)
+				return mInstances[5];
+			else if (msaa == 4)
+				return mInstances[6];
+			else
+				return mInstances[7];
+		}
+	}
+}}

+ 38 - 270
Source/RenderBeast/Source/BsLightRendering.cpp

@@ -10,7 +10,6 @@
 #include "BsGpuBuffer.h"
 #include "BsLight.h"
 #include "BsRendererUtility.h"
-#include "BsReflectionProbeSampling.h"
 
 namespace bs { namespace ct
 {
@@ -125,37 +124,14 @@ namespace bs { namespace ct
 
 		mParamBuffer = gTiledLightingParamDef.createBuffer();
 		mParamsSet->setParamBlockBuffer("Params", mParamBuffer, true);
-
-		// Sky
-		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gSkyReflectionTex", mSkyReflectionsParam);
-		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gSkyIrradianceTex", mSkyIrradianceParam);
-
-		// Reflections
-		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gReflProbeCubemaps", mReflectionProbeCubemapsParam);
-		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gPreintegratedEnvBRDF", mPreintegratedEnvBRDFParam);
-
-		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gReflectionProbes", mReflectionProbesParam);
-
-		mReflectionsParamBuffer = gReflProbeParamsParamDef.createBuffer();
-		mParamsSet->setParamBlockBuffer("ReflProbeParams", mReflectionsParamBuffer);
-
-		SAMPLER_STATE_DESC reflSamplerDesc;
-		reflSamplerDesc.magFilter = FO_LINEAR;
-		reflSamplerDesc.minFilter = FO_LINEAR;
-		reflSamplerDesc.mipFilter = FO_LINEAR;
-
-		mReflectionSamplerState = SamplerState::create(reflSamplerDesc);
-
-		params->setSamplerState(GPT_COMPUTE_PROGRAM, "gSkyReflectionSamp", mReflectionSamplerState);
-		params->setSamplerState(GPT_COMPUTE_PROGRAM, "gReflProbeSamp", mReflectionSamplerState);
 	}
 
-	void TiledDeferredLighting::execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera,
-										const SPtr<Texture>& preintegratedGF, bool noLighting)
+	void TiledDeferredLighting::execute(const SPtr<RenderTargets>& renderTargets, const SPtr<GpuParamBlockBuffer>& perCamera,
+		bool noLighting)
 	{
 		Vector2I framebufferSize;
-		framebufferSize[0] = gbuffer->getWidth();
-		framebufferSize[1] = gbuffer->getHeight();
+		framebufferSize[0] = renderTargets->getWidth();
+		framebufferSize[1] = renderTargets->getHeight();
 		gTiledLightingParamDef.gFramebufferSize.set(mParamBuffer, framebufferSize);
 
 		if (noLighting)
@@ -172,31 +148,27 @@ namespace bs { namespace ct
 			gTiledLightingParamDef.gLightOffsets.set(mParamBuffer, mLightOffsets);
 		}
 		mParamBuffer->flushToGPU();
-		mReflectionsParamBuffer->flushToGPU();
-
-		mGBufferA.set(gbuffer->getTextureA());
-		mGBufferB.set(gbuffer->getTextureB());
-		mGBufferC.set(gbuffer->getTextureC());
-		mGBufferDepth.set(gbuffer->getTextureDepth());
 
-		mPreintegratedEnvBRDFParam.set(preintegratedGF);
+		mGBufferA.set(renderTargets->getGBufferA());
+		mGBufferB.set(renderTargets->getGBufferB());
+		mGBufferC.set(renderTargets->getGBufferC());
+		mGBufferDepth.set(renderTargets->getSceneDepth());
 
 		mParamsSet->setParamBlockBuffer("PerCamera", perCamera, true);
 
-		const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
-		if (mSampleCount > 1 && !rapiInfo.isFlagSet(RenderAPIFeatureFlag::MSAAImageStores))
+		if (mSampleCount > 1)
 		{
-			SPtr<GpuBuffer> sceneColorBuffer = gbuffer->getFlattenedSceneColorBuffer();
-			mOutputBufferParam.set(sceneColorBuffer);
+			SPtr<GpuBuffer> lightAccumulation = renderTargets->getLightAccumulationBuffer();
+			mOutputBufferParam.set(lightAccumulation);
 		}
 		else
 		{
-			SPtr<Texture> sceneColorTex = gbuffer->getSceneColor();
-			mOutputTextureParam.set(sceneColorTex);
+			SPtr<Texture> lightAccumulation = renderTargets->getLightAccumulation();
+			mOutputTextureParam.set(lightAccumulation);
 		}
 
-		UINT32 width = gbuffer->getWidth();
-		UINT32 height = gbuffer->getHeight();
+		UINT32 width = renderTargets->getWidth();
+		UINT32 height = renderTargets->getHeight();
 
 		UINT32 numTilesX = (UINT32)Math::ceilToInt(width / (float)TILE_SIZE);
 		UINT32 numTilesY = (UINT32)Math::ceilToInt(height / (float)TILE_SIZE);
@@ -216,261 +188,57 @@ namespace bs { namespace ct
 		mLightOffsets[2] = mLightOffsets[1] + lightData.getNumSpotLights();
 	}
 
-	void TiledDeferredLighting::setReflectionProbes(const GPUReflProbeData& probeData,
-													const SPtr<Texture>& reflectionCubemaps)
-	{
-		mReflectionProbesParam.set(probeData.getProbeBuffer());
-		mReflectionProbeCubemapsParam.set(reflectionCubemaps);
-
-		gReflProbeParamsParamDef.gNumProbes.set(mReflectionsParamBuffer, probeData.getNumProbes());
-
-		UINT32 numMips = 0;
-		if (reflectionCubemaps != nullptr)
-			numMips = reflectionCubemaps->getProperties().getNumMipmaps() + 1;
-
-		gReflProbeParamsParamDef.gReflCubemapNumMips.set(mReflectionsParamBuffer, numMips);
-	}
-
-	void TiledDeferredLighting::setSky(const SPtr<Texture>& skyReflections, const SPtr<Texture>& skyIrradiance, 
-		float brightness)
-	{
-		mSkyReflectionsParam.set(skyReflections);
-		mSkyIrradianceParam.set(skyIrradiance);
-
-		UINT32 skyReflectionsAvailable = 0;
-		UINT32 numMips = 0;
-		if (skyReflections != nullptr)
-		{
-			numMips = skyReflections->getProperties().getNumMipmaps() + 1;
-			skyReflectionsAvailable = 1;
-		}
-
-		gReflProbeParamsParamDef.gSkyCubemapNumMips.set(mReflectionsParamBuffer, numMips);
-		gReflProbeParamsParamDef.gSkyCubemapAvailable.set(mReflectionsParamBuffer, skyReflectionsAvailable);
-		gReflProbeParamsParamDef.gSkyBrightness.set(mReflectionsParamBuffer, brightness);
-	}
-
-	// Reverse bits functions used for Hammersley sequence
-	float reverseBits(UINT32 bits)
-	{
-		bits = (bits << 16u) | (bits >> 16u);
-		bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
-		bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
-		bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
-		bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
-
-		return (float)(double(bits) / (double)0x100000000LL);
-	}
-
-	void hammersleySequence(UINT32 i, UINT32 count, float& e0, float& e1)
-	{
-		e0 = i / (float)count;
-		e1 = reverseBits(i);
-	}
-
-	Vector3 sphericalToCartesian(float cosTheta, float sinTheta, float phi)
-	{
-		Vector3 output;
-		output.x = sinTheta * cos(phi);
-		output.y = sinTheta * sin(phi);
-		output.z = cosTheta;
-
-		return output;
-	}
-
-	// Generates an angle in spherical coordinates, importance sampled for the specified roughness based on some uniformly
-	// distributed random variables in range [0, 1].
-	void importanceSampleGGX(float e0, float e1, float roughness4, float& cosTheta, float& phi)
-	{
-		// See GGXImportanceSample.nb for derivation (essentially, take base GGX, normalize it, generate PDF, split PDF into
-		// marginal probability for theta and conditional probability for phi. Plug those into the CDF, invert it.)				
-		cosTheta = sqrt((1.0f - e0) / (1.0f + (roughness4 - 1.0f) * e0));
-		phi = 2.0f * Math::PI * e1;
-	}
-
-	float calcMicrofacetShadowingSmithGGX(float roughness4, float NoV, float NoL)
-	{
-		// Note: See lighting shader for derivation. Includes microfacet BRDF divisor.
-		float g1V = NoV + sqrt(NoV * (NoV - NoV * roughness4) + roughness4);
-		float g1L = NoL + sqrt(NoL * (NoL - NoL * roughness4) + roughness4);
-		return 1.0f / (g1V * g1L);
-	}
-
-	SPtr<Texture> TiledDeferredLighting::generatePreintegratedEnvBRDF()
-	{
-		TEXTURE_DESC desc;
-		desc.type = TEX_TYPE_2D;
-		desc.format = PF_FLOAT16_RG;
-		desc.width = 128;
-		desc.height = 32;
-
-		SPtr<Texture> texture = Texture::create(desc);
-		PixelData pixelData = texture->lock(GBL_WRITE_ONLY_DISCARD);
-
-		for (UINT32 y = 0; y < desc.height; y++)
-		{
-			float roughness = (float)(y + 0.5f) / desc.height;
-			float m = roughness * roughness;
-			float m2 = m*m;
-
-			for (UINT32 x = 0; x < desc.width; x++)
-			{
-				float NoV = (float)(x + 0.5f) / desc.width;
-
-				Vector3 V;
-				V.x = sqrt(1.0f - NoV * NoV); // sine
-				V.y = 0.0f;
-				V.z = NoV;
-
-				// These are the two integrals resulting from the second part of the split-sum approximation. Described in
-				// Epic's 2013 paper:
-				//    http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf
-				float scale = 0.0f;
-				float offset = 0.0f;
-
-				// We use the same importance sampling function we use for reflection cube importance sampling, only we
-				// sample G and F, instead of D factors of the microfactet BRDF. See GGXImportanceSample.nb for derivation.
-				constexpr UINT32 NumSamples = 128;
-				for (UINT32 i = 0; i < NumSamples; i++)
-				{
-					float e0, e1;
-					hammersleySequence(i, NumSamples, e0, e1);
-
-					float cosTheta, phi;
-					importanceSampleGGX(e0, e1, m2, cosTheta, phi);
-
-					float sinTheta = sqrt(1.0f - cosTheta * cosTheta);
-					Vector3 H = sphericalToCartesian(cosTheta, sinTheta, phi);
-					Vector3 L = 2.0f * Vector3::dot(V, H) * H - V;
-
-					float VoH = std::max(Vector3::dot(V, H), 0.0f);
-					float NoL = std::max(L.z, 0.0f); // N assumed (0, 0, 1)
-					float NoH = std::max(H.z, 0.0f); // N assumed (0, 0, 1)
-
-					// Set second part of the split sum integral is split into two parts:
-					//   F0*I[G * (1 - (1 - v.h)^5) * cos(theta)] + I[G * (1 - v.h)^5 * cos(theta)] (F0 * scale + bias)
-
-					// We calculate the fresnel scale (1 - (1 - v.h)^5) and bias ((1 - v.h)^5) parts
-					float fc = pow(1.0f - VoH, 5.0f);
-					float fresnelScale = 1.0f - fc;
-					float fresnelOffset = fc;
-
-					// We calculate the G part
-					float G = calcMicrofacetShadowingSmithGGX(m2, NoV, NoL);
-
-					// When we factor out G and F, then divide D by PDF, this is what's left
-					// Note: This is based on PDF: D * NoH / (4 * VoH). (4 * VoH) factor comes from the Jacobian of the
-					// transformation from half vector to light vector
-					float pdfFactor = 4.0f * VoH / NoH;
-
-					if (NoL > 0.0f)
-					{
-						scale += NoL * pdfFactor * G * fresnelScale;
-						offset += NoL * pdfFactor * G * fresnelOffset;
-					}
-				}
-
-				scale /= NumSamples;
-				offset /= NumSamples;
-
-				Color color;
-				color.r = Math::clamp01(scale);
-				color.g = Math::clamp01(offset);
-
-				pixelData.setColorAt(color, x, y);
-			}
-		}
-
-		texture->unlock();
-
-		return texture;
-	}
-
-	template<int MSAA_COUNT, bool CapturingReflections>
-	TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::TTiledDeferredLightingMat()
+	// Hands the material, its parameter set and the compile-time MSAA sample count to mInternal, which the other
+	// methods of this class forward to.
+	template<int MSAA_COUNT>
+	TTiledDeferredLightingMat<MSAA_COUNT>::TTiledDeferredLightingMat()
 		:mInternal(mMaterial, mParamsSet, MSAA_COUNT)
 	{
 
 	}
 
-	template<int MSAA_COUNT, bool CapturingReflections>
-	void TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::_initDefines(ShaderDefines& defines)
+	// Sets the shader defines for this variation: TILE_SIZE (from TiledDeferredLighting::TILE_SIZE) and MSAA_COUNT
+	// (the template argument). CAPTURING_REFLECTIONS is no longer set here.
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::_initDefines(ShaderDefines& defines)
 	{
 		defines.set("TILE_SIZE", TiledDeferredLighting::TILE_SIZE);
 		defines.set("MSAA_COUNT", MSAA_COUNT);
-		defines.set("CAPTURING_REFLECTIONS", CapturingReflections);
 	}
 
-	template<int MSAA_COUNT, bool CapturingReflections>
-	void TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::execute(const SPtr<RenderTargets>& gbuffer,
-		const SPtr<GpuParamBlockBuffer>& perCamera, const SPtr<Texture>& preintegratedGF, bool noLighting)
+	// Forwards unchanged to mInternal.execute(). Despite its name, "gbuffer" is the full RenderTargets object; the
+	// internal implementation reads the G-buffer textures, scene depth and light accumulation output from it.
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::execute(const SPtr<RenderTargets>& gbuffer,
+		const SPtr<GpuParamBlockBuffer>& perCamera, bool noLighting)
 	{
-		mInternal.execute(gbuffer, perCamera, preintegratedGF, noLighting);
+		mInternal.execute(gbuffer, perCamera, noLighting);
 	}
 
-	template<int MSAA_COUNT, bool CapturingReflections>
-	void TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::setLights(const GPULightData& lightData)
+	// Forwards the light data unchanged to mInternal.setLights().
+	template<int MSAA_COUNT>
+	void TTiledDeferredLightingMat<MSAA_COUNT>::setLights(const GPULightData& lightData)
 	{
 		mInternal.setLights(lightData);
 	}
 
-	template<int MSAA_COUNT, bool CapturingReflections>
-	void TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::setReflectionProbes(const GPUReflProbeData& probeData,
-		const SPtr<Texture>& reflectionCubemaps)
-	{
-		mInternal.setReflectionProbes(probeData, reflectionCubemaps);
-	}
-
-	template<int MSAA_COUNT, bool CapturingReflections>
-	void TTiledDeferredLightingMat<MSAA_COUNT, CapturingReflections>::setSky(const SPtr<Texture>& skyReflections,
-		const SPtr<Texture>& skyIrradiance, float brightness)
-	{
-		mInternal.setSky(skyReflections, skyIrradiance, brightness);
-	}
-
 	TiledDeferredLightingMaterials::TiledDeferredLightingMaterials()
 	{
-		mInstances[0] = bs_new<TTiledDeferredLightingMat<1, false>>();
-		mInstances[1] = bs_new<TTiledDeferredLightingMat<2, false>>();
-		mInstances[2] = bs_new<TTiledDeferredLightingMat<4, false>>();
-		mInstances[3] = bs_new<TTiledDeferredLightingMat<8, false>>();
-
-		mInstances[4] = bs_new<TTiledDeferredLightingMat<1, true>>();
-		mInstances[5] = bs_new<TTiledDeferredLightingMat<2, true>>();
-		mInstances[6] = bs_new<TTiledDeferredLightingMat<4, true>>();
-		mInstances[7] = bs_new<TTiledDeferredLightingMat<8, true>>();
+		// One material per MSAA sample count (1, 2, 4, 8) in slots 0-3. get() indexes by this order and the destructor
+		// deletes exactly these four slots, so all three must be kept in sync.
+		mInstances[0] = bs_new<TTiledDeferredLightingMat<1>>();
+		mInstances[1] = bs_new<TTiledDeferredLightingMat<2>>();
+		mInstances[2] = bs_new<TTiledDeferredLightingMat<4>>();
+		mInstances[3] = bs_new<TTiledDeferredLightingMat<8>>();
 	}
 
 	TiledDeferredLightingMaterials::~TiledDeferredLightingMaterials()
 	{
-		for (UINT32 i = 0; i < 8; i++)
+		// Deletes the four material instances created with bs_new in the constructor (slots 0-3).
+		for (UINT32 i = 0; i < 4; i++)
 			bs_delete(mInstances[i]);
 	}
 
-	ITiledDeferredLightingMat* TiledDeferredLightingMaterials::get(UINT32 msaa, bool capturingReflections)
+	// Returns the material instance built for the given MSAA sample count: 1, 2 and 4 map to slots 0, 1 and 2.
+	// Every other value, not just 8, falls through to the 8-sample instance in slot 3.
+	// NOTE(review): confirm callers never pass a sample count outside {1, 2, 4, 8} (e.g. 0), since that would silently
+	// select the 8x variation.
+	ITiledDeferredLightingMat* TiledDeferredLightingMaterials::get(UINT32 msaa)
 	{
-		if (!capturingReflections)
-		{
-			if (msaa == 1)
-				return mInstances[0];
-			else if (msaa == 2)
-				return mInstances[1];
-			else if (msaa == 4)
-				return mInstances[2];
-			else
-				return mInstances[3];
-		}
+		if (msaa == 1)
+			return mInstances[0];
+		else if (msaa == 2)
+			return mInstances[1];
+		else if (msaa == 4)
+			return mInstances[2];
 		else
-		{
-			if (msaa == 1)
-				return mInstances[4];
-			else if (msaa == 2)
-				return mInstances[5];
-			else if (msaa == 4)
-				return mInstances[6];
-			else
-				return mInstances[7];
-		}
+			return mInstances[3];
 	}
 
 	FlatFramebufferToTextureParamDef gFlatFramebufferToTextureParamDef;

+ 0 - 79
Source/RenderBeast/Source/BsReflectionProbeSampling.cpp

@@ -1,79 +0,0 @@
-//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
-//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
-#include "BsReflectionProbeSampling.h"
-#include "BsMaterial.h"
-#include "BsShader.h"
-#include "BsRenderBeast.h"
-#include "BsGpuBuffer.h"
-#include "BsReflectionProbe.h"
-#include "BsLightProbeCache.h"
-
-namespace bs { namespace ct
-{
-	static const UINT32 BUFFER_INCREMENT = 16 * sizeof(ReflProbeData);
-
-	ReflProbeParamsParamDef gReflProbeParamsParamDef;
-
-	GPUReflProbeData::GPUReflProbeData()
-		:mNumProbes(0)
-	{ }
-
-	void GPUReflProbeData::setProbes(const Vector<ReflProbeData>& probeData, UINT32 numProbes)
-	{
-		mNumProbes = numProbes;
-
-		UINT32 size = numProbes * sizeof(ReflProbeData);
-		UINT32 curBufferSize;
-
-		if (mProbeBuffer != nullptr)
-			curBufferSize = mProbeBuffer->getSize();
-		else
-			curBufferSize = 0;
-
-		if (size > curBufferSize || curBufferSize == 0)
-		{
-			// Allocate at least one block even if no probes, to avoid issues with null buffers
-			UINT32 bufferSize = std::max(1, Math::ceilToInt(size / (float)BUFFER_INCREMENT)) * BUFFER_INCREMENT;
-
-			GPU_BUFFER_DESC bufferDesc;
-			bufferDesc.type = GBT_STRUCTURED;
-			bufferDesc.elementCount = bufferSize / sizeof(ReflProbeData);
-			bufferDesc.elementSize = sizeof(ReflProbeData);
-			bufferDesc.format = BF_UNKNOWN;
-
-			mProbeBuffer = GpuBuffer::create(bufferDesc);
-		}
-
-		if (size > 0)
-			mProbeBuffer->writeData(0, size, probeData.data(), BWT_DISCARD);
-	}
-
-	RendererReflectionProbe::RendererReflectionProbe(ReflectionProbe* probe)
-		:probe(probe)
-	{
-		arrayIdx = -1;
-		texture = nullptr;
-		customTexture = probe->getCustomTexture() != nullptr;
-		textureDirty = LightProbeCache::instance().isRadianceDirty(probe->getUUID());
-		arrayDirty = true;
-		errorFlagged = false;
-	}
-
-	void RendererReflectionProbe::getParameters(ReflProbeData& output) const
-	{
-		output.type = probe->getType() == ReflectionProbeType::Sphere ? 0 
-			: probe->getType() == ReflectionProbeType::Box ? 1 : 2;
-		
-		output.position = probe->getPosition();
-		output.boxExtents = probe->getExtents();
-
-		if (probe->getType() == ReflectionProbeType::Sphere)
-			output.radius = probe->getRadius();
-		else
-			output.radius = output.boxExtents.length();
-
-		output.transitionDistance = probe->getTransitionDistance();
-		output.cubemapIdx = arrayIdx;
-		output.invBoxTransform.setInverseTRS(output.position, probe->getRotation(), output.boxExtents);
-	}
-}}

+ 33 - 23
Source/RenderBeast/Source/BsRenderBeast.cpp

@@ -46,18 +46,9 @@ namespace bs { namespace ct
 	constexpr UINT32 MaxReflectionCubemaps = 2048 / 6;
 
 	RenderBeast::RenderBeast()
-		: mDefaultMaterial(nullptr)
-		, mTiledDeferredLightingMats()
-		, mFlatFramebufferToTextureMat(nullptr)
-		, mSkyboxMat(nullptr)
-		, mSkyboxSolidColorMat(nullptr)
-		, mGPULightData(nullptr)
-		, mGPUReflProbeData(nullptr)
-		, mLightGrid(nullptr)
-		, mObjectRenderer(nullptr)
-		, mOptions(bs_shared_ptr_new<RenderBeastOptions>())
-		, mOptionsDirty(true)
-	{ }
+	{
+		// Only the options object is created here.
+		// NOTE(review): the pointer members and mOptionsDirty are no longer initialized in this constructor; presumably
+		// they now have in-class default initializers in the header - confirm, in particular that mOptionsDirty still
+		// starts out as true.
+		mOptions = bs_shared_ptr_new<RenderBeastOptions>();
+	}
 
 	const StringID& RenderBeast::getName() const
 	{
@@ -93,8 +84,9 @@ namespace bs { namespace ct
 		mFlatFramebufferToTextureMat = bs_new<FlatFramebufferToTextureMat>();
 
 		mTiledDeferredLightingMats = bs_new<TiledDeferredLightingMaterials>();
+		mTileDeferredImageBasedLightingMats = bs_new<TiledDeferredImageBasedLightingMaterials>();
 
-		mPreintegratedEnvBRDF = TiledDeferredLighting::generatePreintegratedEnvBRDF();
+		mPreintegratedEnvBRDF = TiledDeferredImageBasedLighting::generatePreintegratedEnvBRDF();
 		mGPULightData = bs_new<GPULightData>();
 		mGPUReflProbeData = bs_new<GPUReflProbeData>();
 		mLightGrid = bs_new<LightGrid>();
@@ -135,6 +127,7 @@ namespace bs { namespace ct
 		bs_delete(mLightGrid);
 		bs_delete(mFlatFramebufferToTextureMat);
 		bs_delete(mTiledDeferredLightingMats);
+		bs_delete(mTileDeferredImageBasedLightingMats);
 
 		mPreintegratedEnvBRDF = nullptr;
 
@@ -991,6 +984,7 @@ namespace bs { namespace ct
 		}
 
 		SPtr<RenderTargets> renderTargets = viewInfo->getRenderTargets();
+		renderTargets->allocate(RTT_GBuffer);
 		renderTargets->bindGBuffer();
 
 		// Trigger pre-base-pass callbacks
@@ -1038,18 +1032,32 @@ namespace bs { namespace ct
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(nullptr);
 
-		// Render light pass
+		// Render light pass into light accumulation buffer
 		UINT32 numSamples = viewInfo->getNumSamples();
-		ITiledDeferredLightingMat* lightingMat = mTiledDeferredLightingMats->get(numSamples, viewInfo->isRenderingReflections());
+		ITiledDeferredLightingMat* lightingMat = mTiledDeferredLightingMats->get(numSamples);
+
+		renderTargets->allocate(RTT_LightAccumulation);
 
 		lightingMat->setLights(*mGPULightData);
-		lightingMat->setReflectionProbes(*mGPUReflProbeData, mReflCubemapArrayTex);
-		lightingMat->setSky(mSkyboxFilteredReflections, mSkyboxIrradiance, mSkybox->getBrightness());
+		lightingMat->execute(renderTargets, perCameraBuffer, viewInfo->renderWithNoLighting());
+
+		renderTargets->allocate(RTT_SceneColor);
 
-		lightingMat->execute(renderTargets, perCameraBuffer, mPreintegratedEnvBRDF, viewInfo->renderWithNoLighting());
+		// Render image based lighting and add it with light accumulation, output to scene color
+		// Note: Image based lighting is split from direct lighting in order to reduce load on GPU shared memory. The
+		// image based shader ends up re-doing a lot of calculations and it could be beneficial to profile and see if
+		// both methods can be squeezed into the same shader.
+		ITiledDeferredImageBasedLightingMat* imageBasedLightingMat =
+			mTileDeferredImageBasedLightingMats->get(numSamples, viewInfo->isRenderingReflections());
 
-		const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
-		bool usingFlattenedFB = numSamples > 1 && !rapiInfo.isFlagSet(RenderAPIFeatureFlag::MSAAImageStores);
+		imageBasedLightingMat->setReflectionProbes(*mGPUReflProbeData, mReflCubemapArrayTex);
+		imageBasedLightingMat->setSky(mSkyboxFilteredReflections, mSkyboxIrradiance, mSkybox->getBrightness());
+		imageBasedLightingMat->execute(renderTargets, perCameraBuffer, mPreintegratedEnvBRDF);
+
+		renderTargets->release(RTT_LightAccumulation);
+		renderTargets->release(RTT_GBuffer);
+
+		bool usingFlattenedFB = numSamples > 1;
 
 		renderTargets->bindSceneColor(true);
 
@@ -1057,7 +1065,7 @@ namespace bs { namespace ct
 		// continuing
 		if(usingFlattenedFB)
 		{
-			mFlatFramebufferToTextureMat->execute(renderTargets->getFlattenedSceneColorBuffer(), 
+			mFlatFramebufferToTextureMat->execute(renderTargets->getSceneColorBuffer(), 
 												  renderTargets->getSceneColor());
 		}
 
@@ -1112,7 +1120,7 @@ namespace bs { namespace ct
 			// If using MSAA, resolve into non-MSAA texture before post-processing
 			if(numSamples > 1)
 			{
-				rapi.setRenderTarget(renderTargets->getSceneColorNonMSAART());
+				rapi.setRenderTarget(renderTargets->getResolvedSceneColorRT());
 				rapi.setViewport(viewportArea);
 
 				SPtr<Texture> sceneColor = renderTargets->getSceneColor();
@@ -1120,7 +1128,7 @@ namespace bs { namespace ct
 			}
 
 			// Post-processing code also takes care of writing to the final output target
-			PostProcessing::instance().postProcess(viewInfo, renderTargets->getSceneColorNonMSAA(), frameDelta);
+			PostProcessing::instance().postProcess(viewInfo, renderTargets->getResolvedSceneColor(), frameDelta);
 		}
 		else
 		{
@@ -1134,6 +1142,8 @@ namespace bs { namespace ct
 			gRendererUtility().blit(sceneColor, Rect2I::EMPTY, viewInfo->getFlipView());
 		}
 
+		renderTargets->release(RTT_SceneColor);
+
 		// Trigger overlay callbacks
 		if (viewInfo->checkTriggerCallbacks())
 		{

+ 156 - 89
Source/RenderBeast/Source/BsRenderTargets.cpp

@@ -23,7 +23,27 @@ namespace bs { namespace ct
 		return bs_shared_ptr<RenderTargets>(new (bs_alloc<RenderTargets>()) RenderTargets(view, hdr));
 	}
 
-	void RenderTargets::allocate()
+	// Acquires the depth/stencil texture from the GPU resource pool, sized to the view rectangle and using the view's
+	// sample count. Only depth is acquired here; the G-buffer, scene color and light accumulation targets are obtained
+	// separately through allocate(type). The depth texture is returned to the pool in cleanup().
+	void RenderTargets::prepare()
+	{
+		GpuResourcePool& texPool = GpuResourcePool::instance();
+
+		UINT32 width = mViewTarget.viewRect.width;
+		UINT32 height = mViewTarget.viewRect.height;
+
+		mDepthTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_D32_S8X24, width, height, TU_DEPTHSTENCIL, 
+			mViewTarget.numSamples, false));
+	}
+
+	// Counterpart of prepare(): unbinds the current render target and then returns the depth/stencil texture to the
+	// GPU resource pool. Targets obtained through allocate(type) are not touched here; they are returned via
+	// release(type).
+	void RenderTargets::cleanup()
+	{
+		RenderAPI& rapi = RenderAPI::instance();
+		rapi.setRenderTarget(nullptr);
+
+		GpuResourcePool& texPool = GpuResourcePool::instance();
+		texPool.release(mDepthTex);
+	}
+
+	void RenderTargets::allocate(RenderTargetType type)
 	{
 		GpuResourcePool& texPool = GpuResourcePool::instance();
 
@@ -34,103 +54,140 @@ namespace bs { namespace ct
 		// could save on memory by deallocating and reallocating them every frame, but it remains to be seen how much of
 		// a performance impact would that have.
 
-		// Note: Albedo is allocated as SRGB, meaning when reading from textures during depth pass we decode from sRGB into linear,
-		// then back into sRGB when writing to albedo, and back to linear when reading from albedo during light pass. This /might/ have
-		// a performance impact. In which case we could just use a higher precision albedo buffer, which can then store linear color
-		// directly (storing linear in 8bit buffer causes too much detail to be lost in the blacks).
-		SPtr<PooledRenderTexture> newColorRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width, 
-			height, TU_RENDERTARGET | TU_LOADSTORE, mViewTarget.numSamples, false));
-		SPtr<PooledRenderTexture> newAlbedoRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mAlbedoFormat, width, 
-			height, TU_RENDERTARGET, mViewTarget.numSamples, true));
-		SPtr<PooledRenderTexture> newNormalRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mNormalFormat, width, 
-			height, TU_RENDERTARGET, mViewTarget.numSamples, false));
-		SPtr<PooledRenderTexture> newRoughMetalRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT16_RG, width,
-			height, TU_RENDERTARGET, mViewTarget.numSamples, false));
-		SPtr<PooledRenderTexture> newDepthRT = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_D32_S8X24, width, height, 
-			TU_DEPTHSTENCIL, mViewTarget.numSamples, false));
-
-		if(mViewTarget.numSamples > 1)
+		if (type == RTT_GBuffer)
 		{
-			const RenderAPIInfo& rapiInfo = RenderAPI::instance().getAPIInfo();
-
-			// DX11/HLSL is unable to have an UAV for a multisampled texture, so we need to use a buffer instead and then
-			// perform a blit to the actual scene color
-			if (!rapiInfo.isFlagSet(RenderAPIFeatureFlag::MSAAImageStores))
+			// Note: Albedo is allocated as SRGB, meaning when reading from textures during depth pass we decode from sRGB
+			// into linear, then back into sRGB when writing to albedo, and back to linear when reading from albedo during
+			// light pass. This /might/ have a performance impact. In which case we could just use a higher precision albedo
+			// buffer, which can then store linear color directly (storing linear in 8bit buffer causes too much detail to
+			// be lost in the blacks).
+			mAlbedoTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mAlbedoFormat, width, height, TU_RENDERTARGET,
+				mViewTarget.numSamples, true));
+			mNormalTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mNormalFormat, width, height, TU_RENDERTARGET,
+				mViewTarget.numSamples, false));
+			mRoughMetalTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(PF_FLOAT16_RG, width, height, TU_RENDERTARGET,
+				mViewTarget.numSamples, false)); // Note: Metal doesn't need 16-bit float
+
+			bool rebuildRT = false;
+			if (mGBufferRT != nullptr)
 			{
-				UINT32 bufferNumElements = width * height * mViewTarget.numSamples;
-				mFlattenedSceneColorBuffer = texPool.get(POOLED_STORAGE_BUFFER_DESC::createStandard(BF_16X4F, bufferNumElements));
+				rebuildRT |= mGBufferRT->getColorTexture(0) != mAlbedoTex->texture;
+				rebuildRT |= mGBufferRT->getColorTexture(1) != mNormalTex->texture;
+				rebuildRT |= mGBufferRT->getColorTexture(2) != mRoughMetalTex->texture;
+				rebuildRT |= mGBufferRT->getDepthStencilTexture() != mDepthTex->texture;
 			}
+			else
+				rebuildRT = true;
 
-			// Need a texture we'll resolve MSAA to before post-processing
-			mSceneColorNonMSAATex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width,
-																					 height, TU_RENDERTARGET, 1, false));
+			if (mGBufferRT == nullptr || rebuildRT)
+			{
+				RENDER_TEXTURE_DESC gbufferDesc;
+				gbufferDesc.colorSurfaces[0].texture = mAlbedoTex->texture;
+				gbufferDesc.colorSurfaces[0].face = 0;
+				gbufferDesc.colorSurfaces[0].numFaces = 1;
+				gbufferDesc.colorSurfaces[0].mipLevel = 0;
+
+				gbufferDesc.colorSurfaces[1].texture = mNormalTex->texture;
+				gbufferDesc.colorSurfaces[1].face = 0;
+				gbufferDesc.colorSurfaces[1].numFaces = 1;
+				gbufferDesc.colorSurfaces[1].mipLevel = 0;
+
+				gbufferDesc.colorSurfaces[2].texture = mRoughMetalTex->texture;
+				gbufferDesc.colorSurfaces[2].face = 0;
+				gbufferDesc.colorSurfaces[2].numFaces = 1;
+				gbufferDesc.colorSurfaces[2].mipLevel = 0;
+
+				gbufferDesc.depthStencilSurface.texture = mDepthTex->texture;
+				gbufferDesc.depthStencilSurface.face = 0;
+				gbufferDesc.depthStencilSurface.mipLevel = 0;
+
+				mGBufferRT = RenderTexture::create(gbufferDesc);
+			}
 		}
+		else if(type == RTT_SceneColor)
+		{
+			mSceneColorTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width,
+				height, TU_RENDERTARGET | TU_LOADSTORE, mViewTarget.numSamples, false));
+
+			if (mViewTarget.numSamples > 1)
+			{
+				UINT32 bufferNumElements = width * height * mViewTarget.numSamples;
+				mFlattenedSceneColorBuffer = texPool.get(POOLED_STORAGE_BUFFER_DESC::createStandard(BF_16X4F, bufferNumElements));
 
-		bool rebuildTargets = newColorRT != mSceneColorTex || newAlbedoRT != mAlbedoTex || newNormalRT != mNormalTex 
-			|| newRoughMetalRT != mRoughMetalTex || newDepthRT != mDepthTex;
+				// Need a texture we'll resolve MSAA to before post-processing
+				mSceneColorNonMSAATex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width,
+					height, TU_RENDERTARGET, 1, false));
+			}
 
-		mSceneColorTex = newColorRT;
-		mAlbedoTex = newAlbedoRT;
-		mNormalTex = newNormalRT;
-		mRoughMetalTex = newRoughMetalRT;
-		mDepthTex = newDepthRT;
+			bool rebuildRT = false;
+			if (mSceneColorRT != nullptr)
+			{
+				rebuildRT |= mSceneColorRT->getColorTexture(0) != mSceneColorTex->texture;
+				rebuildRT |= mSceneColorRT->getDepthStencilTexture() != mDepthTex->texture;
+			}
+			else
+				rebuildRT = true;
 
-		if (mGBufferRT == nullptr || mSceneColorRT == nullptr || rebuildTargets)
+			if (rebuildRT)
+			{
+				RENDER_TEXTURE_DESC sceneColorDesc;
+				sceneColorDesc.colorSurfaces[0].texture = mSceneColorTex->texture;
+				sceneColorDesc.colorSurfaces[0].face = 0;
+				sceneColorDesc.colorSurfaces[0].numFaces = 1;
+				sceneColorDesc.colorSurfaces[0].mipLevel = 0;
+
+				sceneColorDesc.depthStencilSurface.texture = mDepthTex->texture;
+				sceneColorDesc.depthStencilSurface.face = 0;
+				sceneColorDesc.depthStencilSurface.numFaces = 1;
+				sceneColorDesc.depthStencilSurface.mipLevel = 0;
+
+				mSceneColorRT = TextureManager::instance().createRenderTexture(sceneColorDesc);
+			}
+		}
+		else if(type == RTT_LightAccumulation)
 		{
-			RENDER_TEXTURE_DESC gbufferDesc;
-			gbufferDesc.colorSurfaces[0].texture = mAlbedoTex->texture;
-			gbufferDesc.colorSurfaces[0].face = 0;
-			gbufferDesc.colorSurfaces[0].numFaces = 1;
-			gbufferDesc.colorSurfaces[0].mipLevel = 0;
-
-			gbufferDesc.colorSurfaces[1].texture = mNormalTex->texture;
-			gbufferDesc.colorSurfaces[1].face = 0;
-			gbufferDesc.colorSurfaces[1].numFaces = 1;
-			gbufferDesc.colorSurfaces[1].mipLevel = 0;
-
-			gbufferDesc.colorSurfaces[2].texture = mRoughMetalTex->texture;
-			gbufferDesc.colorSurfaces[2].face = 0;
-			gbufferDesc.colorSurfaces[2].numFaces = 1;
-			gbufferDesc.colorSurfaces[2].mipLevel = 0;
-
-			gbufferDesc.depthStencilSurface.texture = mDepthTex->texture;
-			gbufferDesc.depthStencilSurface.face = 0;
-			gbufferDesc.depthStencilSurface.mipLevel = 0;
-
-			mGBufferRT = RenderTexture::create(gbufferDesc);
-
-			RENDER_TEXTURE_DESC sceneColorDesc;
-			sceneColorDesc.colorSurfaces[0].texture = mSceneColorTex->texture;
-			sceneColorDesc.colorSurfaces[0].face = 0;
-			sceneColorDesc.colorSurfaces[0].numFaces = 1;
-			sceneColorDesc.colorSurfaces[0].mipLevel = 0;
-
-			sceneColorDesc.depthStencilSurface.texture = mDepthTex->texture;
-			sceneColorDesc.depthStencilSurface.face = 0;
-			sceneColorDesc.depthStencilSurface.numFaces = 1;
-			sceneColorDesc.depthStencilSurface.mipLevel = 0;
-
-			mSceneColorRT = TextureManager::instance().createRenderTexture(sceneColorDesc);
+			if (mViewTarget.numSamples > 1)
+			{
+				UINT32 bufferNumElements = width * height * mViewTarget.numSamples;
+				mFlattenedLightAccumulationBuffer =
+					texPool.get(POOLED_STORAGE_BUFFER_DESC::createStandard(BF_16X4F, bufferNumElements));
+			}
+			else
+			{
+				mLightAccumulationTex = texPool.get(POOLED_RENDER_TEXTURE_DESC::create2D(mSceneColorFormat, width,
+					height, TU_LOADSTORE, mViewTarget.numSamples, false));
+			}
 		}
 	}
 
-	void RenderTargets::release()
+	// Returns the pooled resources of the render target group "type" to the GPU resource pool. Each branch mirrors the
+	// matching branch of allocate(type). The depth texture is not handled here; it is released in cleanup().
+	void RenderTargets::release(RenderTargetType type)
 	{
-		RenderAPI& rapi = RenderAPI::instance();
-		rapi.setRenderTarget(nullptr);
-
 		GpuResourcePool& texPool = GpuResourcePool::instance();
 
-		texPool.release(mSceneColorTex);
-		texPool.release(mAlbedoTex);
-		texPool.release(mNormalTex);
-		texPool.release(mDepthTex);
+		if (type == RTT_GBuffer)
+		{
+			// Release exactly what allocate(RTT_GBuffer) acquired: albedo, normals and roughness/metalness. Scene color
+			// must not be released here - it belongs to RTT_SceneColor, is still bound after the G-buffer is released,
+			// and would otherwise be returned to the pool twice.
+			texPool.release(mAlbedoTex);
+			texPool.release(mNormalTex);
+			texPool.release(mRoughMetalTex);
+		}
+		else if(type == RTT_SceneColor)
+		{
+			texPool.release(mSceneColorTex);
 
-		if(mSceneColorNonMSAATex != nullptr)
-			texPool.release(mSceneColorNonMSAATex);
+			// The resolve texture and the flattened buffer are only allocated when rendering with MSAA
+			if (mSceneColorNonMSAATex != nullptr)
+				texPool.release(mSceneColorNonMSAATex);
 
-		if (mFlattenedSceneColorBuffer != nullptr)
-			texPool.release(mFlattenedSceneColorBuffer);
+			if (mFlattenedSceneColorBuffer != nullptr)
+				texPool.release(mFlattenedSceneColorBuffer);
+		}
+		else if(type == RTT_LightAccumulation)
+		{
+			// allocate() creates either the texture (no MSAA) or the flattened buffer (MSAA), never both
+			if (mLightAccumulationTex != nullptr)
+				texPool.release(mLightAccumulationTex);
+
+			if (mFlattenedLightAccumulationBuffer != nullptr)
+				texPool.release(mFlattenedLightAccumulationBuffer);
+		}
 	}
 
 	void RenderTargets::bindGBuffer()
@@ -146,7 +203,7 @@ namespace bs { namespace ct
 		if (clearFlags != 0)
 		{
 			RenderAPI::instance().clearViewport(clearFlags, mViewTarget.clearColor,
-												mViewTarget.clearDepthValue, mViewTarget.clearStencilValue, 0x01);
+				mViewTarget.clearDepthValue, mViewTarget.clearStencilValue, 0x01);
 		}
 
 		// Clear all non primary targets (Note: I could perhaps clear all but albedo, since it stores a per-pixel write mask)
@@ -167,27 +224,27 @@ namespace bs { namespace ct
 		return mSceneColorTex->texture;
 	}
 
-	SPtr<Texture> RenderTargets::getTextureA() const
+	// G-buffer target A: the albedo texture (bound as color surface 0 of the G-buffer render target in allocate()).
+	SPtr<Texture> RenderTargets::getGBufferA() const
 	{
 		return mAlbedoTex->texture;
 	}
 
-	SPtr<Texture> RenderTargets::getTextureB() const
+	// G-buffer target B: the normals texture (bound as color surface 1 of the G-buffer render target in allocate()).
+	SPtr<Texture> RenderTargets::getGBufferB() const
 	{
 		return mNormalTex->texture;
 	}
 
-	SPtr<Texture> RenderTargets::getTextureC() const
+	// G-buffer target C: the roughness/metalness texture (bound as color surface 2 of the G-buffer render target in
+	// allocate()).
+	SPtr<Texture> RenderTargets::getGBufferC() const
 	{
 		return mRoughMetalTex->texture;
 	}
 
-	SPtr<Texture> RenderTargets::getTextureDepth() const
+	// Depth/stencil texture acquired in prepare(). The same texture is attached to both the G-buffer and the scene
+	// color render targets in allocate().
+	SPtr<Texture> RenderTargets::getSceneDepth() const
 	{
 		return mDepthTex->texture;
 	}
 
-	SPtr<Texture> RenderTargets::getSceneColorNonMSAA() const
+	SPtr<Texture> RenderTargets::getResolvedSceneColor() const
 	{
 		if (mSceneColorNonMSAATex != nullptr)
 			return mSceneColorNonMSAATex->texture;
@@ -195,7 +252,7 @@ namespace bs { namespace ct
 		return getSceneColor();
 	}
 
-	SPtr<RenderTexture> RenderTargets::getSceneColorNonMSAART() const
+	SPtr<RenderTexture> RenderTargets::getResolvedSceneColorRT() const
 	{
 		if (mSceneColorNonMSAATex != nullptr)
 			return mSceneColorNonMSAATex->renderTexture;
@@ -203,8 +260,18 @@ namespace bs { namespace ct
 		return mSceneColorTex->renderTexture;
 	}
 
-	SPtr<GpuBuffer> RenderTargets::getFlattenedSceneColorBuffer() const
+	// Flattened scene color storage buffer. allocate(RTT_SceneColor) only creates it when the view uses more than one
+	// sample, and the pointer is dereferenced without a null check, so call this only when rendering with MSAA.
+	SPtr<GpuBuffer> RenderTargets::getSceneColorBuffer() const
 	{
 		return mFlattenedSceneColorBuffer->buffer;
 	}
+
+	// Light accumulation texture. allocate(RTT_LightAccumulation) only creates it when the view is not multisampled
+	// (the MSAA path uses getLightAccumulationBuffer() instead), and the pointer is dereferenced without a null check.
+	SPtr<Texture> RenderTargets::getLightAccumulation() const
+	{
+		return mLightAccumulationTex->texture;
+	}
+
+	// Flattened light accumulation storage buffer. allocate(RTT_LightAccumulation) only creates it when the view uses
+	// more than one sample (otherwise use getLightAccumulation()), and the pointer is dereferenced without a null
+	// check.
+	SPtr<GpuBuffer> RenderTargets::getLightAccumulationBuffer() const
+	{
+		return mFlattenedLightAccumulationBuffer->buffer;
+	}
 }}

+ 2 - 2
Source/RenderBeast/Source/BsRendererCamera.cpp

@@ -126,7 +126,7 @@ namespace bs { namespace ct
 			if (createGBuffer)
 				mRenderTargets = RenderTargets::create(mViewDesc.target, mViewDesc.isHDR);
 
-			mRenderTargets->allocate();
+			mRenderTargets->prepare();
 			mUsingGBuffer = true;
 		}
 	}
@@ -138,7 +138,7 @@ namespace bs { namespace ct
 
 		if(mUsingGBuffer)
 		{
-			mRenderTargets->release();
+			mRenderTargets->cleanup();
 			mUsingGBuffer = false;
 		}
 	}