Преглед изворни кода

Tiled deferred rendering: Moving all lights to a single buffer to make shader code simpler

BearishSun пре 9 година
родитељ
комит
821069bd0f

+ 83 - 128
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -23,8 +23,7 @@ Technique
 		Compute = 
 		{
 			// Arbitrary limit, increase if needed
-            #define MAX_SPOT_LIGHTS 512
-            #define MAX_RADIAL_LIGHTS 512		
+            #define MAX_LIGHTS 512
 
 			SamplerState 	gGBufferASamp : register(s0);
 			SamplerState 	gGBufferBSamp : register(s1);
@@ -65,26 +64,24 @@ Technique
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 			}	
 			
-			StructuredBuffer<LightData> gDirLights : register(t3);
-			StructuredBuffer<LightData> gPointLights : register(t4);
-			StructuredBuffer<LightData> gSpotLights  : register(t5);				
+			StructuredBuffer<LightData> gLights : register(t3);		
 		
 			RWTexture2D<float4>	gOutput : register(u0);
 		
 			cbuffer Params : register(b0)
 			{
-				// x - directional, y - point, z - spot
-				uint3 gNumLightsPerType;
+				// Offsets at which specific light types begin in gLights buffer
+				// Assumed directional lights start at 0
+				// x - offset to point lights, y - offset to spot lights, z - total number of lights
+				uint3 gLightOffsets;
 			}
 			
 			groupshared uint sTileMinZ;
 			groupshared uint sTileMaxZ;
 
-            groupshared uint sNumRadialLights;
-            groupshared uint sNumSpotLights;
-
-            groupshared uint sRadialLightIndices[MAX_RADIAL_LIGHTS];
-            groupshared uint sSpotLightIndices[MAX_SPOT_LIGHTS];
+            groupshared uint sNumLightsPerType[2];
+			groupshared uint sTotalNumLights;
+            groupshared uint sLightIndices[MAX_LIGHTS];
 
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
 			void main(
@@ -103,8 +100,9 @@ Technique
 				{
 					sTileMinZ = 0x7F7FFFFF;
 					sTileMaxZ = 0;
-					sNumRadialLights = 0;
-					sNumSpotLights = 0;
+					sNumLightsPerType[0] = 0; // Radial (point) lights found in this tile
+					sNumLightsPerType[1] = 0; // Spot lights found in this tile (was never reset: index 0 was zeroed twice)
+					sTotalNumLights = 0;
 				}
 				
 				GroupMemoryBarrierWithGroupSync();
@@ -185,90 +183,55 @@ Technique
 				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
 				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
 				
-                // Find lights overlapping the tile
-                for (uint i = threadIndex; i < gNumLightsPerType.y && i < MAX_RADIAL_LIGHTS; i += TILE_SIZE)
-                {
-                    float4 lightPosition = mul(gMatView, float4(gPointLights[i].position, 1.0f));
-                    float lightRadius = gPointLights[i].radius;
-                    
-		            // Note: The cull method can have false positives. In case of large light bounds and small tiles, it
-                    // can end up being quite a lot. Consider adding an extra heuristic to check a separating plane.
-                    bool lightInTile = true;
-				
-                    // First check side planes as this will cull majority of the lights
-                    [unroll]
-                    for (uint j = 0; j < 4; ++j)
-                    {
-                        float dist = dot(frustumPlanes[j], lightPosition);
-                        lightInTile = lightInTile && (dist >= -lightRadius);
-                    }
-
-                    // Make sure to do an actual branch, since it's quite likely an entire warp will have the same value
-                    [branch]
-                    if (lightInTile)
-                    {
-                        bool inDepthRange = true;
-				
-			            // Check near/far planes
-                        [unroll]
-                        for (uint j = 4; j < 6; ++j)
-                        {
-                            float dist = dot(frustumPlanes[j], lightPosition);
-                            inDepthRange = inDepthRange && (dist >= -lightRadius);
-                        }
-                        
-                        // In tile, add to branch
-                        [branch]
-                        if (inDepthRange)
-                        {
-                            uint idx;
-                            InterlockedAdd(sNumRadialLights, 1U, idx);
-                            sRadialLightIndices[idx] = i;
-                        }
-                    }
-                }
-
-                for (uint i = threadIndex; i < gNumLightsPerType.z && i < MAX_SPOT_LIGHTS; i += TILE_SIZE)
-                {
-                    float4 lightPosition = mul(gMatView, float4(gSpotLights[i].position, 1.0f));
-                    float lightRadius = gSpotLights[i].radius;
-                    
-		            // Note: The cull method can have false positives. In case of large light bounds and small tiles, it
-                    // can end up being quite a lot. Consider adding an extra heuristic to check a separating plane.
-                    bool lightInTile = true;
-				
-                    // First check side planes as this will cull majority of the lights
-                    [unroll]
-                    for (uint j = 0; j < 4; ++j)
-                    {
-                        float dist = dot(frustumPlanes[j], lightPosition);
-                        lightInTile = lightInTile && (dist >= -lightRadius);
-                    }
+                // Find radial & spot lights overlapping the tile
+				for(uint type = 0; type < 2; type++)
+				{
+					uint lightOffset = threadIndex + gLightOffsets[type];
+					uint lightsEnd = gLightOffsets[type + 1];
+					for (uint i = lightOffset; i < lightsEnd && i < MAX_LIGHTS; i += TILE_SIZE)
+					{
+						float4 lightPosition = mul(gMatView, float4(gLights[i].position, 1.0f));
+						float lightRadius = gLights[i].radius;
+						
+						// Note: The cull method can have false positives. In case of large light bounds and small tiles, it
+						// can end up being quite a lot. Consider adding an extra heuristic to check a separating plane.
+						bool lightInTile = true;
+					
+						// First check side planes as this will cull majority of the lights
+						[unroll]
+						for (uint j = 0; j < 4; ++j)
+						{
+							float dist = dot(frustumPlanes[j], lightPosition);
+							lightInTile = lightInTile && (dist >= -lightRadius);
+						}
 
-                    // Make sure to do an actual branch, since it's quite likely an entire warp will have the same value
-                    [branch]
-                    if (lightInTile)
-                    {
-                        bool inDepthRange = true;
-				
-			            // Check near/far planes
-                        [unroll]
-                        for (uint j = 4; j < 6; ++j)
-                        {
-                            float dist = dot(frustumPlanes[j], lightPosition);
-                            inDepthRange = inDepthRange && (dist >= -lightRadius);
-                        }
-                        
-                        // In tile, add to branch
-                        [branch]
-                        if (inDepthRange)
-                        {
-                            uint idx;
-                            InterlockedAdd(sNumSpotLights, 1U, idx);
-                            sSpotLightIndices[idx] = i;
-                        }
-                    }
-                }
+						// Make sure to do an actual branch, since it's quite likely an entire warp will have the same value
+						[branch]
+						if (lightInTile)
+						{
+							bool inDepthRange = true;
+					
+							// Check near/far planes
+							[unroll]
+							for (uint j = 4; j < 6; ++j)
+							{
+								float dist = dot(frustumPlanes[j], lightPosition);
+								inDepthRange = inDepthRange && (dist >= -lightRadius);
+							}
+							
+							// Light overlaps the tile, add its index to the tile's shared light list
+							[branch]
+							if (inDepthRange)
+							{
+								InterlockedAdd(sNumLightsPerType[type], 1U);
+								
+								uint idx;
+								InterlockedAdd(sTotalNumLights, 1U, idx);
+								sLightIndices[idx] = i;
+							}
+						}
+					}
+				}
 
                 GroupMemoryBarrierWithGroupSync();
 
@@ -279,19 +242,19 @@ Technique
 				float alpha = 0.0f;
 				if(surfaceData.worldNormal.w > 0.0f)
 				{
-					for(uint i = 0; i < gNumLightsPerType.x; ++i)
-						lightAccumulator += getDirLightContibution(surfaceData, gDirLights[i]);
+					for(uint i = 0; i < gLightOffsets[0]; ++i)
+						lightAccumulator += getDirLightContibution(surfaceData, gLights[i]);
 					
-                    for (uint i = 0; i < sNumRadialLights; ++i)
+                    for (uint i = 0; i < sNumLightsPerType[0]; ++i)
                     {
-                        uint lightIdx = sRadialLightIndices[i];
-                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gPointLights[lightIdx]);
+                        uint lightIdx = sLightIndices[i];
+                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
                     }
 
-					for(uint i = 0; i < sNumSpotLights; ++i)
+					for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i)
                     {
-                        uint lightIdx = sSpotLightIndices[i];
-                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gSpotLights[lightIdx]);
+                        uint lightIdx = sLightIndices[i];
+                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
                     }
 
 					alpha = 1.0f;
@@ -355,27 +318,19 @@ Technique
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 			}	
 			
-			layout(std430, binding = 4) buffer gDirLights
+			layout(std430, binding = 4) buffer gLights
 			{
-				LightData[] gDirLightsData;
+				LightData[] gLightsData;
 			};
+						
+			layout(binding = 5, rgba16f) uniform image2D gOutput;
 			
-			layout(std430, binding = 5) buffer gPointLights
-			{
-				LightData[] gPointLightsData;
-			};
-			
-			layout(std430, binding = 6) buffer gSpotLights
-			{
-				LightData[] gSpotLightsData;
-			};	
-			
-			layout(binding = 7, rgba16f) uniform image2D gOutput;
-			
-			layout(binding = 8, std140) uniform Params
+			layout(binding = 6, std140) uniform Params
 			{
-				// x - directional, y - point, z - spot
-				uvec3 gNumLightsPerType;
+				// Offsets at which specific light types begin in gLights buffer
+				// Assumed directional lights start at 0
+				// x - offset to point lights, y - offset to spot lights, z - total number of lights
+				uvec3 gLightOffsets;
 			};
 			
 			void main()
@@ -398,21 +353,21 @@ Technique
 					vec4 worldPosition4D = gMatScreenToWorld * mixedSpacePos;
 					vec3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
 					
-					for(uint i = 0; i < gNumLightsPerType.x; i++)
+					for(uint i = 0; i < gLightOffsets.x; i++)
 					{
-						LightData data = gDirLightsData[i];
+						LightData data = gLightsData[i];
 						lightAccumulator += getDirLightContibution(surfaceData, data);
 					}
 					
-					for(uint i = 0; i < gNumLightsPerType.y; i++)
+					for(uint i = gLightOffsets.x; i < gLightOffsets.y; i++)
 					{
-						LightData data = gPointLightsData[i];
+						LightData data = gLightsData[i];
 						lightAccumulator += getPointLightContribution(worldPosition, surfaceData, data);
 					}
 					
-					for(uint i = 0; i < gNumLightsPerType.z; i++)
+					for(uint i = gLightOffsets.y; i < gLightOffsets.z; i++)
 					{
-						LightData data = gSpotLightsData[i];
+						LightData data = gLightsData[i];
 						lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, data);
 					}
 					

+ 5 - 7
Source/RenderBeast/Include/BsLightRendering.h

@@ -41,7 +41,7 @@ namespace bs { namespace ct
 	};
 
 	BS_PARAM_BLOCK_BEGIN(TiledLightingParamDef)
-		BS_PARAM_BLOCK_ENTRY(Vector3I, gNumLightsPerType)
+		BS_PARAM_BLOCK_ENTRY(Vector3I, gLightOffsets)
 	BS_PARAM_BLOCK_END
 
 	extern TiledLightingParamDef gTiledLightingParamDef;
@@ -58,20 +58,18 @@ namespace bs { namespace ct
 		void execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera);
 
 		/** Binds all the active lights. */
-		void setLights(const Vector<LightData> (&lightData)[3]);
+		void setLights(const Vector<LightData>& lightData, UINT32 numDirLights, UINT32 numRadialLights, 
+					   UINT32 numSpotLights);
 	private:
 		GpuParamTexture mGBufferA;
 		GpuParamTexture mGBufferB;
 		GpuParamTexture mGBufferDepth;
 
-		GpuParamBuffer mDirLightBufferParam;
-		GpuParamBuffer mPointLightBufferParam;
-		GpuParamBuffer mSpotLightBufferParam;
-
+		GpuParamBuffer mLightBufferParam;
 		GpuParamLoadStoreTexture mOutputParam;
 
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
-		SPtr<GpuBuffer> mLightBuffers[3];
+		SPtr<GpuBuffer> mLightBuffer;
 
 		static const UINT32 TILE_SIZE;
 	};

+ 2 - 2
Source/RenderBeast/Include/BsRenderBeast.h

@@ -208,7 +208,7 @@ namespace bs
 		Vector<bool> mRenderableVisibility; // Transient
 
 		Vector<RendererLight> mDirectionalLights;
-		Vector<RendererLight> mPointLights;
+		Vector<RendererLight> mRadialLights;
 		Vector<RendererLight> mSpotLights;
 		Vector<Sphere> mPointLightWorldBounds;
 		Vector<Sphere> mSpotLightWorldBounds;
@@ -226,7 +226,7 @@ namespace bs
 		bool mOptionsDirty;
 
 		// Helpers to avoid memory allocations
-		Vector<LightData> mLightDataTemp[3];
+		Vector<LightData> mLightDataTemp;
 		Vector<bool> mLightVisibilityTemp;
 	};
 

+ 29 - 33
Source/RenderBeast/Source/BsLightRendering.cpp

@@ -55,10 +55,7 @@ namespace bs { namespace ct
 				params->getTextureParam(GPT_COMPUTE_PROGRAM, entry.second.name, mGBufferDepth);
 		}
 
-		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gDirLights", mDirLightBufferParam);
-		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gPointLights", mPointLightBufferParam);
-		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gSpotLights", mSpotLightBufferParam);
-
+		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gLights", mLightBufferParam);
 		params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputParam);
 
 		mParamBuffer = gTiledLightingParamDef.createBuffer();
@@ -93,45 +90,44 @@ namespace bs { namespace ct
 		RenderAPI::instance().dispatchCompute(numTilesX, numTilesY);
 	}
 
-	void TiledDeferredLightingMat::setLights(const Vector<LightData>(&lightData)[3])
+	void TiledDeferredLightingMat::setLights(const Vector<LightData>& lightData, UINT32 numDirLights, 
+											 UINT32 numRadialLights, UINT32 numSpotLights)
 	{
-		Vector3I numLightsPerType;
-		for (UINT32 i = 0; i < 3; i++)
-		{
-			UINT32 numLights = (UINT32)lightData[i].size();
-			numLightsPerType[i] = numLights;
+		Vector3I lightOffsets;
+		lightOffsets[0] = numDirLights;
+		lightOffsets[1] = lightOffsets[0] + numRadialLights;
+		lightOffsets[2] = lightOffsets[1] + numSpotLights;
 
-			UINT32 size = numLights * sizeof(LightData);
-			UINT32 curBufferSize;
+		UINT32 totalNumLights = (UINT32)lightOffsets[2];
 
-			if (mLightBuffers[i] != nullptr)
-				curBufferSize = mLightBuffers[i]->getSize();
-			else
-				curBufferSize = 0;
+		UINT32 size = totalNumLights * sizeof(LightData);
+		UINT32 curBufferSize;
 
-			if(size > curBufferSize || curBufferSize == 0)
-			{
-				// Allocate at least one block even if no lights, to avoid issues with null buffers
-				UINT32 bufferSize = std::max(1, Math::ceilToInt(size / (float)BUFFER_INCREMENT)) * BUFFER_INCREMENT;
+		if (mLightBuffer != nullptr)
+			curBufferSize = mLightBuffer->getSize();
+		else
+			curBufferSize = 0;
 
-				GPU_BUFFER_DESC bufferDesc;
-				bufferDesc.type = GBT_STRUCTURED;
-				bufferDesc.elementCount = bufferSize / sizeof(LightData);
-				bufferDesc.elementSize = sizeof(LightData);
-				bufferDesc.format = BF_UNKNOWN;
+		if (size > curBufferSize || curBufferSize == 0)
+		{
+			// Allocate at least one block even if no lights, to avoid issues with null buffers
+			UINT32 bufferSize = std::max(1, Math::ceilToInt(size / (float)BUFFER_INCREMENT)) * BUFFER_INCREMENT;
 
-				mLightBuffers[i] = GpuBuffer::create(bufferDesc);
-			}
+			GPU_BUFFER_DESC bufferDesc;
+			bufferDesc.type = GBT_STRUCTURED;
+			bufferDesc.elementCount = bufferSize / sizeof(LightData);
+			bufferDesc.elementSize = sizeof(LightData);
+			bufferDesc.format = BF_UNKNOWN;
 
-			if(size > 0)
-				mLightBuffers[i]->writeData(0, size, lightData[i].data(), BWT_DISCARD);
+			mLightBuffer = GpuBuffer::create(bufferDesc);
 		}
 
-		mDirLightBufferParam.set(mLightBuffers[0]);
-		mPointLightBufferParam.set(mLightBuffers[1]);
-		mSpotLightBufferParam.set(mLightBuffers[2]);
+		if (size > 0)
+			mLightBuffer->writeData(0, size, lightData.data(), BWT_DISCARD);
+
+		mLightBufferParam.set(mLightBuffer);
 
-		gTiledLightingParamDef.gNumLightsPerType.set(mParamBuffer, numLightsPerType);
+		gTiledLightingParamDef.gLightOffsets.set(mParamBuffer, lightOffsets);
 
 		mParamBuffer->flushToGPU();
 	}

+ 19 - 19
Source/RenderBeast/Source/BsRenderBeast.cpp

@@ -304,10 +304,10 @@ namespace bs { namespace ct
 		{
 			if (light->getType() == LightType::Point)
 			{
-				UINT32 lightId = (UINT32)mPointLights.size();
+				UINT32 lightId = (UINT32)mRadialLights.size();
 				light->setRendererId(lightId);
 
-				mPointLights.push_back(RendererLight(light));
+				mRadialLights.push_back(RendererLight(light));
 				mPointLightWorldBounds.push_back(light->getBounds());
 			}
 			else // Spot
@@ -353,20 +353,20 @@ namespace bs { namespace ct
 		{
 			if (light->getType() == LightType::Point)
 			{
-				Light* lastLight = mPointLights.back().getInternal();
+				Light* lastLight = mRadialLights.back().getInternal();
 				UINT32 lastLightId = lastLight->getRendererId();
 
 				if (lightId != lastLightId)
 				{
 					// Swap current last element with the one we want to erase
-					std::swap(mPointLights[lightId], mPointLights[lastLightId]);
+					std::swap(mRadialLights[lightId], mRadialLights[lastLightId]);
 					std::swap(mPointLightWorldBounds[lightId], mPointLightWorldBounds[lastLightId]);
 
 					lastLight->setRendererId(lightId);
 				}
 
 				// Last element is the one we want to erase
-				mPointLights.erase(mPointLights.end() - 1);
+				mRadialLights.erase(mRadialLights.end() - 1);
 				mPointLightWorldBounds.erase(mPointLightWorldBounds.end() - 1);
 			}
 			else // Spot
@@ -717,22 +717,24 @@ namespace bs { namespace ct
 
 		// Generate a list of lights and their GPU buffers
 		UINT32 numDirLights = (UINT32)mDirectionalLights.size();
-		mLightDataTemp[0].resize(numDirLights);
-		for(UINT32 i = 0; i < numDirLights; i++)
-			mDirectionalLights[i].getParameters(mLightDataTemp[0][i]);
+		for (UINT32 i = 0; i < numDirLights; i++)
+		{
+			mLightDataTemp.push_back(LightData());
+			mDirectionalLights[i].getParameters(mLightDataTemp.back());
+		}
 
-		UINT32 numPointLights = (UINT32)mPointLights.size();
-		mLightVisibilityTemp.resize(numPointLights, false);
+		UINT32 numRadialLights = (UINT32)mRadialLights.size();
+		mLightVisibilityTemp.resize(numRadialLights, false);
 		for (UINT32 i = 0; i < numViews; i++)
 			views[i]->calculateVisibility(mPointLightWorldBounds, mLightVisibilityTemp);
 
-		for(UINT32 i = 0; i < numPointLights; i++)
+		for(UINT32 i = 0; i < numRadialLights; i++)
 		{
 			if (!mLightVisibilityTemp[i])
 				continue;
 
-			mLightDataTemp[1].push_back(LightData());
-			mPointLights[i].getParameters(mLightDataTemp[1].back());
+			mLightDataTemp.push_back(LightData());
+			mRadialLights[i].getParameters(mLightDataTemp.back());
 		}
 
 		UINT32 numSpotLights = (UINT32)mSpotLights.size();
@@ -745,15 +747,13 @@ namespace bs { namespace ct
 			if (!mLightVisibilityTemp[i])
 				continue;
 
-			mLightDataTemp[2].push_back(LightData());
-			mSpotLights[i].getParameters(mLightDataTemp[2].back());
+			mLightDataTemp.push_back(LightData());
+			mSpotLights[i].getParameters(mLightDataTemp.back());
 		}
 
-		mTiledDeferredLightingMat->setLights(mLightDataTemp);
+		mTiledDeferredLightingMat->setLights(mLightDataTemp, numDirLights, numRadialLights, numSpotLights);
 
-		mLightDataTemp[0].clear();
-		mLightDataTemp[1].clear();
-		mLightDataTemp[2].clear();
+		mLightDataTemp.clear();
 		mLightVisibilityTemp.clear();
 
 		for (UINT32 i = 0; i < numViews; i++)