Bläddra i källkod

Feature: Added standard (non-clustered) forward rendering mode, for hardware that does not support compute

BearishSun 8 år sedan
förälder
incheckning
01240b3690
58 ändrade filer med 984 tillägg och 478 borttagningar
  1. BIN
      Data/Engine/Includes/DirectLightAccumulator.bslinc.asset
  2. BIN
      Data/Engine/Includes/ForwardLighting.bslinc.asset
  3. BIN
      Data/Engine/Includes/ImageBasedLighting.bslinc.asset
  4. BIN
      Data/Engine/Includes/LightingCommon.bslinc.asset
  5. BIN
      Data/Engine/Includes/ReflProbeAccumulator.bslinc.asset
  6. 63 18
      Data/Engine/ShaderDependencies.json
  7. BIN
      Data/Engine/Shaders/DeferredDirectionalLight.bsl.asset
  8. BIN
      Data/Engine/Shaders/DeferredIBLFinalize.bsl.asset
  9. BIN
      Data/Engine/Shaders/DeferredIBLProbe.bsl.asset
  10. BIN
      Data/Engine/Shaders/DeferredIBLSetup.bsl.asset
  11. BIN
      Data/Engine/Shaders/DeferredIBLSky.bsl.asset
  12. BIN
      Data/Engine/Shaders/DeferredPointLight.bsl.asset
  13. BIN
      Data/Engine/Shaders/LightGridLLCreation.bsl.asset
  14. BIN
      Data/Engine/Shaders/TiledDeferredImageBasedLighting.bsl.asset
  15. BIN
      Data/Engine/Shaders/TiledDeferredLighting.bsl.asset
  16. BIN
      Data/Engine/Shaders/Transparent.bsl.asset
  17. 12 0
      Data/Raw/Engine/DataList.json
  18. 90 0
      Data/Raw/Engine/Includes/DirectLightAccumulator.bslinc
  19. 83 0
      Data/Raw/Engine/Includes/ForwardLighting.bslinc
  20. 9 78
      Data/Raw/Engine/Includes/ImageBasedLighting.bslinc
  21. 8 64
      Data/Raw/Engine/Includes/LightingCommon.bslinc
  22. 101 0
      Data/Raw/Engine/Includes/ReflProbeAccumulator.bslinc
  23. 0 1
      Data/Raw/Engine/Shaders/DeferredIBLFinalize.bsl
  24. 0 1
      Data/Raw/Engine/Shaders/DeferredIBLProbe.bsl
  25. 0 1
      Data/Raw/Engine/Shaders/DeferredIBLSetup.bsl
  26. 0 1
      Data/Raw/Engine/Shaders/DeferredIBLSky.bsl
  27. 3 0
      Data/Raw/Engine/Shaders/LightGridLLCreation.bsl
  28. 2 1
      Data/Raw/Engine/Shaders/TiledDeferredImageBasedLighting.bsl
  29. 1 0
      Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl
  30. 14 49
      Data/Raw/Engine/Shaders/Transparent.bsl
  31. 10 10
      Source/BansheeCore/Managers/BsGpuProgramManager.cpp
  32. 39 14
      Source/BansheeCore/Material/BsMaterial.cpp
  33. 26 5
      Source/BansheeCore/Material/BsMaterial.h
  34. 3 0
      Source/BansheeCore/Material/BsMaterialParams.cpp
  35. 1 1
      Source/BansheeCore/RenderAPI/BsGpuParams.h
  36. 0 33
      Source/BansheeCore/RenderAPI/BsGpuProgram.h
  37. 2 1
      Source/BansheeCore/RenderAPI/BsRenderAPI.h
  38. 0 80
      Source/BansheeCore/Renderer/BsParamBlocks.cpp
  39. 57 4
      Source/BansheeCore/Renderer/BsParamBlocks.h
  40. 25 19
      Source/BansheeCore/Renderer/BsRenderer.h
  41. 1 0
      Source/BansheeCore/Utility/BsCommonTypes.h
  42. 31 24
      Source/BansheeD3D11RenderAPI/BsD3D11RenderAPI.cpp
  43. 12 3
      Source/BansheeEditor/SceneView/BsSelectionRenderer.cpp
  44. 10 1
      Source/BansheeGLRenderAPI/BsGLRenderAPI.cpp
  45. 2 1
      Source/BansheeSL/BsSLFXCompiler.cpp
  46. 1 1
      Source/BansheeVulkanRenderAPI/BsVulkanGpuProgram.cpp
  47. 9 1
      Source/BansheeVulkanRenderAPI/BsVulkanRenderAPI.cpp
  48. 1 1
      Source/CMakeLists.txt
  49. 9 0
      Source/RenderBeast/BsImageBasedLighting.cpp
  50. 12 0
      Source/RenderBeast/BsImageBasedLighting.h
  51. 106 5
      Source/RenderBeast/BsLightRendering.cpp
  52. 37 5
      Source/RenderBeast/BsLightRendering.h
  53. 45 16
      Source/RenderBeast/BsObjectRendering.cpp
  54. 89 18
      Source/RenderBeast/BsRenderCompositor.cpp
  55. 12 3
      Source/RenderBeast/BsRendererObject.h
  56. 43 9
      Source/RenderBeast/BsRendererScene.cpp
  57. 10 5
      Source/RenderBeast/BsRendererView.cpp
  58. 5 4
      Source/RenderBeast/BsShadowRendering.cpp

BIN
Data/Engine/Includes/DirectLightAccumulator.bslinc.asset


BIN
Data/Engine/Includes/ForwardLighting.bslinc.asset


BIN
Data/Engine/Includes/ImageBasedLighting.bslinc.asset


BIN
Data/Engine/Includes/LightingCommon.bslinc.asset


BIN
Data/Engine/Includes/ReflProbeAccumulator.bslinc.asset


+ 63 - 18
Data/Engine/ShaderDependencies.json

@@ -29,20 +29,26 @@
         {
             "Path": "DeferredLightCommon.bslinc"
         },
-        {
-            "Path": "SurfaceData.bslinc"
-        },
         {
             "Path": "GBufferInput.bslinc"
         },
         {
             "Path": "PerCameraData.bslinc"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc"
+        },
+        {
+            "Path": "SurfaceData.bslinc"
         }
     ],
     "DeferredIBLFinalize.bsl": [
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
@@ -63,6 +69,9 @@
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
@@ -80,6 +89,9 @@
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
@@ -100,6 +112,9 @@
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
@@ -123,14 +138,17 @@
         {
             "Path": "DeferredLightCommon.bslinc"
         },
-        {
-            "Path": "SurfaceData.bslinc"
-        },
         {
             "Path": "GBufferInput.bslinc"
         },
         {
             "Path": "PerCameraData.bslinc"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc"
+        },
+        {
+            "Path": "SurfaceData.bslinc"
         }
     ],
     "Diffuse.bsl": [
@@ -232,6 +250,9 @@
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
@@ -239,10 +260,13 @@
             "Path": "LightingCommon.bslinc"
         },
         {
-            "Path": "SurfaceData.bslinc"
+            "Path": "PerCameraData.bslinc"
         },
         {
-            "Path": "PerCameraData.bslinc"
+            "Path": "DirectLightAccumulator.bslinc"
+        },
+        {
+            "Path": "SurfaceData.bslinc"
         }
     ],
     "LightGridLLReduction.bsl": [
@@ -570,19 +594,25 @@
             "Path": "ImageBasedLighting.bslinc"
         },
         {
-            "Path": "ReflectionCubemapCommon.bslinc"
+            "Path": "ReflProbeAccumulator.bslinc"
         },
         {
-            "Path": "LightingCommon.bslinc"
+            "Path": "ReflectionCubemapCommon.bslinc"
         },
         {
-            "Path": "SurfaceData.bslinc"
+            "Path": "LightingCommon.bslinc"
         },
         {
             "Path": "GBufferInput.bslinc"
         },
         {
             "Path": "PerCameraData.bslinc"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc"
+        },
+        {
+            "Path": "SurfaceData.bslinc"
         }
     ],
     "TiledDeferredLighting.bsl": [
@@ -590,31 +620,49 @@
             "Path": "ImageBasedLighting.bslinc"
         },
         {
-            "Path": "ReflectionCubemapCommon.bslinc"
+            "Path": "ReflProbeAccumulator.bslinc"
         },
         {
-            "Path": "LightingCommon.bslinc"
+            "Path": "ReflectionCubemapCommon.bslinc"
         },
         {
-            "Path": "SurfaceData.bslinc"
+            "Path": "LightingCommon.bslinc"
         },
         {
             "Path": "GBufferInput.bslinc"
         },
         {
             "Path": "PerCameraData.bslinc"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc"
+        },
+        {
+            "Path": "SurfaceData.bslinc"
         }
     ],
     "Transparent.bsl": [
         {
             "Path": "ImageBasedLighting.bslinc"
         },
+        {
+            "Path": "ForwardLighting.bslinc"
+        },
+        {
+            "Path": "ReflProbeAccumulator.bslinc"
+        },
         {
             "Path": "ReflectionCubemapCommon.bslinc"
         },
         {
             "Path": "LightingCommon.bslinc"
         },
+        {
+            "Path": "PerCameraData.bslinc"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc"
+        },
         {
             "Path": "SurfaceData.bslinc"
         },
@@ -625,14 +673,11 @@
             "Path": "LightGridCommon.bslinc"
         },
         {
-            "Path": "PerCameraData.bslinc"
+            "Path": "VertexInput.bslinc"
         },
         {
             "Path": "PerObjectData.bslinc"
         },
-        {
-            "Path": "VertexInput.bslinc"
-        },
         {
             "Path": "GBufferOutput.bslinc"
         }

BIN
Data/Engine/Shaders/DeferredDirectionalLight.bsl.asset


BIN
Data/Engine/Shaders/DeferredIBLFinalize.bsl.asset


BIN
Data/Engine/Shaders/DeferredIBLProbe.bsl.asset


BIN
Data/Engine/Shaders/DeferredIBLSetup.bsl.asset


BIN
Data/Engine/Shaders/DeferredIBLSky.bsl.asset


BIN
Data/Engine/Shaders/DeferredPointLight.bsl.asset


BIN
Data/Engine/Shaders/LightGridLLCreation.bsl.asset


BIN
Data/Engine/Shaders/TiledDeferredImageBasedLighting.bsl.asset


BIN
Data/Engine/Shaders/TiledDeferredLighting.bsl.asset


BIN
Data/Engine/Shaders/Transparent.bsl.asset


+ 12 - 0
Data/Raw/Engine/DataList.json

@@ -151,6 +151,18 @@
         {
             "Path": "VertexInput.bslinc",
             "UUID": "2d0dc7e6-46bf-03bb-f48d-46bf25dda973"
+        },
+        {
+            "Path": "DirectLightAccumulator.bslinc",
+            "UUID": "71fa057c-4f77-8bb0-acb3-4f77cb467414"
+        },
+        {
+            "Path": "ReflProbeAccumulator.bslinc",
+            "UUID": "72a07be3-4949-b48c-449b-49491e752f8a"
+        },
+        {
+            "Path": "ForwardLighting.bslinc",
+            "UUID": "bf35f9b3-4c48-30bc-d480-4c48cb444517"
         }
     ],
     "Shaders": [

+ 90 - 0
Data/Raw/Engine/Includes/DirectLightAccumulator.bslinc

@@ -0,0 +1,90 @@
+// 
+// Contains helper mixin used for initializing different forms of light accumulation. Can be removed 
+// when template/specialization support for mixins is added to BSL.
+//
+
+#ifdef USE_UNIFORM_BUFFER
+mixin LightAccumulatorDirect
+#else
+mixin LightAccumulatorIndexed
+#endif
+{
+	code
+	{
+		#ifdef USE_UNIFORM_BUFFER
+			#define MAX_LIGHTS 8
+			
+			[internal]
+			cbuffer Lights
+			{
+				LightData gLights[MAX_LIGHTS];
+			}
+		#else
+			#define MAX_LIGHTS 512 // Arbitrary limit, increase if needed
+		
+			#ifdef USE_COMPUTE_INDICES
+				groupshared uint gLightIndices[MAX_LIGHTS];
+				StructuredBuffer<LightData> gLights;
+			#endif
+			
+			#ifdef USE_LIGHT_GRID_INDICES
+				Buffer<uint> gLightIndices;
+				StructuredBuffer<LightData> gLights;
+			#endif
+		#endif
+		
+		float4 getDirectLighting(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint4 lightOffsets)
+		{
+			float3 N = surfaceData.worldNormal.xyz;
+			float roughness2 = max(surfaceData.roughness, 0.08f);
+			roughness2 *= roughness2;
+			
+			float3 outLuminance = 0;
+			float alpha = 0.0f;
+			if(surfaceData.worldNormal.w > 0.0f)
+			{
+				// Handle directional lights
+				[loop]
+				for(uint i = 0; i < lightOffsets.x; ++i)
+				{
+					LightData lightData = gLights[i];
+					outLuminance += getLuminanceDirectional(lightData, worldPos, V, R, surfaceData);
+				}
+				
+				// Handle radial lights
+				[loop]
+				for (uint j = lightOffsets.y; j < lightOffsets.z; ++j)
+				{
+					#ifdef USE_UNIFORM_BUFFER
+					uint lightIdx = j;
+					#else
+					uint lightIdx = gLightIndices[j];
+					#endif
+					
+					LightData lightData = gLights[lightIdx];
+					outLuminance += getLuminanceRadial(lightData, worldPos, V, R, roughness2, surfaceData);
+				}
+
+				// Handle spot lights
+				[loop]
+				for(uint k = lightOffsets.z; k < lightOffsets.w; ++k)
+				{
+					#ifdef USE_UNIFORM_BUFFER
+					uint lightIdx = k;
+					#else
+					uint lightIdx = gLightIndices[k];
+					#endif
+					
+					LightData lightData = gLights[lightIdx];
+					outLuminance += getLuminanceSpot(lightData, worldPos, V, R, roughness2, surfaceData);
+				}
+				
+				// Ambient term for in-editor visualization, not used in actual lighting
+				outLuminance += surfaceData.albedo.rgb * gAmbientFactor / PI;
+				alpha = 1.0f;
+			}
+			
+			return float4(outLuminance, alpha);
+		}
+	};
+};

+ 83 - 0
Data/Raw/Engine/Includes/ForwardLighting.bslinc

@@ -0,0 +1,83 @@
+#include "$ENGINE$\LightGridCommon.bslinc"
+#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#define USE_LIGHT_GRID_INDICES 1
+#include "$ENGINE$\LightingCommon.bslinc"
+#include "$ENGINE$\ImageBasedLighting.bslinc"
+
+mixin ForwardLighting
+{
+	mixin LightingCommon;
+	mixin ReflectionCubemapCommon;
+	mixin ImageBasedLighting;
+	
+	#if CLUSTERED
+	mixin LightGridCommon;
+	mixin LightAccumulatorIndexed;
+	mixin ReflProbeAccumulatorIndexed;
+	#else
+	mixin LightAccumulatorDirect;
+	mixin ReflProbeAccumulatorDirect;
+	#endif
+	
+	variations
+	{
+		CLUSTERED = { true, false };
+	};		
+
+	code
+	{
+		#if CLUSTERED
+		Buffer<uint4> gGridLightOffsetsAndSize;
+		Buffer<uint2> gGridProbeOffsetsAndSize;
+		#else
+		[internal]
+		cbuffer LightAndReflProbeParams
+		{
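+			// Layout of the lights buffer, which is indexed directly (no index list) in this mode
+			// x - number of directional lights (stored first, starting at index 0)
+			// y - offset to the first radial light
+			// z - offset to the first spot light (one past the last radial light)
+			// w - total number of lights (one past the last spot light)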
+			uint4 gLightOffsets;
+			
+			uint gReflProbeCount;
+		}
+		#endif
+
+		float3 calcLighting(float3 worldPosition, float3 screenPosition, float2 uv, SurfaceData surfaceData)
+		{
+			#if CLUSTERED
+			uint2 pixelPos = (uint2)screenPosition.xy;
+			uint cellIdx = calcCellIdx(pixelPos, screenPosition.z);
+			uint3 lightOffsetAndSize = gGridLightOffsetsAndSize[cellIdx].rgb;
+			
+			uint4 lightOffsets;
+			lightOffsets.x = gLightCounts.x;
+			lightOffsets.y = lightOffsetAndSize.x;
+			lightOffsets.z = lightOffsets.y + lightOffsetAndSize.y;
+			lightOffsets.w = lightOffsets.z + lightOffsetAndSize.z;
+			
+			uint2 reflProbeOffsetAndSize = gGridProbeOffsetsAndSize[cellIdx];
+			#else
+			uint4 lightOffsets = gLightOffsets;
+			uint2 reflProbeOffsetAndSize = uint2(0, gReflProbeCount);			
+			#endif
+			
+			float3 V = normalize(gViewOrigin - worldPosition);
+			float3 N = surfaceData.worldNormal.xyz;
+			float3 R = 2 * dot(V, N) * N - V;
+			float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
+			
+			float4 directLighting = getDirectLighting(worldPosition, V, specR, surfaceData, lightOffsets);
+			float ao = gAmbientOcclusionTex.Sample(gAmbientOcclusionSamp, uv);
+			float4 ssr = gSSRTex.Sample(gSSRSamp, uv);
+			float3 imageBasedSpecular = getImageBasedSpecular(worldPosition, V, specR, surfaceData, ao, ssr,
+				reflProbeOffsetAndSize.x, reflProbeOffsetAndSize.y);
+
+			float3 totalLighting = directLighting.rgb;
+			totalLighting.rgb += imageBasedSpecular;
+
+			return totalLighting;
+		}	
+	};
+};

+ 9 - 78
Data/Raw/Engine/Includes/ImageBasedLighting.bslinc

@@ -6,9 +6,6 @@ mixin ImageBasedLighting
 
 	code
 	{
-		// Arbitrary limit, increase if needed
-		#define MAX_PROBES 512
-	
 		// Note: Size must be multiple of largest element, because of std430 rules
 		struct ReflProbeData
 		{
@@ -63,17 +60,6 @@ mixin ImageBasedLighting
 			AddressV = CLAMP;
 		};
 		
-		#ifndef STANDARD_DEFERRED
-		StructuredBuffer<ReflProbeData> gReflectionProbes;
-		#endif
-
-		#if USE_COMPUTE_INDICES
-			groupshared uint gReflectionProbeIndices[MAX_PROBES];
-		#endif
-		#if USE_LIGHT_GRID_INDICES
-			Buffer<uint> gReflectionProbeIndices;
-		#endif
-		
 		[internal]
 		cbuffer ReflProbeParams
 		{
@@ -201,68 +187,13 @@ mixin ImageBasedLighting
 		{
 			float r2 = r * r;
 			return saturate(pow(NoV + ao, r2) - 1.0f + ao);
-		}			
-		
-		#ifndef STANDARD_DEFERRED
-		float3 gatherReflectionRadiance(float3 worldPos, float3 dir, float roughness, float alpha, float3 specularColor, uint probeOffset, uint numProbes)
-		{
-			if(gUseReflectionMaps == 0)
-				return specularColor;
-									
-			float mipLevel = mapRoughnessToMipLevel(roughness, gReflCubemapNumMips);
-			
-			float3 output = 0;
-			[loop]
-			for(uint i = 0; i < numProbes; i++)
-			{
-				if(alpha < 0.001f)
-					break;
-						
-				uint probeIdx = gReflectionProbeIndices[probeOffset + i];
-				ReflProbeData probeData = gReflectionProbes[probeIdx];
-				float4 probeValue = evaluateProbe(worldPos, dir, mipLevel, probeData);
-				
-				output += probeValue.rgb * alpha; 
-				alpha *= probeValue.w;
-			}
-				
-			if(gSkyCubemapAvailable > 0)
-			{
-				float skyMipLevel = mapRoughnessToMipLevel(roughness, gSkyCubemapNumMips);
-				float4 skySample = gSkyReflectionTex.SampleLevel(gSkyReflectionSamp, dir, skyMipLevel) * gSkyBrightness;
-				
-				output += skySample.rgb * alpha; 
-			}
-					
-			return output;
-		}
-		
-		float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, float ao, float4 ssr, 
-			uint probeOffset, uint numProbes)
-		{
-			// See C++ code for generation of gPreintegratedEnvBRDF to see why this code works as is
-			float3 N = surfaceData.worldNormal.xyz;
-			float NoV = saturate(dot(N, V));
-			
-			// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
-			// For more customizability allow the user to provide separate albedo/specular colors for both types.
-			float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
-			
-			// Get SSR
-			float3 radiance = ssr.rgb;
-			float alpha = 1.0f - ssr.a; // Determines how much to blend in reflection probes & skybox
-			
-			// Generate an approximate spec. occlusion value from AO. This doesn't need to be applied to SSR since it accounts
-			// for occlusion by tracing rays.
-			float specOcclusion = getSpecularOcclusion(NoV, surfaceData.roughness * surfaceData.roughness, ao);
-			alpha *= specOcclusion;
-			
-			// Get radiance from probes and skybox
-			radiance += gatherReflectionRadiance(worldPos, R, surfaceData.roughness, alpha, specularColor, probeOffset, numProbes);
-			
-			float2 envBRDF = gPreintegratedEnvBRDF.SampleLevel(gPreintegratedEnvBRDFSamp, float2(NoV, surfaceData.roughness), 0).rg;
-			return radiance * (specularColor * envBRDF.x + envBRDF.y);
-		}
-		#endif		
+		}				
 	};
-};
+};
+
+// Hackish way of "instantiating" two versions of a mixin, ReflProbeAccumulatorIndexed and (with USE_UNIFORM_BUFFER defined) ReflProbeAccumulatorDirect (to be removed when template/specialization support is added)
+#include "$ENGINE$\ReflProbeAccumulator.bslinc"
+
+#define USE_UNIFORM_BUFFER
+#include "$ENGINE$\ReflProbeAccumulator.bslinc"
+#undef USE_UNIFORM_BUFFER

+ 8 - 64
Data/Raw/Engine/Includes/LightingCommon.bslinc

@@ -6,9 +6,6 @@ mixin LightingCommon
 
 	code
 	{
-		// Arbitrary limit, increase if needed
-		#define MAX_LIGHTS 512
-	
 		#define PI 3.1415926
 		#define HALF_PI 1.5707963
 		
@@ -339,65 +336,12 @@ mixin LightingCommon
 				
 			return lightData.color * illuminance * attenuation * surfaceShading;
 		}
-		
-		#ifdef USE_COMPUTE_INDICES
-			groupshared uint gLightIndices[MAX_LIGHTS];
-			StructuredBuffer<LightData> gLights;
-			#define REQUIRES_LIGHT_ITERATION 1
-		#endif
-		
-		#ifdef USE_LIGHT_GRID_INDICES
-			Buffer<uint> gLightIndices;
-			StructuredBuffer<LightData> gLights;
-			#define REQUIRES_LIGHT_ITERATION 1
-		#endif
-		
-		#ifdef REQUIRES_LIGHT_ITERATION
-		float4 getDirectLighting(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint4 lightOffsets)
-		{
-			float3 N = surfaceData.worldNormal.xyz;
-			float roughness2 = max(surfaceData.roughness, 0.08f);
-			roughness2 *= roughness2;
-			
-			float3 outLuminance = 0;
-			float alpha = 0.0f;
-			if(surfaceData.worldNormal.w > 0.0f)
-			{
-				// Handle directional lights
-				[loop]
-				for(uint i = 0; i < lightOffsets.x; ++i)
-				{
-					LightData lightData = gLights[i];
-					outLuminance += getLuminanceDirectional(lightData, worldPos, V, R, surfaceData);
-				}
-				
-				// Handle radial lights
-				[loop]
-				for (uint j = lightOffsets.y; j < lightOffsets.z; ++j)
-				{
-					uint lightIdx = gLightIndices[j];
-					LightData lightData = gLights[lightIdx];
-					
-					outLuminance += getLuminanceRadial(lightData, worldPos, V, R, roughness2, surfaceData);
-				}
-
-				// Handle spot lights
-				[loop]
-				for(uint k = lightOffsets.z; k < lightOffsets.w; ++k)
-				{
-					uint lightIdx = gLightIndices[k];
-					LightData lightData = gLights[lightIdx];
-					
-					outLuminance += getLuminanceSpot(lightData, worldPos, V, R, roughness2, surfaceData);
-				}
-				
-				// Ambient term for in-editor visualization, not used in actual lighting
-				outLuminance += surfaceData.albedo.rgb * gAmbientFactor / PI;
-				alpha = 1.0f;
-			}
-			
-			return float4(outLuminance, alpha);
-		}
-		#endif
 	};
-};
+};
+
+// Hackish way of "instantiating" two versions of a mixin, LightAccumulatorIndexed and (with USE_UNIFORM_BUFFER defined) LightAccumulatorDirect (to be removed when template/specialization support is added)
+#include "$ENGINE$\DirectLightAccumulator.bslinc"
+
+#define USE_UNIFORM_BUFFER
+#include "$ENGINE$\DirectLightAccumulator.bslinc"
+#undef USE_UNIFORM_BUFFER

+ 101 - 0
Data/Raw/Engine/Includes/ReflProbeAccumulator.bslinc

@@ -0,0 +1,101 @@
+// 
+// Contains helper mixin used for initializing different forms of refl. probe accumulation. Can be removed 
+// when template/specialization support for mixins is added to BSL.
+//
+
+#ifdef USE_UNIFORM_BUFFER
+mixin ReflProbeAccumulatorDirect
+#else
+mixin ReflProbeAccumulatorIndexed
+#endif
+{
+	code
+	{
+		#ifdef USE_UNIFORM_BUFFER
+			#define MAX_PROBES 8
+			
+			[internal]
+			cbuffer ReflProbes
+			{
+				ReflProbeData gReflectionProbes[MAX_PROBES];
+			}
+		#else
+			#define MAX_PROBES 512 // Arbitrary limit, increase if needed
+		
+			#ifdef USE_COMPUTE_INDICES
+				groupshared uint gReflectionProbeIndices[MAX_PROBES];
+				StructuredBuffer<ReflProbeData> gReflectionProbes;
+			#endif
+			
+			#ifdef USE_LIGHT_GRID_INDICES
+				Buffer<uint> gReflectionProbeIndices;
+				StructuredBuffer<ReflProbeData> gReflectionProbes;
+			#endif
+		#endif
+		
+		float3 gatherReflectionRadiance(float3 worldPos, float3 dir, float roughness, float alpha, float3 specularColor, uint probeOffset, uint numProbes)
+		{
+			if(gUseReflectionMaps == 0)
+				return specularColor;
+									
+			float mipLevel = mapRoughnessToMipLevel(roughness, gReflCubemapNumMips);
+			
+			float3 output = 0;
+			[loop]
+			for(uint i = 0; i < numProbes; i++)
+			{
+				if(alpha < 0.001f)
+					break;
+						
+				#ifdef USE_UNIFORM_BUFFER
+				uint probeIdx = probeOffset + i;
+				#else
+				uint probeIdx = gReflectionProbeIndices[probeOffset + i];
+				#endif
+				
+				ReflProbeData probeData = gReflectionProbes[probeIdx];
+				float4 probeValue = evaluateProbe(worldPos, dir, mipLevel, probeData);
+				
+				output += probeValue.rgb * alpha; 
+				alpha *= probeValue.w;
+			}
+				
+			if(gSkyCubemapAvailable > 0)
+			{
+				float skyMipLevel = mapRoughnessToMipLevel(roughness, gSkyCubemapNumMips);
+				float4 skySample = gSkyReflectionTex.SampleLevel(gSkyReflectionSamp, dir, skyMipLevel) * gSkyBrightness;
+				
+				output += skySample.rgb * alpha; 
+			}
+					
+			return output;
+		}
+		
+		float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, float ao, float4 ssr, 
+			uint probeOffset, uint numProbes)
+		{
+			// See C++ code for generation of gPreintegratedEnvBRDF to see why this code works as is
+			float3 N = surfaceData.worldNormal.xyz;
+			float NoV = saturate(dot(N, V));
+			
+			// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
+			// For more customizability allow the user to provide separate albedo/specular colors for both types.
+			float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
+			
+			// Get SSR
+			float3 radiance = ssr.rgb;
+			float alpha = 1.0f - ssr.a; // Determines how much to blend in reflection probes & skybox
+			
+			// Generate an approximate spec. occlusion value from AO. This doesn't need to be applied to SSR since it accounts
+			// for occlusion by tracing rays.
+			float specOcclusion = getSpecularOcclusion(NoV, surfaceData.roughness * surfaceData.roughness, ao);
+			alpha *= specOcclusion;
+			
+			// Get radiance from probes and skybox
+			radiance += gatherReflectionRadiance(worldPos, R, surfaceData.roughness, alpha, specularColor, probeOffset, numProbes);
+			
+			float2 envBRDF = gPreintegratedEnvBRDF.SampleLevel(gPreintegratedEnvBRDFSamp, float2(NoV, surfaceData.roughness), 0).rg;
+			return radiance * (specularColor * envBRDF.x + envBRDF.y);
+		}
+	};
+};

+ 0 - 1
Data/Raw/Engine/Shaders/DeferredIBLFinalize.bsl

@@ -7,7 +7,6 @@
 #include "$ENGINE$\GBufferInput.bslinc"
 #include "$ENGINE$\PPBase.bslinc"
 #include "$ENGINE$\PerCameraData.bslinc"
-#define STANDARD_DEFERRED
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
 technique DeferredIBLFinalize

+ 0 - 1
Data/Raw/Engine/Shaders/DeferredIBLProbe.bsl

@@ -6,7 +6,6 @@
 
 #include "$ENGINE$\GBufferInput.bslinc"
 #include "$ENGINE$\PerCameraData.bslinc"
-#define STANDARD_DEFERRED
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
 technique DeferredIBLProbe

+ 0 - 1
Data/Raw/Engine/Shaders/DeferredIBLSetup.bsl

@@ -7,7 +7,6 @@
 #include "$ENGINE$\GBufferInput.bslinc"
 #include "$ENGINE$\PPBase.bslinc"
 #include "$ENGINE$\PerCameraData.bslinc"
-#define STANDARD_DEFERRED
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
 technique DeferredIBLSetup

+ 0 - 1
Data/Raw/Engine/Shaders/DeferredIBLSky.bsl

@@ -7,7 +7,6 @@
 #include "$ENGINE$\GBufferInput.bslinc"
 #include "$ENGINE$\PPBase.bslinc"
 #include "$ENGINE$\PerCameraData.bslinc"
-#define STANDARD_DEFERRED
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
 technique DeferredIBLFinalize

+ 3 - 0
Data/Raw/Engine/Shaders/LightGridLLCreation.bsl

@@ -13,6 +13,9 @@ technique LightGridLLCreation
 
 	code
 	{
+		StructuredBuffer<LightData> gLights;
+		StructuredBuffer<ReflProbeData> gReflectionProbes;
+	
 		[layout(r32ui)]
 		RWBuffer<uint> gLightsCounter;
 		[layout(r32ui)]

+ 2 - 1
Data/Raw/Engine/Shaders/TiledDeferredImageBasedLighting.bsl

@@ -12,6 +12,7 @@ technique TiledDeferredImageBasedLighting
 	mixin LightingCommon;
 	mixin ReflectionCubemapCommon;
 	mixin ImageBasedLighting;
+	mixin ReflProbeAccumulatorIndexed;
 
 	variations
 	{
@@ -225,7 +226,7 @@ technique TiledDeferredImageBasedLighting
 			calcTileAABB(groupId.xy, minTileZ, maxTileZ, center, extent);
 							
 			// Find probes overlapping the tile
-			for (uint i = threadIndex; i < gNumProbes && i < MAX_LIGHTS; i += TILE_SIZE)
+			for (uint i = threadIndex; i < gNumProbes && i < MAX_PROBES; i += TILE_SIZE)
 			{
 				float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
 				float probeRadius = gReflectionProbes[i].radius;

+ 1 - 0
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -10,6 +10,7 @@ technique TiledDeferredLighting
 	mixin GBufferInput;
 	mixin PerCameraData;
 	mixin LightingCommon;
+	mixin LightAccumulatorIndexed;
 	mixin ReflectionCubemapCommon;
 	mixin ImageBasedLighting;
 

+ 14 - 49
Data/Raw/Engine/Shaders/Transparent.bsl

@@ -1,9 +1,5 @@
 #include "$ENGINE$\BasePass.bslinc"
-#include "$ENGINE$\LightGridCommon.bslinc"
-#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
-#define USE_LIGHT_GRID_INDICES 1
-#include "$ENGINE$\LightingCommon.bslinc"
-#include "$ENGINE$\ImageBasedLighting.bslinc"
+#include "$ENGINE$\ForwardLighting.bslinc"
 
 options
 {
@@ -13,10 +9,7 @@ options
 technique Surface
 {
 	mixin BasePass;
-	mixin LightingCommon;
-	mixin LightGridCommon;
-	mixin ReflectionCubemapCommon;
-	mixin ImageBasedLighting;
+	mixin ForwardLighting;
 
 	blend
 	{
@@ -34,22 +27,19 @@ technique Surface
 
 	code
 	{
-		SamplerState gAlbedoSamp : register(s0);
-		SamplerState gNormalSamp : register(s1);
-		SamplerState gRoughnessSamp : register(s2);
-		SamplerState gMetalnessSamp : register(s3);
+		SamplerState gAlbedoSamp;
+		SamplerState gNormalSamp;
+		SamplerState gRoughnessSamp;
+		SamplerState gMetalnessSamp;
 		
-		Texture2D gAlbedoTex : register(t0);
-		Texture2D gNormalTex : register(t1);
-		Texture2D gRoughnessTex : register(t2);
-		Texture2D gMetalnessTex : register(t3);
+		Texture2D gAlbedoTex;
+		Texture2D gNormalTex;
+		Texture2D gRoughnessTex;
+		Texture2D gMetalnessTex;
 		
-		Buffer<uint4> gGridLightOffsetsAndSize;
-		Buffer<uint2> gGridProbeOffsetsAndSize;
-
-		cbuffer MaterialParams : register(b5)
+		cbuffer MaterialParams
 		{
-			float gOpacity;
+			float gOpacity = 1.0f;
 		}
 		
 		float4 fsmain(in VStoFS input) : SV_Target0
@@ -64,33 +54,8 @@ technique Surface
 			surfaceData.roughness = gRoughnessTex.Sample(gRoughnessSamp, input.uv0).x;
 			surfaceData.metalness = gMetalnessTex.Sample(gMetalnessSamp, input.uv0).x;
 			
-			uint2 pixelPos = (uint2)input.position.xy;
-			uint cellIdx = calcCellIdx(pixelPos, input.position.z);
-			uint3 lightOffsetAndSize = gGridLightOffsetsAndSize[cellIdx].rgb;
-			
-			uint4 lightOffsets;
-			lightOffsets.x = gLightCounts.x;
-			lightOffsets.y = lightOffsetAndSize.x;
-			lightOffsets.z = lightOffsets.y + lightOffsetAndSize.y;
-			lightOffsets.w = lightOffsets.z + lightOffsetAndSize.z;
-			
-			uint2 reflProbeOffsetAndSize = gGridProbeOffsetsAndSize[cellIdx];
-			
-			float3 V = normalize(gViewOrigin - input.worldPosition);
-			float3 N = surfaceData.worldNormal.xyz;
-			float3 R = 2 * dot(V, N) * N - V;
-			float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
-			
-			float4 directLighting = getDirectLighting(input.worldPosition, V, specR, surfaceData, lightOffsets);
-			float ao = gAmbientOcclusionTex.Sample(gAmbientOcclusionSamp, input.uv0);
-			float4 ssr = gSSRTex.Sample(gSSRSamp, input.uv0);
-			float3 imageBasedSpecular = getImageBasedSpecular(input.worldPosition, V, specR, surfaceData, ao, ssr,
-				reflProbeOffsetAndSize.x, reflProbeOffsetAndSize.y);
-
-			float3 totalLighting = directLighting.rgb;
-			totalLighting.rgb += imageBasedSpecular;
-
-			return float4(totalLighting, surfaceData.albedo.a * gOpacity);
+			float3 lighting = calcLighting(input.worldPosition.xyz, input.position, input.uv0, surfaceData);
+			return float4(lighting, surfaceData.albedo.a * gOpacity);
 		}	
 	};
 };

+ 10 - 10
Source/BansheeCore/Managers/BsGpuProgramManager.cpp

@@ -97,13 +97,13 @@ namespace bs
 	}
 
 	void GpuProgramManager::removeFactory(GpuProgramFactory* factory)
-    {
-        FactoryMap::iterator it = mFactories.find(factory->getLanguage());
-        if (it != mFactories.end() && it->second == factory)
-        {
-            mFactories.erase(it);
-        }
-    }
+	{
+		FactoryMap::iterator it = mFactories.find(factory->getLanguage());
+		if (it != mFactories.end() && it->second == factory)
+		{
+			mFactories.erase(it);
+		}
+	}
 
 	GpuProgramFactory* GpuProgramManager::getFactory(const String& language)
 	{
@@ -123,12 +123,12 @@ namespace bs
 	}
 
 	SPtr<GpuProgram> GpuProgramManager::create(const GPU_PROGRAM_DESC& desc, GpuDeviceFlags deviceMask)
-    {
+	{
 		SPtr<GpuProgram> ret = createInternal(desc, deviceMask);
 		ret->initialize();
 
-        return ret;
-    }
+		return ret;
+	}
 
 	SPtr<GpuProgram> GpuProgramManager::createInternal(const GPU_PROGRAM_DESC& desc, GpuDeviceFlags deviceMask)
 	{

+ 39 - 14
Source/BansheeCore/Material/BsMaterial.cpp

@@ -58,30 +58,55 @@ namespace bs
 	{
 		paramsSet->update(mParams, updateAll);
 	}
-
+	
 	template<bool Core>
-	UINT32 TMaterial<Core>::findTechnique(const StringID& tag) const
+	UINT32 TMaterial<Core>::findTechnique(const FIND_TECHNIQUE_DESC& desc) const
 	{
 		for(UINT32 i = 0; i < (UINT32)mTechniques.size(); i++)
 		{
-			if (mTechniques[i]->hasTag(tag))
-				return i;
-		}
+			bool foundMatch = true;
+			for(UINT32 j = 0; j < desc.numTags; j++)
+			{
+				if (!mTechniques[i]->hasTag(desc.tags[j]))
+				{
+					foundMatch = false;
+					break;
+				}
+			}
 
-		return (UINT32)-1;
-	}
+			if(!foundMatch)
+				continue;
 
-	template<bool Core>
-	UINT32 TMaterial<Core>::findTechnique(const ShaderVariation& variation) const
-	{
-		for(UINT32 i = 0; i < (UINT32)mTechniques.size(); i++)
-		{
-			const ShaderVariation& curVariation = mTechniques[i]->getVariation();
-			if(curVariation == variation)
+			if(desc.variation)
+			{
+				const ShaderVariation& curVariation = mTechniques[i]->getVariation();
+				const auto& curVarParams = curVariation.getParams();
+
+				foundMatch = true;
+				const auto& searchVarParams = desc.variation->getParams();
+				for (auto& param : searchVarParams)
+				{
+					auto iterFind = curVarParams.find(param.first);
+					if (iterFind == curVarParams.end())
+					{
+						foundMatch = false;
+						break;
+					}
+
+					if (param.second.i != iterFind->second.i)
+					{
+						foundMatch = false;
+						break;
+					}
+				}
+			}
+
+			if(foundMatch)
 				return i;
 		}
 
 		return (UINT32)-1;
+		
 	}
 
 	template<bool Core>

+ 26 - 5
Source/BansheeCore/Material/BsMaterial.h

@@ -47,6 +47,30 @@ namespace bs
 		ResourceChanged		= 2 << 1
 	};
 
+	/** Structure used when searching for a specific technique in a Material. */
+	struct FIND_TECHNIQUE_DESC
+	{
+		/** Maximum number of tags a single search can specify. */
+		static constexpr UINT32 MAX_NUM_TAGS = 10;
+
+		/** A set of tags that the technique must have. */
+		StringID tags[MAX_NUM_TAGS];
+
+		/** Number of valid tags in the @p tags array. */
+		UINT32 numTags = 0;
+
+		/** Specified variation of the technique. Parameters not specified in the variation are assumed to be irrelevant. */
+		const ShaderVariation* variation = nullptr;
+
+		/** 
+		 * Registers a new tag to look for when searching for the technique. No more than MAX_NUM_TAGS tags can be
+		 * registered; any further tag trips the assert and is otherwise ignored.
+		 */
+		void addTag(const StringID& tag)
+		{
+			BS_ASSERT(numTags < MAX_NUM_TAGS);
+
+			// Never write past the end of the fixed-size array, in case the assert above is compiled out
+			if (numTags >= MAX_NUM_TAGS)
+				return;
+
+			tags[numTags] = tag;
+			numTags++;
+		}
+	};
+
 	/**
 	 * Material that controls how objects are rendered. It is represented by a shader and parameters used to set up that
 	 * shader. It provides a simple interface for manipulating the parameters.
@@ -123,11 +147,8 @@ namespace bs
 		/** Returns the total number of techniques supported by this material. */
 		UINT32 getNumTechniques() const { return (UINT32)mTechniques.size(); }
 
-		/** Attempts to find a technique with the supported tag. Returns an index of the technique, or -1 if not found. */
-		UINT32 findTechnique(const StringID& tag) const;
-
-		/** Attempts to find a technique matching the provided variation. Returns an index of the technique, or -1 if not found. */
-		UINT32 findTechnique(const ShaderVariation& variation) const;
+		/** Attempts to find a technique matching the specified variation and tags. Returns -1 if none can be found. */
+		UINT32 findTechnique(const FIND_TECHNIQUE_DESC& desc) const;
 
 		/** Finds the index of the default (primary) technique to use. */
 		UINT32 getDefaultTechnique() const;

+ 3 - 0
Source/BansheeCore/Material/BsMaterialParams.cpp

@@ -40,6 +40,9 @@ namespace bs
 
 		for (auto& entry : dataParams)
 		{
+			if(entry.second.type == GPDT_UNKNOWN)
+				continue;
+
 			UINT32 paramIdx = (UINT32)mParams.size();
 			mParams.push_back(ParamData());
 			mParamLookup[entry.first] = paramIdx;

+ 1 - 1
Source/BansheeCore/RenderAPI/BsGpuParams.h

@@ -15,7 +15,7 @@ namespace bs
 	 */
 
 	/**	Helper structure whose specializations convert an engine data type into a GPU program data parameter type.  */
-	template<class T> struct TGpuDataParamInfo { };
+	template<class T> struct TGpuDataParamInfo { enum { TypeId = GPDT_STRUCT };};
 	template<> struct TGpuDataParamInfo < float > { enum { TypeId = GPDT_FLOAT1 }; };
 	template<> struct TGpuDataParamInfo < Vector2 > { enum { TypeId = GPDT_FLOAT2 }; };
 	template<> struct TGpuDataParamInfo < Vector3 > { enum { TypeId = GPDT_FLOAT3 }; };

+ 0 - 33
Source/BansheeCore/RenderAPI/BsGpuProgram.h

@@ -12,39 +12,6 @@ namespace bs
 	 *  @{
 	 */
 
-	/**	GPU program profiles representing supported feature sets. */
-	enum GpuProgramProfile
-	{
-		GPP_NONE, /**< No profile. */
-		GPP_FS_1_1, /**< Fragment program 1.1 profile. */
-		GPP_FS_1_2, /**< Fragment program 1.2 profile. */
-		GPP_FS_1_3, /**< Fragment program 1.3 profile. */
-		GPP_FS_1_4, /**< Fragment program 1.4 profile. */
-		GPP_FS_2_0, /**< Fragment program 2.0 profile. */
-		GPP_FS_2_x, /**< Fragment program 2.x profile. */
-		GPP_FS_2_a, /**< Fragment program 2.a profile. */
-		GPP_FS_2_b, /**< Fragment program 2.b profile. */
-		GPP_FS_3_0, /**< Fragment program 3.0 profile. */
-		GPP_FS_3_x, /**< Fragment program 3.x profile. */
-		GPP_FS_4_0, /**< Fragment program 4.0 profile. */
-		GPP_FS_4_1, /**< Fragment program 4.1 profile. */
-		GPP_FS_5_0, /**< Fragment program 5.0 profile. */
-		GPP_VS_1_1, /**< Vertex program 1.1 profile. */
-		GPP_VS_2_0, /**< Vertex program 2.0 profile. */
-		GPP_VS_2_x, /**< Vertex program 2.x profile. */
-		GPP_VS_2_a, /**< Vertex program 2.a profile. */
-		GPP_VS_3_0, /**< Vertex program 3.0 profile. */
-		GPP_VS_4_0, /**< Vertex program 4.0 profile. */
-		GPP_VS_4_1, /**< Vertex program 4.1 profile. */
-		GPP_VS_5_0, /**< Vertex program 5.0 profile. */
-		GPP_GS_4_0, /**< Geometry program 4.0 profile. */
-		GPP_GS_4_1, /**< Geometry program 4.1 profile. */
-		GPP_GS_5_0, /**< Geometry program 5.0 profile. */
-		GPP_HS_5_0, /**< Hull program 5.0 profile. */
-		GPP_DS_5_0, /**< Domain program 5.0 profile. */
-		GPP_CS_5_0 /**< Compute program 5.0 profile. */
-	};
-
 	/** Descriptor structure used for initialization of a GpuProgram. */
 	struct GPU_PROGRAM_DESC
 	{

+ 2 - 1
Source/BansheeCore/RenderAPI/BsRenderAPI.h

@@ -580,7 +580,8 @@ namespace bs
 		 * 			
 		 * @param[in]	name	Name to assign the parameter block.
 		 * @param[in]	params	List of parameters in the parameter block. Only name, type and array size fields need to be
-		 * 						populated, the rest will be populated when the method returns.		
+		 * 						populated, the rest will be populated when the method returns. If a parameter is a struct
+		 * 						then the elementSize field needs to be populated with the size of the struct in bytes.
 		 * @return				Descriptor for the parameter block holding the provided parameters as laid out by the 
 		 *						default render API layout.
 		 */

+ 0 - 80
Source/BansheeCore/Renderer/BsParamBlocks.cpp

@@ -5,86 +5,6 @@
 
 namespace bs { namespace ct
 {
-	template<class T>
-	ParamBlockParam<T>::ParamBlockParam(const GpuParamDataDesc& paramDesc)
-		:mParamDesc(paramDesc)
-	{ }
-
-	template<class T>
-	void ParamBlockParam<T>::set(const SPtr<GpuParamBlockBuffer>& paramBlock, const T& value, UINT32 arrayIdx) const
-	{
-#if BS_DEBUG_MODE
-		if (arrayIdx >= mParamDesc.arraySize)
-		{
-			BS_EXCEPT(InvalidParametersException, "Array index out of range. Array size: " +
-					  toString(mParamDesc.arraySize) + ". Requested size: " + toString(arrayIdx));
-		}
-#endif
-
-		UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
-		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
-
-		bool transposeMatrices = RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
-		if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
-		{
-			auto transposed = TransposePolicy<T>::transpose(value);
-			paramBlock->write((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), 
-				&transposed, sizeBytes);
-		}
-		else
-			paramBlock->write((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), 
-				&value, sizeBytes);
-
-		// Set unused bytes to 0
-		if (sizeBytes < elementSizeBytes)
-		{
-			UINT32 diffSize = elementSizeBytes - sizeBytes;
-			paramBlock->zeroOut((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32) + 
-				sizeBytes, diffSize);
-		}
-	}
-
-	template<class T>
-	T ParamBlockParam<T>::get(const SPtr<GpuParamBlockBuffer>& paramBlock, UINT32 arrayIdx) const
-	{
-#if BS_DEBUG_MODE
-		if (arrayIdx >= mParamDesc.arraySize)
-		{
-			LOGERR("Array index out of range. Array size: " + toString(mParamDesc.arraySize) + ". Requested size: " + 
-				toString(arrayIdx));
-			return T();
-		}
-#endif
-
-		UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
-		UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T));
-
-		T value;
-		paramBlock->read((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), &value, 
-			sizeBytes);
-
-		return value;
-	}
-
-	template class ParamBlockParam<float>;
-	template class ParamBlockParam<int>;
-	template class ParamBlockParam<Color>;
-	template class ParamBlockParam<Vector2>;
-	template class ParamBlockParam<Vector3>;
-	template class ParamBlockParam<Vector4>;
-	template class ParamBlockParam<Vector2I>;
-	template class ParamBlockParam<Vector3I>;
-	template class ParamBlockParam<Vector4I>;
-	template class ParamBlockParam<Matrix2>;
-	template class ParamBlockParam<Matrix2x3>;
-	template class ParamBlockParam<Matrix2x4>;
-	template class ParamBlockParam<Matrix3>;
-	template class ParamBlockParam<Matrix3x2>;
-	template class ParamBlockParam<Matrix3x4>;
-	template class ParamBlockParam<Matrix4>;
-	template class ParamBlockParam<Matrix4x2>;
-	template class ParamBlockParam<Matrix4x3>;
-
 	ParamBlock::~ParamBlock()
 	{
 		ParamBlockManager::unregisterBlock(this);

+ 57 - 4
Source/BansheeCore/Renderer/BsParamBlocks.h

@@ -16,23 +16,75 @@ namespace bs { namespace ct
 
 	/** Wrapper for a single parameter in a parameter block buffer. */
 	template<class T>
-	class BS_CORE_EXPORT ParamBlockParam
+	class ParamBlockParam
 	{
 	public:
 		ParamBlockParam() { }
-		ParamBlockParam(const GpuParamDataDesc& paramDesc);
+		ParamBlockParam(const GpuParamDataDesc& paramDesc)
+			:mParamDesc(paramDesc)
+		{ }
 
 		/** 
 		 * Sets the parameter in the provided parameter block buffer. Caller is responsible for ensuring the param block
 		 * buffer contains this parameter. 
 		 */
-		void set(const SPtr<GpuParamBlockBuffer>& paramBlock, const T& value, UINT32 arrayIdx = 0) const;
+		void set(const SPtr<GpuParamBlockBuffer>& paramBlock, const T& value, UINT32 arrayIdx = 0) const
+		{
+#if BS_DEBUG_MODE
+			if (arrayIdx >= mParamDesc.arraySize)
+			{
+				BS_EXCEPT(InvalidParametersException, "Array index out of range. Array size: " +
+					toString(mParamDesc.arraySize) + ". Requested size: " + toString(arrayIdx));
+			}
+#endif
+
+			UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
+			UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T)); // Truncate if it doesn't fit within parameter size
+
+			bool transposeMatrices = RenderAPI::instance().getAPIInfo().isFlagSet(RenderAPIFeatureFlag::ColumnMajorMatrices);
+			if (TransposePolicy<T>::transposeEnabled(transposeMatrices))
+			{
+				auto transposed = TransposePolicy<T>::transpose(value);
+				paramBlock->write((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32),
+					&transposed, sizeBytes);
+			}
+			else
+				paramBlock->write((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32),
+					&value, sizeBytes);
+
+			// Set unused bytes to 0
+			if (sizeBytes < elementSizeBytes)
+			{
+				UINT32 diffSize = elementSizeBytes - sizeBytes;
+				paramBlock->zeroOut((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32) +
+					sizeBytes, diffSize);
+			}
+		}
 
 		/** 
 		 * Gets the parameter in the provided parameter block buffer. Caller is responsible for ensuring the param block
 		 * buffer contains this parameter. 
 		 */
-		T get(const SPtr<GpuParamBlockBuffer>& paramBlock, UINT32 arrayIdx = 0) const;
+		T get(const SPtr<GpuParamBlockBuffer>& paramBlock, UINT32 arrayIdx = 0) const
+		{
+#if BS_DEBUG_MODE
+			if (arrayIdx >= mParamDesc.arraySize)
+			{
+				LOGERR("Array index out of range. Array size: " + toString(mParamDesc.arraySize) + ". Requested size: " +
+					toString(arrayIdx));
+				return T();
+			}
+#endif
+
+			UINT32 elementSizeBytes = mParamDesc.elementSize * sizeof(UINT32);
+			UINT32 sizeBytes = std::min(elementSizeBytes, (UINT32)sizeof(T));
+
+			T value;
+			paramBlock->read((mParamDesc.cpuMemOffset + arrayIdx * mParamDesc.arrayElementStride) * sizeof(UINT32), &value,
+				sizeBytes);
+
+			return value;
+		}
 
 	protected:
 		GpuParamDataDesc mParamDesc;
@@ -115,6 +167,7 @@ namespace bs { namespace ct
 			newEntry.name = #Name;																							\
 			newEntry.type = (GpuParamDataType)TGpuDataParamInfo<Type>::TypeId;												\
 			newEntry.arraySize = NumElements;																				\
+			newEntry.elementSize = sizeof(Type);																			\
 		}																													\
 																															\
 		void META_InitPrevEntry(const Vector<GpuParamDataDesc>& params, UINT32 idx, META_NextEntry_##Name id)				\

+ 25 - 19
Source/BansheeCore/Renderer/BsRenderer.h

@@ -23,25 +23,31 @@ namespace bs
 	 */
 
 	/** Common shader variations. */
-	static ShaderVariation SVar_Static = ShaderVariation({
-		ShaderVariation::Param("SKINNED", false),
-		ShaderVariation::Param("MORPH", false),
-	});
-
-	static ShaderVariation SVar_Skinned = ShaderVariation({
-		ShaderVariation::Param("SKINNED", true),
-		ShaderVariation::Param("MORPH", false),
-	});
-
-	static ShaderVariation SVar_Morph = ShaderVariation({
-		ShaderVariation::Param("SKINNED", false),
-		ShaderVariation::Param("MORPH", true),
-	});
-
-	static ShaderVariation SVar_SkinnedMorph = ShaderVariation({
-		ShaderVariation::Param("SKINNED", true),
-		ShaderVariation::Param("MORPH", true),
-	});
+
+	/** Returns a specific vertex input shader variation. */
+	template<bool skinned, bool morph>
+	static const ShaderVariation& getVertexInputVariation()
+	{
+		static ShaderVariation variation = ShaderVariation({
+			ShaderVariation::Param("SKINNED", skinned),
+			ShaderVariation::Param("MORPH", morph),
+		});
+
+		return variation;
+	}
+
+	/** Returns a specific forward rendering shader variation. */
+	template<bool skinned, bool morph, bool clustered>
+	static const ShaderVariation& getForwardRenderingVariation()
+	{
+		static ShaderVariation variation = ShaderVariation({
+			ShaderVariation::Param("SKINNED", skinned),
+			ShaderVariation::Param("MORPH", morph),
+			ShaderVariation::Param("CLUSTERED", clustered),
+		});
+
+		return variation;
+	}
 
 	/** Technique tags. */
 	static StringID RTag_Skinned = "Skinned";

+ 1 - 0
Source/BansheeCore/Utility/BsCommonTypes.h

@@ -398,6 +398,7 @@ namespace bs
 			lookup[(UINT32)GPDT_INT3] = { 4, 12, 16, 1, 3 };
 			lookup[(UINT32)GPDT_INT4] = { 4, 16, 16, 1, 4 };
 			lookup[(UINT32)GPDT_BOOL] = { 4, 4, 4, 1, 1 };
+			lookup[(UINT32)GPDT_STRUCT] = { 4, 0, 16, 1, 1 };
 		}
 
 		GpuParamDataTypeInfo lookup[GPDT_COUNT];

+ 31 - 24
Source/BansheeD3D11RenderAPI/BsD3D11RenderAPI.cpp

@@ -1388,41 +1388,48 @@ namespace bs { namespace ct
 			if (param.arraySize > 1)
 			{
 				// Arrays perform no packing and their elements are always padded and aligned to four component vectors
-				UINT32 size = typeInfo.size / 4;
-
-				UINT32 alignOffset = size % typeInfo.baseTypeSize;
-				if (alignOffset != 0)
-				{
-					UINT32 padding = (typeInfo.baseTypeSize - alignOffset);
-					size += padding;
-				}
+				UINT32 size;
+				if(param.type == GPDT_STRUCT)
+					size = Math::divideAndRoundUp(param.elementSize, 16U) * 4;
+				else
+					size = Math::divideAndRoundUp(typeInfo.size, 4U);
 
-				alignOffset = block.blockSize % typeInfo.baseTypeSize;
-				if (alignOffset != 0)
-				{
-					UINT32 padding = (typeInfo.baseTypeSize - alignOffset);
-					block.blockSize += padding;
-				}
+				block.blockSize = Math::divideAndRoundUp(block.blockSize, 4U);
 
 				param.elementSize = size;
 				param.arrayElementStride = size;
 				param.cpuMemOffset = block.blockSize;
 				param.gpuMemOffset = 0;
 
-				// Last array element isn't rounded up to four component vectors
-				block.blockSize += size * (param.arraySize - 1);
-				block.blockSize += typeInfo.size / 4;
+				// Last array element isn't rounded up to four component vectors unless it's a struct
+				if(param.type != GPDT_STRUCT)
+				{
+					block.blockSize += size * (param.arraySize - 1);
+					block.blockSize += typeInfo.size / 4;
+				}
+				else
+					block.blockSize += param.arraySize * size;
 			}
 			else
 			{
-				UINT32 size = typeInfo.baseTypeSize * (typeInfo.numRows * typeInfo.numColumns) / 4;
-
-				// Pack everything as tightly as possible as long as the data doesn't cross 16 byte boundary
-				UINT32 alignOffset = block.blockSize % 4;
-				if (alignOffset != 0 && size > (4 - alignOffset))
+				UINT32 size;
+				if(param.type == GPDT_STRUCT)
+				{
+					// Structs are always aligned and rounded up to 4 component vectors
+					size = Math::divideAndRoundUp(param.elementSize, 16U) * 4;
+					block.blockSize = Math::divideAndRoundUp(block.blockSize, 4U);
+				}
+				else
 				{
-					UINT32 padding = (4 - alignOffset);
-					block.blockSize += padding;
+					size = typeInfo.baseTypeSize * (typeInfo.numRows * typeInfo.numColumns) / 4;
+
+					// Pack everything as tightly as possible as long as the data doesn't cross 16 byte boundary
+					UINT32 alignOffset = block.blockSize % 4;
+					if (alignOffset != 0 && size > (4 - alignOffset))
+					{
+						UINT32 padding = (4 - alignOffset);
+						block.blockSize += padding;
+					}
 				}
 
 				param.elementSize = size;

+ 12 - 3
Source/BansheeEditor/SceneView/BsSelectionRenderer.cpp

@@ -78,6 +78,15 @@ namespace bs
 		constexpr int numTechniques = sizeof(mTechniqueIndices) / sizeof(mTechniqueIndices[0]);
 		static_assert(numTechniques == (int)RenderableAnimType::Count, "Number of techniques doesn't match the number of possible animation types.");
 
+		FIND_TECHNIQUE_DESC findSkinned;
+		findSkinned.addTag(RTag_Skinned);
+
+		FIND_TECHNIQUE_DESC findMorph;
+		findMorph.addTag(RTag_Morph);
+
+		FIND_TECHNIQUE_DESC findSkinnedMorph;
+		findSkinnedMorph.addTag(RTag_SkinnedMorph);
+
 		SPtr<Material> mat = any_cast<SPtr<Material>>(data);
 		for(UINT32 i = 0; i < numTechniques; i++)
 		{
@@ -85,13 +94,13 @@ namespace bs
 			switch (animType)
 			{
 				case RenderableAnimType::Skinned:
-					mTechniqueIndices[i] = mat->findTechnique(RTag_Skinned);
+					mTechniqueIndices[i] = mat->findTechnique(findSkinned);
 					break;
 				case RenderableAnimType::Morph:
-					mTechniqueIndices[i] = mat->findTechnique(RTag_Morph);
+					mTechniqueIndices[i] = mat->findTechnique(findMorph);
 					break;
 				case RenderableAnimType::SkinnedMorph:
-					mTechniqueIndices[i] = mat->findTechnique(RTag_SkinnedMorph);
+					mTechniqueIndices[i] = mat->findTechnique(findSkinnedMorph);
 					break;
 				default:
 					mTechniqueIndices[i] = mat->getDefaultTechnique();

+ 10 - 1
Source/BansheeGLRenderAPI/BsGLRenderAPI.cpp

@@ -2783,7 +2783,16 @@ namespace bs { namespace ct
 
 		for (auto& param : params)
 		{
-			UINT32 size = GLSLParamParser::calcInterfaceBlockElementSizeAndOffset(param.type, param.arraySize, block.blockSize);
+			UINT32 size;
+			
+			if(param.type == GPDT_STRUCT)
+			{
+				// Structs are always aligned and rounded up to vec4
+				size = Math::divideAndRoundUp(param.elementSize, 16U) * 4;
+				block.blockSize = Math::divideAndRoundUp(block.blockSize, 4U);
+			}
+			else
+				size = GLSLParamParser::calcInterfaceBlockElementSizeAndOffset(param.type, param.arraySize, block.blockSize);
 
 			if (param.arraySize > 1)
 			{

+ 2 - 1
Source/BansheeSL/BsSLFXCompiler.cpp

@@ -513,7 +513,8 @@ namespace bs
 				}
 			}
 				break;
-			case Xsc::Reflection::UniformType::Struct: break;
+			case Xsc::Reflection::UniformType::Struct:
+				break;
 			default: ;
 			}
 		}

+ 1 - 1
Source/BansheeVulkanRenderAPI/BsVulkanGpuProgram.cpp

@@ -622,7 +622,7 @@ namespace bs { namespace ct
 					paramDesc.cpuMemOffset = bufferOffset;
 					paramDesc.gpuMemOffset = bufferOffset;
 
-					desc.params[name] = paramDesc;
+					desc.params[paramName] = paramDesc;
 				}
 			}
 		}

+ 9 - 1
Source/BansheeVulkanRenderAPI/BsVulkanRenderAPI.cpp

@@ -591,7 +591,15 @@ namespace bs { namespace ct
 
 		for (auto& param : params)
 		{
-			UINT32 size = VulkanUtility::calcInterfaceBlockElementSizeAndOffset(param.type, param.arraySize, block.blockSize);
+			UINT32 size;
+			if(param.type == GPDT_STRUCT)
+			{
+				// Structs are always aligned and rounded up to vec4
+				size = Math::divideAndRoundUp(param.elementSize, 16U) * 4;
+				block.blockSize = Math::divideAndRoundUp(block.blockSize, 4U);
+			}
+			else
+				size = VulkanUtility::calcInterfaceBlockElementSizeAndOffset(param.type, param.arraySize, block.blockSize);
 
 			if (param.arraySize > 1)
 			{

+ 1 - 1
Source/CMakeLists.txt

@@ -5,7 +5,7 @@ project (Banshee)
 set (BS_VERSION_MAJOR 0)
 set (BS_VERSION_MINOR 4)
 
-set (BS_PREBUILT_DEPENDENCIES_VERSION 17)
+set (BS_PREBUILT_DEPENDENCIES_VERSION 18)
 set (BS_SRC_DEPENDENCIES_VERSION 15)
 set (BS_BUILTIN_ASSETS_VERSION 3)
 

+ 9 - 0
Source/RenderBeast/BsImageBasedLighting.cpp

@@ -143,6 +143,13 @@ namespace bs { namespace ct
 			"ReflProbeParams",
 			reflProbeParamBindings
 		);
+
+		params->getParamInfo()->getBinding(
+			programType,
+			GpuPipelineParamInfoBase::ParamType::ParamBlock,
+			"ReflectionProbes",
+			reflProbesBinding
+		);
 	}
 
 	ReflProbeParamBuffer::ReflProbeParamBuffer()
@@ -285,4 +292,6 @@ namespace bs { namespace ct
 			return get(getVariation<8>());
 		}
 	}
+
+	ReflProbesParamDef gReflProbesParamDef;
 }}

+ 12 - 0
Source/RenderBeast/BsImageBasedLighting.h

@@ -18,6 +18,9 @@ namespace bs { namespace ct
 	 *  @{
 	 */
 
+	/** Maximum number of refl. probes that can influence an object when basic forward rendering is used. */
+	static constexpr UINT32 STANDARD_FORWARD_MAX_NUM_PROBES = 8;
+
 	/** Information about a single reflection probe, as seen by the lighting shader. */
 	struct ReflProbeData
 	{
@@ -115,6 +118,9 @@ namespace bs { namespace ct
 
 		GpuParamBuffer reflectionProbeIndicesParam;
 		GpuParamBinding reflProbeParamBindings;
+
+		// Only utilized when standard forward rendering is used
+		GpuParamBinding reflProbesBinding;
 	};
 
 	/** Parameter buffer containing information about reflection probes. */
@@ -189,5 +195,11 @@ namespace bs { namespace ct
 		static const UINT32 TILE_SIZE;
 	};
 
+	BS_PARAM_BLOCK_BEGIN(ReflProbesParamDef)
+		BS_PARAM_BLOCK_ENTRY_ARRAY(ReflProbeData, gReflectionProbes, STANDARD_FORWARD_MAX_NUM_PROBES)
+	BS_PARAM_BLOCK_END
+
+	extern ReflProbesParamDef gReflProbesParamDef;
+
 	/** @} */
 }}

+ 106 - 5
Source/RenderBeast/BsLightRendering.cpp

@@ -215,16 +215,17 @@ namespace bs { namespace ct
 			mNumShadowedLights[i] = mNumLights[i] - partition(mVisibleLights[i]);
 
 		// Generate light data to initialize the GPU buffer with
+		mVisibleLightData.clear();
 		for(auto& lightsPerType : mVisibleLights)
 		{
 			for(auto& entry : lightsPerType)
 			{
-				mLightDataTemp.push_back(LightData());
-				entry->getParameters(mLightDataTemp.back());
+				mVisibleLightData.push_back(LightData());
+				entry->getParameters(mVisibleLightData.back());
 			}
 		}
 
-		UINT32 size = (UINT32)mLightDataTemp.size() * sizeof(LightData);
+		UINT32 size = (UINT32)mVisibleLightData.size() * sizeof(LightData);
 		UINT32 curBufferSize;
 
 		if (mLightBuffer != nullptr)
@@ -247,9 +248,106 @@ namespace bs { namespace ct
 		}
 
 		if (size > 0)
-			mLightBuffer->writeData(0, size, mLightDataTemp.data(), BWT_DISCARD);
+			mLightBuffer->writeData(0, size, mVisibleLightData.data(), BWT_DISCARD);
+	}
+
+	/**
+	 * Collects up to STANDARD_FORWARD_MAX_NUM_LIGHTS lights influencing @p bounds. Directional lights are taken first,
+	 * in buffer order. Remaining slots are filled with the non-directional lights whose attenuation sphere intersects
+	 * the bounds, preferring those closest to the center of the bounds. @p output is written in directional, radial,
+	 * spot order and @p counts receives the number of entries of each type (x, y and z respectively).
+	 */
+	void VisibleLightData::gatherInfluencingLights(const Bounds& bounds, 
+		const LightData* (&output)[STANDARD_FORWARD_MAX_NUM_LIGHTS], Vector3I& counts) const
+	{
+		UINT32 outputIndices[STANDARD_FORWARD_MAX_NUM_LIGHTS];
+		UINT32 numInfluencingLights = 0;
+
+		UINT32 numDirLights = getNumDirLights();
+		for(UINT32 i = 0; i < numDirLights; i++)
+		{
+			// All slots taken by directional lights. Stop gathering, but still fall through so that the output array
+			// and the counts get written.
+			if (numInfluencingLights >= STANDARD_FORWARD_MAX_NUM_LIGHTS)
+				break;
+
+			outputIndices[numInfluencingLights] = i;
+			numInfluencingLights++;
+		}
+
+		// Slots [0, pointLightOffset) hold directional lights and must never be evicted
+		UINT32 pointLightOffset = numInfluencingLights;
+		
+		float distances[STANDARD_FORWARD_MAX_NUM_LIGHTS];
+		for(UINT32 i = 0; i < STANDARD_FORWARD_MAX_NUM_LIGHTS; i++)
+			distances[i] = std::numeric_limits<float>::max();
+
+		// Note: This is an ad-hoc way of evaluating light influence, a better way might be wanted
+		UINT32 numLights = (UINT32)mVisibleLightData.size();
+		UINT32 furthestLightIdx = (UINT32)-1;
+		float furthestDistance = 0.0f;
+		for (UINT32 j = numDirLights; j < numLights; j++)
+		{
+			const LightData* lightData = &mVisibleLightData[j];
+
+			Sphere lightSphere(lightData->position, lightData->attRadius);
+			if (bounds.getSphere().intersects(lightSphere))
+			{
+				float distance = bounds.getSphere().getCenter().squaredDistance(lightData->position);
+
+				// See where in the array can we fit the light
+				if (numInfluencingLights < STANDARD_FORWARD_MAX_NUM_LIGHTS)
+				{
+					outputIndices[numInfluencingLights] = j;
+					distances[numInfluencingLights] = distance;
+
+					if (distance > furthestDistance)
+					{
+						furthestLightIdx = numInfluencingLights;
+						furthestDistance = distance;
+					}
 
-		mLightDataTemp.clear();
+					numInfluencingLights++;
+				}
+				else if (distance < furthestDistance)
+				{
+					// Array is full, evict the furthest non-directional light in favor of this closer one
+					outputIndices[furthestLightIdx] = j;
+					distances[furthestLightIdx] = distance;
+
+					// Find the new furthest light. Only non-directional slots are considered: directional slots keep
+					// their initial maximum distance and would otherwise be picked as the next eviction candidate.
+					furthestDistance = distance;
+					for (UINT32 k = pointLightOffset; k < STANDARD_FORWARD_MAX_NUM_LIGHTS; k++)
+					{
+						if (distances[k] > furthestDistance)
+						{
+							furthestDistance = distances[k];
+							furthestLightIdx = k;
+						}
+					}
+				}
+			}
+		}
+
+		// Output actual light data, sorted by type
+		counts = Vector3I(0, 0, 0);
+
+		for(UINT32 i = 0; i < pointLightOffset; i++)
+		{
+			output[i] = &mVisibleLightData[outputIndices[i]];
+			counts.x += 1;
+		}
+
+		// Indices at or past this value refer to spot lights, anything between the directional lights and this value
+		// is a radial light
+		UINT32 outputIdx = pointLightOffset;
+		UINT32 spotLightIdx = getNumDirLights() + getNumRadialLights();
+		for(UINT32 i = pointLightOffset; i < numInfluencingLights; i++)
+		{
+			bool isSpot = outputIndices[i] >= spotLightIdx;
+			if(isSpot)
+				continue;
+
+			output[outputIdx++] = &mVisibleLightData[outputIndices[i]];
+			counts.y += 1;
+		}
+
+		for(UINT32 i = pointLightOffset; i < numInfluencingLights; i++)
+		{
+			bool isSpot = outputIndices[i] >= spotLightIdx;
+			if(!isSpot)
+				continue;
+
+			output[outputIdx++] = &mVisibleLightData[outputIndices[i]];
+			counts.z += 1;
+		}
 	}
 
 	const UINT32 TiledDeferredLightingMat::TILE_SIZE = 16;
@@ -403,4 +501,7 @@ namespace bs { namespace ct
 		Rect2 area(0.0f, 0.0f, (float)props.getWidth(), (float)props.getHeight());
 		gRendererUtility().drawScreenQuad(area);
 	}
+
+	LightsParamDef gLightsParamDef;
+	LightAndReflProbeParamsParamDef gLightAndReflProbeParamsParamDef;
 }}

+ 37 - 5
Source/RenderBeast/BsLightRendering.h

@@ -7,7 +7,11 @@
 #include "Renderer/BsParamBlocks.h"
 #include "Renderer/BsLight.h"
 
-namespace bs { namespace ct
+namespace bs 
+{
+	class Bounds;
+
+namespace ct
 {
 	struct SceneInfo;
 	class RendererViewGroup;
@@ -16,6 +20,9 @@ namespace bs { namespace ct
 	 *  @{
 	 */
 
+	/** Maximum number of lights that can influence an object when basic forward rendering is used. */
+	static constexpr UINT32 STANDARD_FORWARD_MAX_NUM_LIGHTS = 8;
+
 	/** Information about a single light, as seen by the lighting shader. */
 	struct LightData
 	{
@@ -100,6 +107,20 @@ namespace bs { namespace ct
 		/** Returns a GPU bindable buffer containing information about every light. */
 		SPtr<GpuBuffer> getLightBuffer() const { return mLightBuffer; }
 
+		/** 
+		 * Scans the list of lights visible in the view frustum to find the ones influencing the object described by
+		 * the provided bounds. A maximum number of STANDARD_FORWARD_MAX_NUM_LIGHTS will be output. If there are more
+		 * influencing lights, only the most important ones will be returned. 
+		 * 
+		 * The lights will be output in the following order: directional, radial, spot. @p counts will contain the number
+		 * of directional lights (component 'x'), number of radial lights (component 'y') and number of spot lights
+		 * (component 'z').
+		 * 
+		 * update() must have been called with most recent scene/view information before calling this method.
+		 */
+		void gatherInfluencingLights(const Bounds& bounds, const LightData* (&output)[STANDARD_FORWARD_MAX_NUM_LIGHTS], 
+			Vector3I& counts) const;
+
 		/** Returns the number of directional lights in the lights buffer. */
 		UINT32 getNumDirLights() const { return mNumLights[0]; }
 
@@ -126,11 +147,9 @@ namespace bs { namespace ct
 		UINT32 mNumLights[(UINT32)LightType::Count];
 		UINT32 mNumShadowedLights[(UINT32)LightType::Count];
 
-		// These are rebuilt every call to setLights()
+		// These are rebuilt every call to update()
 		Vector<const RendererLight*> mVisibleLights[(UINT32)LightType::Count];
-
-		// Helpers to avoid memory allocations
-		Vector<LightData> mLightDataTemp;
+		Vector<LightData> mVisibleLightData;
 	};
 
 	BS_PARAM_BLOCK_BEGIN(TiledLightingParamDef)
@@ -200,5 +219,18 @@ namespace bs { namespace ct
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
 	};
 
+	BS_PARAM_BLOCK_BEGIN(LightsParamDef)
+		BS_PARAM_BLOCK_ENTRY_ARRAY(LightData, gLights, STANDARD_FORWARD_MAX_NUM_LIGHTS)
+	BS_PARAM_BLOCK_END
+
+	extern LightsParamDef gLightsParamDef;
+
+	BS_PARAM_BLOCK_BEGIN(LightAndReflProbeParamsParamDef)
+		BS_PARAM_BLOCK_ENTRY(Vector4I, gLightOffsets)
+		BS_PARAM_BLOCK_ENTRY(int, gReflProbeCount)
+	BS_PARAM_BLOCK_END
+
+	extern LightAndReflProbeParamsParamDef gLightAndReflProbeParamsParamDef;
+
 	/** @} */
 }}

+ 45 - 16
Source/RenderBeast/BsObjectRendering.cpp

@@ -44,28 +44,57 @@ namespace bs { namespace ct
 			element.perCameraBindings
 		);
 
-		gpuParams->getParamInfo()->getBindings(
-			GpuPipelineParamInfoBase::ParamType::ParamBlock,
-			"GridParams",
-			element.gridParamsBindings
-		);
+		if (gpuParams->hasBuffer(GPT_VERTEX_PROGRAM, "boneMatrices"))
+			gpuParams->setBuffer(GPT_VERTEX_PROGRAM, "boneMatrices", element.boneMatrixBuffer);
+
+		const bool isTransparent = (shader->getFlags() & (UINT32)ShaderFlags::Transparent) != 0;
+		const bool usesForwardRendering = isTransparent;
 
-		if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gLights"))
-			gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gLights", element.lightsBufferParam);
+		if(usesForwardRendering)
+		{
+			const bool supportsClusteredForward = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop;
+			if(supportsClusteredForward)
+			{
+				gpuParams->getParamInfo()->getBindings(
+					GpuPipelineParamInfoBase::ParamType::ParamBlock,
+					"GridParams",
+					element.gridParamsBindings
+				);
 
-		if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gGridLightOffsetsAndSize"))
-			gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gGridLightOffsetsAndSize", element.gridLightOffsetsAndSizeParam);
+				if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gLights"))
+					gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gLights", element.lightsBufferParam);
 
-		if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gLightIndices"))
-			gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gLightIndices", element.gridLightIndicesParam);
+				if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gGridLightOffsetsAndSize"))
+					gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gGridLightOffsetsAndSize", 
+						element.gridLightOffsetsAndSizeParam);
 
-		if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gGridProbeOffsetsAndSize"))
-			gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gGridProbeOffsetsAndSize", element.gridProbeOffsetsAndSizeParam);
+				if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gLightIndices"))
+					gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gLightIndices", element.gridLightIndicesParam);
 
-		element.imageBasedParams.populate(gpuParams, GPT_FRAGMENT_PROGRAM, true, true, true);
+				if (gpuParams->hasBuffer(GPT_FRAGMENT_PROGRAM, "gGridProbeOffsetsAndSize"))
+					gpuParams->getBufferParam(GPT_FRAGMENT_PROGRAM, "gGridProbeOffsetsAndSize", 
+						element.gridProbeOffsetsAndSizeParam);
+			}
+			else
+			{
+				gpuParams->getParamInfo()->getBinding(
+					GPT_FRAGMENT_PROGRAM,
+					GpuPipelineParamInfoBase::ParamType::ParamBlock,
+					"Lights",
+					element.lightsParamBlockBinding
+				);
 
-		if (gpuParams->hasBuffer(GPT_VERTEX_PROGRAM, "boneMatrices"))
-			gpuParams->setBuffer(GPT_VERTEX_PROGRAM, "boneMatrices", element.boneMatrixBuffer);
+				gpuParams->getParamInfo()->getBinding(
+					GPT_FRAGMENT_PROGRAM,
+					GpuPipelineParamInfoBase::ParamType::ParamBlock,
+					"LightAndReflProbeParams",
+					element.lightAndReflProbeParamsParamBlockBinding
+				);
+			}
+
+			element.imageBasedParams.populate(gpuParams, GPT_FRAGMENT_PROGRAM, true, supportsClusteredForward, 
+				supportsClusteredForward);
+		}
 	}
 
 	void ObjectRenderer::setParamFrameParams(float time)

+ 89 - 18
Source/RenderBeast/BsRenderCompositor.cpp

@@ -1180,13 +1180,31 @@ namespace bs { namespace ct
 		const VisibleLightData& visibleLightData = inputs.viewGroup.getVisibleLightData();
 		const VisibleReflProbeData& visibleReflProbeData = inputs.viewGroup.getVisibleReflProbeData();
 
-		const LightGrid& lightGrid = inputs.view.getLightGrid();
-
+		// Buffers used when clustered forward is available
 		SPtr<GpuParamBlockBuffer> gridParams;
 		SPtr<GpuBuffer> gridLightOffsetsAndSize, gridLightIndices;
 		SPtr<GpuBuffer> gridProbeOffsetsAndSize, gridProbeIndices;
-		lightGrid.getOutputs(gridLightOffsetsAndSize, gridLightIndices, gridProbeOffsetsAndSize, gridProbeIndices, 
-			gridParams);
+
+		// Buffers used when clustered forward is unavailable
+		SPtr<GpuParamBlockBuffer> lightsParamBlock;
+		SPtr<GpuParamBlockBuffer> reflProbesParamBlock;
+		SPtr<GpuParamBlockBuffer> lightAndReflProbeParamsParamBlock;
+
+		bool supportsClusteredForward = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop;
+		if(supportsClusteredForward)
+		{
+			const LightGrid& lightGrid = inputs.view.getLightGrid();
+
+			lightGrid.getOutputs(gridLightOffsetsAndSize, gridLightIndices, gridProbeOffsetsAndSize, gridProbeIndices,
+				gridParams);
+		}
+		else
+		{
+			// Note: Store these instead of creating them every time?
+			lightsParamBlock = gLightsParamDef.createBuffer();
+			reflProbesParamBlock = gReflProbesParamDef.createBuffer();
+			lightAndReflProbeParamsParamBlock = gLightAndReflProbeParamsParamDef.createBuffer();
+		}
 
 		// Prepare refl. probe param buffer
 		ReflProbeParamBuffer reflProbeParamBuffer;
@@ -1214,19 +1232,77 @@ namespace bs { namespace ct
 				// Note: It would be nice to be able to set this once and keep it, only updating if the buffers actually
 				// change (e.g. when growing). 
 				SPtr<GpuParams> gpuParams = element.params->getGpuParams();
-				for(UINT32 j = 0; j < GPT_COUNT; j++)
-				{
-					const GpuParamBinding& binding = element.gridParamsBindings[j];
-					if (binding.slot != (UINT32)-1)
-						gpuParams->setParamBlockBuffer(binding.set, binding.slot, gridParams);
+				ImageBasedLightingParams& iblParams = element.imageBasedParams;
+				if(supportsClusteredForward)
+				{ 
+					for (UINT32 j = 0; j < GPT_COUNT; j++)
+					{
+						const GpuParamBinding& binding = element.gridParamsBindings[j];
+						if (binding.slot != (UINT32)-1)
+							gpuParams->setParamBlockBuffer(binding.set, binding.slot, gridParams);
+					}
+
+					element.gridLightOffsetsAndSizeParam.set(gridLightOffsetsAndSize);
+					element.gridProbeOffsetsAndSizeParam.set(gridProbeOffsetsAndSize);
+
+					element.gridLightIndicesParam.set(gridLightIndices);
+					iblParams.reflectionProbeIndicesParam.set(gridProbeIndices);
+
+					element.lightsBufferParam.set(visibleLightData.getLightBuffer());
+					iblParams.reflectionProbesParam.set(visibleReflProbeData.getProbeBuffer());
 				}
+				else
+				{
+					// Populate light & probe buffers
+					const Bounds& bounds = sceneInfo.renderableCullInfos[i].bounds;
 
-				element.gridLightOffsetsAndSizeParam.set(gridLightOffsetsAndSize);
-				element.gridLightIndicesParam.set(gridLightIndices);
-				element.lightsBufferParam.set(visibleLightData.getLightBuffer());
+					Vector3I lightCounts;
+					const LightData* lights[STANDARD_FORWARD_MAX_NUM_LIGHTS];
+					visibleLightData.gatherInfluencingLights(bounds, lights, lightCounts);
+
+					Vector4I lightOffsets;
+					lightOffsets.x = lightCounts.x;
+					lightOffsets.y = lightCounts.x;
+					lightOffsets.z = lightOffsets.y + lightCounts.y;
+					lightOffsets.w = lightOffsets.z + lightCounts.z;
+
+					for(INT32 j = 0; j < lightOffsets.w; j++)
+						gLightsParamDef.gLights.set(lightsParamBlock, *lights[j], j);
+
+					INT32 numReflProbes = std::min(visibleReflProbeData.getNumProbes(), STANDARD_FORWARD_MAX_NUM_PROBES);
+					for(INT32 j = 0; j < numReflProbes; j++)
+						gReflProbesParamDef.gReflectionProbes.set(reflProbesParamBlock, visibleReflProbeData.getProbeData(j), j);
+
+					gLightAndReflProbeParamsParamDef.gLightOffsets.set(lightAndReflProbeParamsParamBlock, lightOffsets);
+					gLightAndReflProbeParamsParamDef.gReflProbeCount.set(lightAndReflProbeParamsParamBlock, numReflProbes);
+
+					if(iblParams.reflProbesBinding.set != (UINT32)-1)
+					{
+						gpuParams->setParamBlockBuffer(
+							iblParams.reflProbesBinding.set,
+							iblParams.reflProbesBinding.slot,
+							reflProbesParamBlock);
+					}
+
+					if(element.lightsParamBlockBinding.set != (UINT32)-1)
+					{
+						gpuParams->setParamBlockBuffer(
+							element.lightsParamBlockBinding.set,
+							element.lightsParamBlockBinding.slot,
+							lightsParamBlock);
+					}
+
+					if(element.lightAndReflProbeParamsParamBlockBinding.set != (UINT32)-1)
+					{
+						gpuParams->setParamBlockBuffer(
+							element.lightAndReflProbeParamsParamBlockBinding.set,
+							element.lightAndReflProbeParamsParamBlockBinding.slot,
+							lightAndReflProbeParamsParamBlock);
+					}
+				}
 
 				// Image based lighting params
-				ImageBasedLightingParams& iblParams = element.imageBasedParams;
+				// Note: Ideally these should be bound once (they are the same for all renderables)
 				if (iblParams.reflProbeParamBindings.set != (UINT32)-1)
 				{
 					gpuParams->setParamBlockBuffer(
@@ -1235,11 +1311,6 @@ namespace bs { namespace ct
 						reflProbeParamBuffer.buffer);
 				}
 
-				element.gridProbeOffsetsAndSizeParam.set(gridProbeOffsetsAndSize);
-
-				iblParams.reflectionProbeIndicesParam.set(gridProbeIndices);
-				iblParams.reflectionProbesParam.set(visibleReflProbeData.getProbeBuffer());
-
 				iblParams.skyReflectionsTexParam.set(skyFilteredRadiance);
 				iblParams.ambientOcclusionTexParam.set(Texture::WHITE); // Note: Add SSAO here?
 				iblParams.ssrTexParam.set(Texture::BLACK); // Note: Add SSR here?

+ 12 - 3
Source/RenderBeast/BsRendererObject.h

@@ -63,9 +63,6 @@ namespace bs { namespace ct
 		/** Index of the technique in the material to render the element with. */
 		UINT32 techniqueIdx;
 
-		/** Shader variation that controls the type of vertex input that is provided. */
-		const ShaderVariation* vertexInputVariation = nullptr;
-
 		/** Binding indices representing where should the per-camera param block buffer be bound to. */
 		GpuParamBinding perCameraBindings[GPT_COUNT];
 
@@ -92,6 +89,18 @@ namespace bs { namespace ct
 
 		/** Collection of parameters used for image based lighting. */
 		ImageBasedLightingParams imageBasedParams;
+		
+		/** 
+		 * Binding for a parameter block containing a list of lights influencing this object. Only used when standard
+		 * (non-clustered) forward rendering is used. 
+		 */
+		GpuParamBinding lightsParamBlockBinding;
+
+		/** 
+		 * Binding for a parameter block that contains the number of lights and reflection probes in the light/refl. probe 
+		 * parameter blocks. Only used when standard (non-clustered) forward rendering is used.
+		 */
+		GpuParamBinding lightAndReflProbeParamsParamBlockBinding;
 
 		/** GPU buffer containing element's bone matrices, if it requires any. */
 		SPtr<GpuBuffer> boneMatrixBuffer;

+ 43 - 9
Source/RenderBeast/BsRendererScene.cpp

@@ -254,19 +254,52 @@ namespace bs {	namespace ct
 				if (renElement.material != nullptr && renElement.material->getShader() == nullptr)
 					renElement.material = nullptr;
 
-				// If no mInfo.aterial use the default mInfo.aterial
+				// If no material use the default material
 				if (renElement.material == nullptr)
 					renElement.material = Material::create(DefaultMaterial::get()->getShader());
 
 				// Determine which technique to use
-				static const ShaderVariation* variationLookup[4] = { &SVar_Static, &SVar_Skinned, &SVar_Morph, &SVar_SkinnedMorph };
 				static_assert((UINT32)RenderableAnimType::Count == 4, "RenderableAnimType is expected to have four sequential entries.");
 
-				UINT32 techniqueIdx = -1;
+				bool isTransparent = (renElement.material->getShader()->getFlags() & (UINT32)ShaderFlags::Transparent) != 0;
+				bool usesForwardRendering = isTransparent;
+				
 				RenderableAnimType animType = renderable->getAnimType();
-				renElement.vertexInputVariation = variationLookup[(int)animType];
 
-				techniqueIdx = renElement.material->findTechnique(*renElement.vertexInputVariation);
+				// Shader variation lookup, indexed by RenderableAnimType (four sequential entries, see the static_assert
+				// above). Deliberately a plain local: every entry is re-assigned on each call and the contents differ
+				// per material, so static storage would only introduce shared mutable state.
+				// NOTE(review): template arguments appear to be <skinned, morph[, clustered]> - confirm against the
+				// getVertexInputVariation/getForwardRenderingVariation declarations.
+				const ShaderVariation* VAR_LOOKUP[4];
+				if(usesForwardRendering)
+				{
+					bool supportsClusteredForward = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop;
+
+					if(supportsClusteredForward)
+					{
+						VAR_LOOKUP[0] = &getForwardRenderingVariation<false, false, true>();
+						VAR_LOOKUP[1] = &getForwardRenderingVariation<true, false, true>();
+						VAR_LOOKUP[2] = &getForwardRenderingVariation<false, true, true>();
+						VAR_LOOKUP[3] = &getForwardRenderingVariation<true, true, true>();
+					}
+					else
+					{
+						VAR_LOOKUP[0] = &getForwardRenderingVariation<false, false, false>();
+						VAR_LOOKUP[1] = &getForwardRenderingVariation<true, false, false>();
+						VAR_LOOKUP[2] = &getForwardRenderingVariation<false, true, false>();
+						VAR_LOOKUP[3] = &getForwardRenderingVariation<true, true, false>();
+					}
+				}
+				else
+				{
+					// Non-forward (non-transparent) materials only vary by vertex input type
+					VAR_LOOKUP[0] = &getVertexInputVariation<false, false>();
+					VAR_LOOKUP[1] = &getVertexInputVariation<true, false>();
+					VAR_LOOKUP[2] = &getVertexInputVariation<false, true>();
+					VAR_LOOKUP[3] = &getVertexInputVariation<true, true>();
+				}
+
+				const ShaderVariation* variation = VAR_LOOKUP[(int)animType];
+
+				FIND_TECHNIQUE_DESC findDesc;
+				findDesc.variation = variation;
+
+				UINT32 techniqueIdx = renElement.material->findTechnique(findDesc);
 
 				if (techniqueIdx == (UINT32)-1)
 					techniqueIdx = renElement.material->getDefaultTechnique();
@@ -286,7 +319,7 @@ namespace bs {	namespace ct
 						{
 							Vector<VertexElement> missingElements = vertexDecl->getMissingElements(shaderDecl);
 
-							// If using mInfo.orph shapes ignore POSITION1 and NORMAL1 mInfo.issing since we assign them from within the renderer
+							// If using morph shapes, ignore missing POSITION1 and NORMAL1 since we assign them from within the renderer
 							if (animType == RenderableAnimType::Morph || animType == RenderableAnimType::SkinnedMorph)
 							{
 								auto removeIter = std::remove_if(missingElements.begin(), missingElements.end(), [](const VertexElement& x)
@@ -301,7 +334,8 @@ namespace bs {	namespace ct
 							if (!missingElements.empty())
 							{
 								StringStream wrnStream;
-								wrnStream << "Provided mesh is mInfo.issing required vertex attributes to render with the provided shader. Missing elements: " << std::endl;
+								// Adjacent string literals are concatenated by the compiler. A backslash continuation
+								// inside the literal would embed the next line's indentation in the logged message.
+								wrnStream << "Provided mesh is missing required vertex attributes to render with the "
+									"provided shader. Missing elements: " << std::endl;
 
 								for (auto& entry : missingElements)
 									wrnStream << "\t" << toString(entry.getSemantic()) << entry.getSemanticIdx() << std::endl;
@@ -313,7 +347,7 @@ namespace bs {	namespace ct
 					}
 				}
 
-				// Generate or assigned renderer specific data for the mInfo.aterial
+				// Generate or assign renderer-specific data for the material
 				renElement.params = renElement.material->createParamsSet(techniqueIdx);
 				renElement.material->updateParamsSet(renElement.params, true);
 
@@ -431,7 +465,7 @@ namespace bs {	namespace ct
 
 	void RendererScene::updateReflectionProbe(ReflectionProbe* probe, bool texture)
 	{
-		// Should only get called if transform changes, any other mInfo.ajor changes and ReflProbeInfo entry gets rebuild
+		// Should only get called if the transform changes; on any other major change the ReflProbeInfo entry gets rebuilt
 		UINT32 probeId = probe->getRendererId();
 		mInfo.reflProbeWorldBounds[probeId] = probe->getBounds();
 

+ 10 - 5
Source/RenderBeast/BsRendererView.cpp

@@ -9,6 +9,7 @@
 #include "BsLightRendering.h"
 #include "Material/BsGpuParamsSet.h"
 #include "BsRendererScene.h"
+#include "BsRenderBeast.h"
 
 namespace bs { namespace ct
 {
@@ -595,12 +596,16 @@ namespace bs { namespace ct
 		mVisibleLightData.update(sceneInfo, *this);
 		mVisibleReflProbeData.update(sceneInfo, *this);
 
-		for (UINT32 i = 0; i < numViews; i++)
+		bool supportsClusteredForward = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop;
+		if(supportsClusteredForward)
 		{
-			if (mViews[i]->getRenderSettings().overlayOnly)
-				continue;
+			for (UINT32 i = 0; i < numViews; i++)
+			{
+				if (mViews[i]->getRenderSettings().overlayOnly)
+					continue;
 
-			mViews[i]->updateLightGrid(mVisibleLightData, mVisibleReflProbeData);
+				mViews[i]->updateLightGrid(mVisibleLightData, mVisibleReflProbeData);
+			}
 		}
 	}
-}}
+}}

+ 5 - 4
Source/RenderBeast/BsShadowRendering.cpp

@@ -472,10 +472,11 @@ namespace bs { namespace ct
 					}
 				}
 
-				static const ShaderVariation* VAR_LOOKUP[4] = 
-				{ 
-					&SVar_Static, &SVar_Skinned, &SVar_Morph, &SVar_SkinnedMorph 
-				};
+				// Vertex input variations, indexed by RenderableAnimType. A plain local array with an initializer is
+				// used instead of a static array that gets re-assigned on every pass through this code.
+				const ShaderVariation* VAR_LOOKUP[4] =
+				{
+					&getVertexInputVariation<false, false>(),
+					&getVertexInputVariation<true, false>(),
+					&getVertexInputVariation<false, true>(),
+					&getVertexInputVariation<true, true>()
+				};
 
 				for (UINT32 i = 0; i < (UINT32)RenderableAnimType::Count; i++)
 				{