Parcourir la source

Updated engine shaders to new BSL syntax

BearishSun il y a 8 ans
Parent
commit
1523924732
50 fichiers modifiés avec 3392 ajouts et 3908 suppressions
  1. 0 4
      Data/Raw/Engine/DataList.json
  2. 58 64
      Data/Raw/Engine/Includes/BasePass.bslinc
  3. 54 70
      Data/Raw/Engine/Includes/GBufferInput.bslinc
  4. 11 14
      Data/Raw/Engine/Includes/GBufferOutput.bslinc
  5. 183 185
      Data/Raw/Engine/Includes/ImageBasedLighting.bslinc
  6. 53 56
      Data/Raw/Engine/Includes/LightGridCommon.bslinc
  7. 330 333
      Data/Raw/Engine/Includes/LightingCommon.bslinc
  8. 78 85
      Data/Raw/Engine/Includes/NormalVertexInput.bslinc
  9. 24 27
      Data/Raw/Engine/Includes/PPBase.bslinc
  10. 97 100
      Data/Raw/Engine/Includes/PPTonemapCommon.bslinc
  11. 182 184
      Data/Raw/Engine/Includes/PPWhiteBalance.bslinc
  12. 43 68
      Data/Raw/Engine/Includes/PerCameraData.bslinc
  13. 15 32
      Data/Raw/Engine/Includes/PerObjectData.bslinc
  14. 56 59
      Data/Raw/Engine/Includes/ReflectionCubemapCommon.bslinc
  15. 54 69
      Data/Raw/Engine/Includes/ResolveCommon.bslinc
  16. 114 117
      Data/Raw/Engine/Includes/SHCommon.bslinc
  17. 103 115
      Data/Raw/Engine/Includes/SkinnedVertexInput.bslinc
  18. 34 56
      Data/Raw/Engine/Includes/SpriteImage.bslinc
  19. 18 15
      Data/Raw/Engine/Includes/Surface.bslinc
  20. 27 30
      Data/Raw/Engine/Includes/SurfaceData.bslinc
  21. 53 59
      Data/Raw/Engine/Includes/VolumeRenderBase.bslinc
  22. 15 18
      Data/Raw/Engine/Shaders/Default.bsl
  23. 27 43
      Data/Raw/Engine/Shaders/Diffuse.bsl
  24. 41 46
      Data/Raw/Engine/Shaders/FlatFramebufferToTexture.bsl
  25. 113 114
      Data/Raw/Engine/Shaders/IrradianceComputeSH.bsl
  26. 67 68
      Data/Raw/Engine/Shaders/IrradianceProjectSH.bsl
  27. 48 49
      Data/Raw/Engine/Shaders/IrradianceReduceSH.bsl
  28. 126 134
      Data/Raw/Engine/Shaders/LightGridLLCreation.bsl
  29. 89 91
      Data/Raw/Engine/Shaders/LightGridLLReduction.bsl
  30. 99 100
      Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl
  31. 25 37
      Data/Raw/Engine/Shaders/PPDownsample.bsl
  32. 84 97
      Data/Raw/Engine/Shaders/PPEyeAdaptHistogram.bsl
  33. 28 39
      Data/Raw/Engine/Shaders/PPEyeAdaptHistogramReduce.bsl
  34. 141 151
      Data/Raw/Engine/Shaders/PPEyeAdaptation.bsl
  35. 64 81
      Data/Raw/Engine/Shaders/PPTonemapping.bsl
  36. 18 31
      Data/Raw/Engine/Shaders/ReflectionCubeDownsample.bsl
  37. 109 122
      Data/Raw/Engine/Shaders/ReflectionCubeImportanceSample.bsl
  38. 41 43
      Data/Raw/Engine/Shaders/Skybox.bsl
  39. 8 6
      Data/Raw/Engine/Shaders/SpriteImageAlpha.bsl
  40. 4 2
      Data/Raw/Engine/Shaders/SpriteImageNoAlpha.bsl
  41. 51 60
      Data/Raw/Engine/Shaders/SpriteLine.bsl
  42. 43 63
      Data/Raw/Engine/Shaders/SpriteText.bsl
  43. 0 188
      Data/Raw/Engine/Shaders/TestFX.bsl
  44. 243 244
      Data/Raw/Engine/Shaders/TiledDeferredImageBasedLighting.bsl
  45. 238 239
      Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl
  46. 71 85
      Data/Raw/Engine/Shaders/Transparent.bsl
  47. 1 5
      Source/BansheeSL/BsLexerFX.l
  48. 1 5
      Source/BansheeSL/BsParserFX.y
  49. 1 1
      Source/BansheeSL/Source/BsASTFX.c
  50. 9 4
      Source/BansheeSL/Source/BsSLFXCompiler.cpp

+ 0 - 4
Data/Raw/Engine/DataList.json

@@ -202,10 +202,6 @@
             "Path": "SpriteText.bsl",
             "UUID": "25df2c87-c206-4c2f-ab2b-3aad9e7f90f1"
         },
-        {
-            "Path": "TestFX.bsl",
-            "UUID": "9e783e45-bf1f-41cc-bb48-eb2e1200cfb6"
-        },
         {
             "Path": "TiledDeferredLighting.bsl",
             "UUID": "787d7293-f335-4eda-a897-c706e6b5c818"

+ 58 - 64
Data/Raw/Engine/Includes/BasePass.bslinc

@@ -9,76 +9,70 @@
 #include "$ENGINE$\NormalVertexInput.bslinc"
 #undef USE_BLEND_SHAPES
 
-Technique : base("BasePassCommon") =
+mixin BasePassCommon
 {
-	Pass =
-	{
-		Vertex =
-		{			
-			VStoFS main(VertexInput input)
-			{
-				VStoFS output;
-			
-				VertexIntermediate intermediate = getVertexIntermediate(input);
-				float4 worldPosition = getVertexWorldPosition(input, intermediate);
-				
-				output.worldPosition = worldPosition.xyz;
-				output.position = mul(gMatViewProj, worldPosition);
-				populateVertexOutput(input, intermediate, output);
-							
-				return output;
-			}
-		};
+	code
+	{			
+		VStoFS vsmain(VertexInput input)
+		{
+			VStoFS output;
 		
-		Fragment =
+			VertexIntermediate intermediate = getVertexIntermediate(input);
+			float4 worldPosition = getVertexWorldPosition(input, intermediate);
+			
+			output.worldPosition = worldPosition.xyz;
+			output.position = mul(gMatViewProj, worldPosition);
+			populateVertexOutput(input, intermediate, output);
+						
+			return output;
+		}
+
+		float3 calcWorldNormal(VStoFS input, float3 surfaceNormal)
 		{
-			float3 calcWorldNormal(VStoFS input, float3 surfaceNormal)
-			{
-				float3 tangentToWorldX = input.tangentToWorldX.xyz;
-				float3 tangentToWorldZ = input.tangentToWorldZ;
-				float3 tangentToWorldY = cross(tangentToWorldZ, tangentToWorldX) * input.tangentToWorldX.w;
-				
-				float3x3 tangentToWorld = float3x3(tangentToWorldX, tangentToWorldY, tangentToWorldZ);
-				
-				// Multiplication order flipped because we stored basis vectors as rows
-				return normalize(mul(surfaceNormal, tangentToWorld));			
-			}
-		};
+			float3 tangentToWorldX = input.tangentToWorldX.xyz;
+			float3 tangentToWorldZ = input.tangentToWorldZ;
+			float3 tangentToWorldY = cross(tangentToWorldZ, tangentToWorldX) * input.tangentToWorldX.w;
+			
+			float3x3 tangentToWorld = float3x3(tangentToWorldX, tangentToWorldY, tangentToWorldZ);
+			
+			// Multiplication order flipped because we stored basis vectors as rows
+			return normalize(mul(surfaceNormal, tangentToWorld));			
+		}
 	};
 };
 
-Technique
- : base("BasePass")
- : inherits("GBufferOutput")
- : inherits("PerCameraData")
- : inherits("PerObjectData")
- : inherits("NormalVertexInput")
- : inherits("BasePassCommon") =
-{ };
+mixin BasePass
+{ 
+	mixin GBufferOutput;
+	mixin PerCameraData;
+	mixin PerObjectData;
+	mixin NormalVertexInput;
+	mixin BasePassCommon;
+};
 
-Technique
- : base("BasePassSkinned")
- : inherits("GBufferOutput")
- : inherits("PerCameraData")
- : inherits("PerObjectData")
- : inherits("SkinnedVertexInput")
- : inherits("BasePassCommon") =
-{ };
+mixin BasePassSkinned
+{
+	mixin GBufferOutput;
+	mixin PerCameraData;
+	mixin PerObjectData;
+	mixin SkinnedVertexInput;
+	mixin BasePassCommon;
+};
 
-Technique
- : base("BasePassMorph")
- : inherits("GBufferOutput")
- : inherits("PerCameraData")
- : inherits("PerObjectData")
- : inherits("MorphVertexInput")
- : inherits("BasePassCommon") =
-{ };
+mixin BasePassMorph
+{
+	mixin GBufferOutput;
+	mixin PerCameraData;
+	mixin PerObjectData;
+	mixin MorphVertexInput;
+	mixin BasePassCommon;
+};
 
-Technique
- : base("BasePassSkinnedMorph")
- : inherits("GBufferOutput")
- : inherits("PerCameraData")
- : inherits("PerObjectData")
- : inherits("SkinnedMorphVertexInput")
- : inherits("BasePassCommon") =
-{ };
+mixin BasePassSkinnedMorph
+{
+	mixin GBufferOutput;
+	mixin PerCameraData;
+	mixin PerObjectData;
+	mixin SkinnedMorphVertexInput;
+	mixin BasePassCommon;
+};

+ 54 - 70
Data/Raw/Engine/Includes/GBufferInput.bslinc

@@ -1,81 +1,65 @@
 #include "$ENGINE$\SurfaceData.bslinc"
 
-Parameters =
+mixin GBufferInput
 {
-	Sampler2D 	gGBufferASamp : alias("gGBufferATex");
-	Sampler2D 	gGBufferBSamp : alias("gGBufferBTex");
-	Sampler2D 	gGBufferCSamp : alias("gGBufferCTex");
-	Sampler2D 	gDepthBufferSamp : alias("gDepthBufferTex");
-	
-	Texture2D 	gGBufferATex : auto("GBufferA");
-	Texture2D	gGBufferBTex : auto("GBufferB");
-	Texture2D	gGBufferCTex : auto("GBufferC");
-	Texture2D 	gDepthBufferTex : auto("GBufferDepth");
-};
+	mixin SurfaceData;
+	mixin PerCameraData;
 
-Technique 
- : inherits("SurfaceData")
- : inherits("PerCameraData")
- : base("GBufferInput") =
-{
-	Pass =
+	code 
 	{
-		Common = 
+		SamplerState gGBufferASamp;
+		SamplerState gGBufferBSamp;
+		SamplerState gGBufferCSamp;
+		SamplerState gDepthBufferSamp;
+
+		#if MSAA_COUNT > 1
+		Texture2DMS<float4, MSAA_COUNT> gGBufferATex;
+		Texture2DMS<float4, MSAA_COUNT>	gGBufferBTex;
+		Texture2DMS<float2, MSAA_COUNT>	gGBufferCTex;
+		Texture2DMS<float4, MSAA_COUNT> gDepthBufferTex;
+		#else
+		Texture2D gGBufferATex;
+		Texture2D gGBufferBTex;
+		Texture2D gGBufferCTex;
+		Texture2D gDepthBufferTex;
+		#endif
+		
+		SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float2 GBufferCData, float deviceZ)
 		{
-			SamplerState gGBufferASamp : register(s0);
-			SamplerState gGBufferBSamp : register(s1);
-			SamplerState gGBufferCSamp : register(s2);
-			SamplerState gDepthBufferSamp : register(s3);
-	
-			#if MSAA_COUNT > 1
-			Texture2DMS<float4, MSAA_COUNT> gGBufferATex : register(t0);
-			Texture2DMS<float4, MSAA_COUNT>	gGBufferBTex : register(t1);
-			Texture2DMS<float2, MSAA_COUNT>	gGBufferCTex : register(t2);
-			Texture2DMS<float4, MSAA_COUNT> gDepthBufferTex : register(t3);
-			#else
-			Texture2D gGBufferATex : register(t0);
-			Texture2D gGBufferBTex : register(t1);
-			Texture2D gGBufferCTex : register(t2);
-			Texture2D gDepthBufferTex : register(t3);
-			#endif
+			SurfaceData output;
 			
-			SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float2 GBufferCData, float deviceZ)
-			{
-				SurfaceData output;
-				
-				output.albedo.xyz = GBufferAData.xyz;
-				output.albedo.w = 1.0f;
-				output.worldNormal = GBufferBData * float4(2, 2, 2, 1) - float4(1, 1, 1, 0);
-				output.worldNormal.xyz = normalize(output.worldNormal.xyz);
-				output.depth = convertFromDeviceZ(deviceZ);
-				output.roughness = GBufferCData.x;
-				output.metalness = GBufferCData.y;
-				
-				return output;
-			}
+			output.albedo.xyz = GBufferAData.xyz;
+			output.albedo.w = 1.0f;
+			output.worldNormal = GBufferBData * float4(2, 2, 2, 1) - float4(1, 1, 1, 0);
+			output.worldNormal.xyz = normalize(output.worldNormal.xyz);
+			output.depth = convertFromDeviceZ(deviceZ);
+			output.roughness = GBufferCData.x;
+			output.metalness = GBufferCData.y;
 			
-			#if MSAA_COUNT > 1
-			SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
-			{
-				float4 GBufferAData = gGBufferATex.Load(pixelPos, sampleIndex);
-				float4 GBufferBData = gGBufferBTex.Load(pixelPos, sampleIndex);
-				float2 GBufferCData = gGBufferCTex.Load(pixelPos, sampleIndex).rg;
-				float deviceZ = gDepthBufferTex.Load(pixelPos, sampleIndex).r;
-				
-				return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
-			}
+			return output;
+		}
+		
+		#if MSAA_COUNT > 1
+		SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
+		{
+			float4 GBufferAData = gGBufferATex.Load(pixelPos, sampleIndex);
+			float4 GBufferBData = gGBufferBTex.Load(pixelPos, sampleIndex);
+			float2 GBufferCData = gGBufferCTex.Load(pixelPos, sampleIndex).rg;
+			float deviceZ = gDepthBufferTex.Load(pixelPos, sampleIndex).r;
+			
+			return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
+		}
+		
+		#else
+		SurfaceData getGBufferData(uint2 pixelPos)
+		{
+			float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
+			float4 GBufferBData = gGBufferBTex.Load(int3(pixelPos, 0));
+			float2 GBufferCData = gGBufferCTex.Load(int3(pixelPos, 0)).rg;
+			float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
 			
-			#else
-			SurfaceData getGBufferData(uint2 pixelPos)
-			{
-				float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
-				float4 GBufferBData = gGBufferBTex.Load(int3(pixelPos, 0));
-				float2 GBufferCData = gGBufferCTex.Load(int3(pixelPos, 0)).rg;
-				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
-				
-				return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
-			}			
-			#endif			
-		};
+			return decodeGBuffer(GBufferAData, GBufferBData, GBufferCData, deviceZ);
+		}			
+		#endif			
 	};
 };

+ 11 - 14
Data/Raw/Engine/Includes/GBufferOutput.bslinc

@@ -1,21 +1,18 @@
 #include "$ENGINE$\SurfaceData.bslinc"
 
-Technique 
- : inherits("SurfaceData")
- : base("GBufferOutput") =
+mixin GBufferOutput 
 {
-	Pass =
+	mixin SurfaceData;
+
+	code
 	{
-		Common = 
+		void encodeGBuffer(SurfaceData data, out float4 GBufferAData, out float4 GBufferBData, out float2 GBufferCData)
 		{
-			void encodeGBuffer(SurfaceData data, out float4 GBufferAData, out float4 GBufferBData, out float2 GBufferCData)
-			{
-				GBufferAData = data.albedo;
-				GBufferBData.xyz = float3(data.worldNormal.xyz * 0.5f + 0.5f);
-				GBufferBData.w = 1.0f; // Marks that some deferred data was written
-				GBufferCData.x = data.roughness;
-				GBufferCData.y = data.metalness;
-			}
-		};
+			GBufferAData = data.albedo;
+			GBufferBData.xyz = float3(data.worldNormal.xyz * 0.5f + 0.5f);
+			GBufferBData.w = 1.0f; // Marks that some deferred data was written
+			GBufferCData.x = data.roughness;
+			GBufferCData.y = data.metalness;
+		}
 	};
 };

+ 183 - 185
Data/Raw/Engine/Includes/ImageBasedLighting.bslinc

@@ -1,216 +1,214 @@
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 
-Technique 
- : base("ImageBasedLighting")
- : inherits("ReflectionCubemapCommon") =
+mixin ImageBasedLighting 
 {
-	Pass =
+	mixin ReflectionCubemapCommon;
+
+	code
 	{
-		Common = 
+		// Arbitrary limit, increase if needed
+		#define MAX_PROBES 512
+	
+		// Note: Size must be multiple of largest element, because of std430 rules
+		struct ReflProbeData
+		{
+			float3 position;
+			float radius;
+			float3 boxExtents;
+			float transitionDistance;
+			float4x4 invBoxTransform;
+			uint cubemapIdx;
+			uint type; // 0 - Sphere, 1 - Box
+			float2 padding;
+		};
+	
+		[internal] TextureCube gSkyReflectionTex;
+		[internal] SamplerState gSkyReflectionSamp;
+		
+		[internal] TextureCube gSkyIrradianceTex;
+		[internal] SamplerState gSkyIrradianceSamp;
+		
+		[internal] TextureCubeArray gReflProbeCubemaps;
+		[internal] SamplerState gReflProbeSamp;
+		
+		[internal] Texture2D gPreintegratedEnvBRDF;
+		[internal] SamplerState gPreintegratedEnvBRDFSamp;
+		
+		[internal] StructuredBuffer<ReflProbeData> gReflectionProbes;	
+
+		#ifdef USE_COMPUTE_INDICES
+			groupshared uint gReflectionProbeIndices[MAX_PROBES];
+		#endif
+		#ifdef USE_LIGHT_GRID_INDICES
+			[internal] Buffer<uint> gReflectionProbeIndices;
+		#endif
+		
+		[internal]
+		cbuffer ReflProbeParams
 		{
-			// Arbitrary limit, increase if needed
-			#define MAX_PROBES 512
+			uint gReflCubemapNumMips;
+			uint gNumProbes;
+			uint gSkyCubemapAvailable;
+			uint gUseReflectionMaps;
+			uint gSkyCubemapNumMips;
+			float gSkyBrightness;
+		}	
 		
-			// Note: Size must be multiple of largest element, because of std430 rules
-			struct ReflProbeData
+		float3 getSkyIndirectDiffuse(float3 dir)
+		{
+			return gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, dir, 0).rgb * gSkyBrightness;
+		}
+		
+		float getSphereReflectionContribution(float normalizedDistance)
+		{			
+			// If closer than 60% to the probe radius, then full contribution is used.
+			// For the other 40% we smoothstep and return contribution lower than 1 so other
+			// reflection probes can be blended.			
+		
+			// smoothstep from 1 to 0.6:
+			//   float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
+			//   return t * t * (3.0 - 2.0 * t);
+			float t = saturate(2.5 - 2.5 * normalizedDistance);
+			return t * t * (3.0 - 2.0 * t);
+		}
+		
+		float3 getLookupForSphereProxy(float3 originWS, float3 dirWS, float3 centerWS, float radius)
+		{
+			float radius2 = radius * radius;
+			float3 originLS = originWS - centerWS;
+			
+			float a = dot(originLS, dirWS);
+			float dist2 = a * a - dot(originLS, originLS) + radius2;
+
+			float3 lookupDir = dirWS;
+			
+			[flatten]
+			if(dist2 >= 0)
 			{
-				float3 position;
-				float radius;
-				float3 boxExtents;
-				float transitionDistance;
-				float4x4 invBoxTransform;
-				uint cubemapIdx;
-				uint type; // 0 - Sphere, 1 - Box
-				float2 padding;
-			};
+				float farDist = sqrt(dist2) - a;
+				lookupDir = originLS + farDist * dirWS;
+			}
+			
+			return lookupDir;
+		}
 		
-			TextureCube gSkyReflectionTex;
-			SamplerState gSkyReflectionSamp;
+		float getDistBoxToPoint(float3 pt, float3 extents)
+		{
+			float3 d = max(max(-extents - pt, 0), pt - extents);
+			return length(d);
+		}
+		
+		float3 getLookupForBoxProxy(float3 originWS, float3 dirWS, float3 centerWS, float3 extents, float4x4 invBoxTransform, float transitionDistance, out float contribution)
+		{
+			// Transform origin and direction into box local space, where it is unit sized and axis aligned
+			float3 originLS = mul(invBoxTransform, float4(originWS, 1)).xyz;
+			float3 dirLS = mul(invBoxTransform, float4(dirWS, 0)).xyz;
 			
-			TextureCube gSkyIrradianceTex;
-			SamplerState gSkyIrradianceSamp;
+			// Get distance from 3 min planes and 3 max planes of the unit AABB
+			//  float3 unitVec = float3(1.0f, 1.0f, 1.0f);
+			//  float3 intersectsMax = (unitVec - originLS) / dirLS;
+			//  float3 intersectsMin = (-unitVec - originLS) / dirLS;
 			
-			TextureCubeArray gReflProbeCubemaps;
-			SamplerState gReflProbeSamp;
+			float3 invDirLS = rcp(dirLS);
+			float3 intersectsMax = invDirLS - originLS * invDirLS;
+			float3 intersectsMin = -invDirLS - originLS * invDirLS;
 			
-			Texture2D gPreintegratedEnvBRDF;
-			SamplerState gPreintegratedEnvBRDFSamp;
+			// Find nearest positive (along ray direction) intersection
+			float3 positiveIntersections = max(intersectsMax, intersectsMin);
+			float intersectDist = min(positiveIntersections.x, min(positiveIntersections.y, positiveIntersections.z));
 			
-			StructuredBuffer<ReflProbeData> gReflectionProbes;	
-
-			#ifdef USE_COMPUTE_INDICES
-				groupshared uint gReflectionProbeIndices[MAX_PROBES];
-			#endif
-			#ifdef USE_LIGHT_GRID_INDICES
-				Buffer<uint> gReflectionProbeIndices;
-			#endif
-			
-			cbuffer ReflProbeParams
-			{
-				uint gReflCubemapNumMips;
-				uint gNumProbes;
-				uint gSkyCubemapAvailable;
-				uint gUseReflectionMaps;
-				uint gSkyCubemapNumMips;
-				float gSkyBrightness;
-			}	
-			
-			float3 getSkyIndirectDiffuse(float3 dir)
-			{
-				return gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, dir, 0).rgb * gSkyBrightness;
-			}
+			float3 intersectPositionWS = originWS + intersectDist * dirWS;
+			float3 lookupDir = intersectPositionWS - centerWS;
 			
-			float getSphereReflectionContribution(float normalizedDistance)
-			{			
-				// If closer than 60% to the probe radius, then full contribution is used.
-				// For the other 40% we smoothstep and return contribution lower than 1 so other
-				// reflection probes can be blended.			
-			
-				// smoothstep from 1 to 0.6:
-				//   float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
-				//   return t * t * (3.0 - 2.0 * t);
-				float t = saturate(2.5 - 2.5 * normalizedDistance);
-				return t * t * (3.0 - 2.0 * t);
-			}
+			// Calculate contribution
+			//// Shrink the box so fade out happens within box extents
+			float3 reducedExtents = extents - float3(transitionDistance, transitionDistance, transitionDistance);
+			float distToBox = getDistBoxToPoint(originLS * reducedExtents, reducedExtents);
 			
-			float3 getLookupForSphereProxy(float3 originWS, float3 dirWS, float3 centerWS, float radius)
-			{
-				float radius2 = radius * radius;
-				float3 originLS = originWS - centerWS;
-				
-				float a = dot(originLS, dirWS);
-				float dist2 = a * a - dot(originLS, originLS) + radius2;
-
-				float3 lookupDir = dirWS;
-				
-				[flatten]
-				if(dist2 >= 0)
-				{
-					float farDist = sqrt(dist2) - a;
-					lookupDir = originLS + farDist * dirWS;
-				}
-				
-				return lookupDir;
-			}
+			float normalizedDistance = distToBox / transitionDistance;
 			
-			float getDistBoxToPoint(float3 pt, float3 extents)
-			{
-				float3 d = max(max(-extents - pt, 0), pt - extents);
-				return length(d);
-			}
+			// If closer than 70% to the probe radius, then full contribution is used.
+			// For the other 30% we smoothstep and return contribution lower than 1 so other
+			// reflection probes can be blended.			
+		
+			// smoothstep from 1 to 0.7:
+			//   float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
+			//   return t * t * (3.0 - 2.0 * t);
 			
-			float3 getLookupForBoxProxy(float3 originWS, float3 dirWS, float3 centerWS, float3 extents, float4x4 invBoxTransform, float transitionDistance, out float contribution)
-			{
-				// Transform origin and direction into box local space, where it is united sized and axis aligned
-				float3 originLS = mul(invBoxTransform, float4(originWS, 1)).xyz;
-				float3 dirLS = mul(invBoxTransform, float4(dirWS, 0)).xyz;
-				
-				// Get distance from 3 min planes and 3 max planes of the unit AABB
-				//  float3 unitVec = float3(1.0f, 1.0f, 1.0f);
-				//  float3 intersectsMax = (unitVec - originLS) / dirLS;
-				//  float3 intersectsMin = (-unitVec - originLS) / dirLS;
-				
-				float3 invDirLS = rcp(dirLS);
-				float3 intersectsMax = invDirLS - originLS * invDirLS;
-				float3 intersectsMin = -invDirLS - originLS * invDirLS;
-				
-				// Find nearest positive (along ray direction) intersection
-				float3 positiveIntersections = max(intersectsMax, intersectsMin);
-				float intersectDist = min(positiveIntersections.x, min(positiveIntersections.y, positiveIntersections.z));
-				
-				float3 intersectPositionWS = originWS + intersectDist * dirWS;
-				float3 lookupDir = intersectPositionWS - centerWS;
-				
-				// Calculate contribution
-				//// Shrink the box so fade out happens within box extents
-				float3 reducedExtents = extents - float3(transitionDistance, transitionDistance, transitionDistance);
-				float distToBox = getDistBoxToPoint(originLS * reducedExtents, reducedExtents);
-				
-				float normalizedDistance = distToBox / transitionDistance;
-				
-				// If closer than 70% to the probe radius, then full contribution is used.
-				// For the other 30% we smoothstep and return contribution lower than 1 so other
-				// reflection probes can be blended.			
+			float t = saturate(3.3333 - 3.3333 * normalizedDistance);
+			contribution = t * t * (3.0 - 2.0 * t);
 			
-				// smoothstep from 1 to 0.7:
-				//   float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
-				//   return t * t * (3.0 - 2.0 * t);
-				
-				float t = saturate(3.3333 - 3.3333 * normalizedDistance);
-				contribution = t * t * (3.0 - 2.0 * t);
-				
-				return lookupDir;
-			}
+			return lookupDir;
+		}
+		
+		float3 gatherReflectionRadiance(float3 worldPos, float3 dir, float roughness, float3 specularColor, uint probeOffset, uint numProbes)
+		{
+			if(gUseReflectionMaps == 0)
+				return specularColor;
+									
+			float mipLevel = mapRoughnessToMipLevel(roughness, gReflCubemapNumMips);
 			
-			float3 gatherReflectionRadiance(float3 worldPos, float3 dir, float roughness, float3 specularColor, uint probeOffset, uint numProbes)
+			float3 output = 0;
+			float leftoverContribution = 1.0f;
+			for(uint i = 0; i < numProbes; i++)
 			{
-				if(gUseReflectionMaps == 0)
-					return specularColor;
-										
-				float mipLevel = mapRoughnessToMipLevel(roughness, gReflCubemapNumMips);
+				if(leftoverContribution < 0.001f)
+					break;
+						
+				uint probeIdx = gReflectionProbeIndices[probeOffset + i];
+				ReflProbeData probeData = gReflectionProbes[probeIdx];
 				
-				float3 output = 0;
-				float leftoverContribution = 1.0f;
-				for(uint i = 0; i < numProbes; i++)
-				{
-					if(leftoverContribution < 0.001f)
-						break;
+				float3 probeToPos = worldPos - probeData.position;
+				float distToProbe = length(probeToPos);
+				float normalizedDist = saturate(distToProbe / probeData.radius);
 							
-					uint probeIdx = gReflectionProbeIndices[probeOffset + i];
-					ReflProbeData probeData = gReflectionProbes[probeIdx];
-					
-					float3 probeToPos = worldPos - probeData.position;
-					float distToProbe = length(probeToPos);
-					float normalizedDist = saturate(distToProbe / probeData.radius);
-								
-					if(distToProbe <= probeData.radius)
+				if(distToProbe <= probeData.radius)
+				{
+					float3 correctedDir;
+					float contribution = 0;
+					if(probeData.type == 0) // Sphere
 					{
-						float3 correctedDir;
-						float contribution = 0;
-						if(probeData.type == 0) // Sphere
-						{
-							correctedDir = getLookupForSphereProxy(worldPos, dir, probeData.position, probeData.radius);
-							contribution = getSphereReflectionContribution(normalizedDist);
-						}
-						else if(probeData.type == 1) // Box
-						{
-							correctedDir = getLookupForBoxProxy(worldPos, dir, probeData.position, probeData.boxExtents, probeData.invBoxTransform, probeData.transitionDistance, contribution);
-						}
-						
-						float4 probeSample = gReflProbeCubemaps.SampleLevel(gReflProbeSamp, float4(correctedDir, probeData.cubemapIdx), mipLevel);
-						probeSample *= contribution;
-						
-						output += probeSample.rgb * leftoverContribution; 
-						leftoverContribution *= (1.0f - contribution);
+						correctedDir = getLookupForSphereProxy(worldPos, dir, probeData.position, probeData.radius);
+						contribution = getSphereReflectionContribution(normalizedDist);
+					}
+					else if(probeData.type == 1) // Box
+					{
+						correctedDir = getLookupForBoxProxy(worldPos, dir, probeData.position, probeData.boxExtents, probeData.invBoxTransform, probeData.transitionDistance, contribution);
 					}
-				}
 					
-				if(gSkyCubemapAvailable > 0)
-				{
-					float skyMipLevel = mapRoughnessToMipLevel(roughness, gSkyCubemapNumMips);
-					float4 skySample = gSkyReflectionTex.SampleLevel(gSkyReflectionSamp, dir, skyMipLevel) * gSkyBrightness;
+					float4 probeSample = gReflProbeCubemaps.SampleLevel(gReflProbeSamp, float4(correctedDir, probeData.cubemapIdx), mipLevel);
+					probeSample *= contribution;
 					
-					output += skySample.rgb * leftoverContribution; 
+					output += probeSample.rgb * leftoverContribution; 
+					leftoverContribution *= (1.0f - contribution);
 				}
-						
-				return output;
 			}
-			
-			float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint probeOffset, uint numProbes)
-			{
-				// See C++ code for generation of gPreintegratedEnvBRDF to see why this code works as is
-				float3 N = surfaceData.worldNormal.xyz;
-				float NoV = saturate(dot(N, V));
-				
-				// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
-				// For more customizability allow the user to provide separate albedo/specular colors for both types.
-				float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
-				float3 radiance = gatherReflectionRadiance(worldPos, R, surfaceData.roughness, specularColor, probeOffset, numProbes);
 				
-				float2 envBRDF = gPreintegratedEnvBRDF.SampleLevel(gPreintegratedEnvBRDFSamp, float2(NoV, surfaceData.roughness), 0).rg;
+			if(gSkyCubemapAvailable > 0)
+			{
+				float skyMipLevel = mapRoughnessToMipLevel(roughness, gSkyCubemapNumMips);
+				float4 skySample = gSkyReflectionTex.SampleLevel(gSkyReflectionSamp, dir, skyMipLevel) * gSkyBrightness;
 				
-				return radiance * (specularColor * envBRDF.x + envBRDF.y);
-			}		
-		};
+				output += skySample.rgb * leftoverContribution; 
+			}
+					
+			return output;
+		}
+		
+		float3 getImageBasedSpecular(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint probeOffset, uint numProbes)
+		{
+			// See C++ code for generation of gPreintegratedEnvBRDF to see why this code works as is
+			float3 N = surfaceData.worldNormal.xyz;
+			float NoV = saturate(dot(N, V));
+			
+			// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
+			// For more customizability allow the user to provide separate albedo/specular colors for both types.
+			float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
+			float3 radiance = gatherReflectionRadiance(worldPos, R, surfaceData.roughness, specularColor, probeOffset, numProbes);
+			
+			float2 envBRDF = gPreintegratedEnvBRDF.SampleLevel(gPreintegratedEnvBRDFSamp, float2(NoV, surfaceData.roughness), 0).rg;
+			
+			return radiance * (specularColor * envBRDF.x + envBRDF.y);
+		}		
 	};
 };

+ 53 - 56
Data/Raw/Engine/Includes/LightGridCommon.bslinc

@@ -1,64 +1,61 @@
-Technique : base("LightGridCommon") =
+mixin LightGridCommon
 {
-	Pass =
+	code
 	{
-		Common = 
+		cbuffer GridParams : register(b4)
 		{
-			cbuffer GridParams : register(b4)
-			{
-				// Offsets at which specific light types begin in gLights buffer
-				// Assumed directional lights start at 0
-				// x - offset to point lights, y - offset to spot lights, z - total number of lights
-				uint3 gLightOffsets;
-				uint gNumReflProbes;
-				uint gNumCells;
-				uint3 gGridSize;
-				uint gMaxNumLightsPerCell;
-				uint2 gGridPixelSize;
-			}
-						
-			float calcViewZFromCellZ(uint cellZ)
-			{
-				// We don't want to subdivide depth uniformly because XY sizes will be much
-				// smaller closer to the near plane, and larger towards far plane. We want 
-				// our cells to be as close to cube shape as possible, so that width/height/depth
-				// are all similar. Ideally we would use either width or height as calculated for
-				// purposes of the projection matrix, for the depth. But since we'll be splitting
-				// the depth range into multiple slices, in practice this ends up with many tiny
-				// cells close to the near plane. Instead we use a square function, which is
-				// somewhere between the two extremes:
-				//  view = slice^2
-				
-				// We need it in range [near, far] so we normalize and scale
-				//  view = slice^2 / maxSlices^2 * (far - near) + near
-				
-				// Note: Some of these calculations could be moved to CPU
-				float viewZ = (pow(cellZ, 2) / pow(gGridSize.z, 2)) * (gNearFar.y - gNearFar.x) + gNearFar.x; 
-				return -viewZ;
-			}
+			// Offsets at which specific light types begin in gLights buffer
+			// Assumed directional lights start at 0
+			// x - offset to point lights, y - offset to spot lights, z - total number of lights
+			uint3 gLightOffsets;
+			uint gNumReflProbes;
+			uint gNumCells;
+			uint3 gGridSize;
+			uint gMaxNumLightsPerCell;
+			uint2 gGridPixelSize;
+		}
+					
+		float calcViewZFromCellZ(uint cellZ)
+		{
+			// We don't want to subdivide depth uniformly because XY sizes will be much
+			// smaller closer to the near plane, and larger towards far plane. We want 
+			// our cells to be as close to cube shape as possible, so that width/height/depth
+			// are all similar. Ideally we would use either width or height as calculated for
+			// purposes of the projection matrix, for the depth. But since we'll be splitting
+			// the depth range into multiple slices, in practice this ends up with many tiny
+			// cells close to the near plane. Instead we use a square function, which is
+			// somewhere between the two extremes:
+			//  view = slice^2
+			
+			// We need it in range [near, far] so we normalize and scale
+			//  view = slice^2 / maxSlices^2 * (far - near) + near
 			
-			uint calcCellZFromViewZ(float viewZ)
-			{
-				// Inverse of calculation in calcViewZFromCellZ
-				uint cellZ = min((uint)floor(sqrt(((-viewZ - gNearFar.x)*pow(gGridSize.z, 2))/(gNearFar.y - gNearFar.x))), gGridSize.z);
-				
-				return cellZ;
-			}
+			// Note: Some of these calculations could be moved to CPU
+			float viewZ = (pow(cellZ, 2) / pow(gGridSize.z, 2)) * (gNearFar.y - gNearFar.x) + gNearFar.x; 
+			return -viewZ;
+		}
+		
+		uint calcCellZFromViewZ(float viewZ)
+		{
+			// Inverse of calculation in calcViewZFromCellZ
+			uint cellZ = min((uint)floor(sqrt(((-viewZ - gNearFar.x)*pow(gGridSize.z, 2))/(gNearFar.y - gNearFar.x))), gGridSize.z);
 			
-			uint calcCellIdx(uint2 pixelPos, float deviceZ)
-			{
-				// OpenGL uses lower left for window space origin
-				#ifdef OPENGL
-					pixelPos.y = gViewportRectangle.w - pixelPos.y;
-				#endif			
+			return cellZ;
+		}
+		
+		uint calcCellIdx(uint2 pixelPos, float deviceZ)
+		{
+			// OpenGL uses lower left for window space origin
+			#ifdef OPENGL
+				pixelPos.y = gViewportRectangle.w - pixelPos.y;
+			#endif			
+		
+			// Note: Use bitshift to divide since gGridPixelSize will be a power of 2
+			uint2 cellXY = pixelPos / gGridPixelSize;
+			uint cellZ = calcCellZFromViewZ(convertFromDeviceZ(deviceZ));
 			
-				// Note: Use bitshift to divide since gGridPixelSize will be a power of 2
-				uint2 cellXY = pixelPos / gGridPixelSize;
-				uint cellZ = calcCellZFromViewZ(convertFromDeviceZ(deviceZ));
-				
-				uint cellIdx = (cellZ * gGridSize.y + cellXY.y) * gGridSize.x + cellXY.x;
-				return cellIdx;
-			}
-		};
+			uint cellIdx = (cellZ * gGridSize.y + cellXY.y) * gGridSize.x + cellXY.x;
+			return cellIdx;
+		}
 	};
 };

+ 330 - 333
Data/Raw/Engine/Includes/LightingCommon.bslinc

@@ -1,379 +1,376 @@
 #include "$ENGINE$\SurfaceData.bslinc"
 
-Technique
- : base("LightingCommon")
- : inherits("SurfaceData") =
+mixin LightingCommon
 {
-	Pass =
+	mixin SurfaceData;
+
+	code
 	{
-		Common = 
+		// Arbitrary limit, increase if needed
+		#define MAX_LIGHTS 512
+	
+		#define PI 3.1415926
+		#define HALF_PI 1.5707963
+		
+		// Note: Size must be multiple of largest element, because of std430 rules
+		struct LightData
 		{
-			// Arbitrary limit, increase if needed
-            #define MAX_LIGHTS 512
+			float3 position;
+			float attRadius;
+			float3 direction;
+			float luminance;
+			float3 spotAngles;
+			float attRadiusSqrdInv;
+			float3 color;
+			float srcRadius;
+			float3 shiftedLightPosition;
+			float padding;
+		};
 		
-			#define PI 3.1415926
-			#define HALF_PI 1.5707963
-			
-			// Note: Size must be multiple of largest element, because of std430 rules
-			struct LightData
-			{
-				float3 position;
-				float attRadius;
-				float3 direction;
-				float luminance;
-				float3 spotAngles;
-				float attRadiusSqrdInv;
-				float3 color;
-				float srcRadius;
-				float3 shiftedLightPosition;
-				float padding;
-			};
-			
-			float3 calcMicrofacetFresnelShlick(float3 F0, float LoH)
-			{
-				return F0 + (1.0f - F0) * pow(1.0f - LoH, 5.0f);
-			}
+		float3 calcMicrofacetFresnelShlick(float3 F0, float LoH)
+		{
+			return F0 + (1.0f - F0) * pow(1.0f - LoH, 5.0f);
+		}
 
-			float calcMicrofacetShadowingSmithGGX(float roughness4, float NoV, float NoL)
-			{
-				// Note: It's probably better to use the joint shadowing + masking version of this function
+		float calcMicrofacetShadowingSmithGGX(float roughness4, float NoV, float NoL)
+		{
+			// Note: It's probably better to use the joint shadowing + masking version of this function
 
-				// Note: Original GGX G1 multiplied by NoV & NoL (respectively), so that the microfacet function divisor gets canceled out
-				// Original formula being (ignoring the factor for masking negative directions):
-				//   G1(v) = 2 / (1 + sqrt(1 + roughness^4 * tan^2(v)))
-				//
-				// Using trig identities: tan = sin/cos & sin^2 + cos^2 = 1
-				//   G1(v) = 2 / (1 + sqrt(1 + roughness^4 * (1 - cos^2(v))/cos^2(v)))
-				//
-				// Multiply by cos(v) so that we cancel out the (NoL * NoV) factor in the microfacet formula divisor
-				//   G1(v) = 2 * cos(v) / (cos^2(v) + sqrt(cos^2 + roughness^4 - roughness^4 * cos^2(v)))
-				// 
-				// Actually do the cancellation:
-				//    G1(v) = 2 / (cos^2(v) + sqrt(cos^2 + roughness^4 - roughness^4 * cos^2(v)))
-				//
-				// Also cancel out the 2 and the 4:
-				//    G1(v) = 1 / (cos^2(v) + sqrt(cos^2 + roughness^4 - roughness^4 * cos^2(v)))
-				//
-				// Final equation being:
-				//    G(v, l) = G1(v) * G1(l)
-				//
-				// Where cos(v) is NoV or NoL
-				
-				float g1V = NoV + sqrt(NoV * (NoV - NoV * roughness4) + roughness4);
-				float g1L = NoL + sqrt(NoL * (NoL - NoL * roughness4) + roughness4);
-				return rcp(g1V * g1L);
-			}
-			
-			float calcMicrofacetDistGGX(float roughness4, float NoH)
-			{
-				float d = (NoH * roughness4 - NoH) * NoH + 1.0f;
-				return roughness4 / (PI * d * d);
-			}
+			// Note: Original GGX G1 multiplied by NoV & NoL (respectively), so that the microfacet function divisor gets canceled out
+			// Original formula being (ignoring the factor for masking negative directions):
+			//   G1(v) = 2 / (1 + sqrt(1 + roughness^4 * tan^2(v)))
+			//
+			// Using trig identities: tan = sin/cos & sin^2 + cos^2 = 1
+			//   G1(v) = 2 / (1 + sqrt(1 + roughness^4 * (1 - cos^2(v))/cos^2(v)))
+			//
+			// Multiply by cos(v) so that we cancel out the (NoL * NoV) factor in the microfacet formula divisor
+			//   G1(v) = 2 * cos(v) / (cos(v) + sqrt(cos^2(v) + roughness^4 - roughness^4 * cos^2(v)))
+			// 
+			// Actually do the cancellation:
+			//    G1(v) = 2 / (cos(v) + sqrt(cos^2(v) + roughness^4 - roughness^4 * cos^2(v)))
+			//
+			// Also cancel out the 2 and the 4:
+			//    G1(v) = 1 / (cos(v) + sqrt(cos^2(v) + roughness^4 - roughness^4 * cos^2(v)))
+			//
+			// Final equation being:
+			//    G(v, l) = G1(v) * G1(l)
+			//
+			// Where cos(v) is NoV or NoL
 			
-			float3 calcDiffuseLambert(float3 color)
-			{
-				return color * (1.0f / PI);
-			}
-			
-			float getSpotAttenuation(float3 toLight, LightData lightData)
-			{
-				float output = saturate((dot(toLight, -lightData.direction) - lightData.spotAngles.y) * lightData.spotAngles.z);
-				return output * output;
-			}
+			float g1V = NoV + sqrt(NoV * (NoV - NoV * roughness4) + roughness4);
+			float g1L = NoL + sqrt(NoL * (NoL - NoL * roughness4) + roughness4);
+			return rcp(g1V * g1L);
+		}
+		
+		float calcMicrofacetDistGGX(float roughness4, float NoH)
+		{
+			float d = (NoH * roughness4 - NoH) * NoH + 1.0f;
+			return roughness4 / (PI * d * d);
+		}
+		
+		float3 calcDiffuseLambert(float3 color)
+		{
+			return color * (1.0f / PI);
+		}
+		
+		float getSpotAttenuation(float3 toLight, LightData lightData)
+		{
+			float output = saturate((dot(toLight, -lightData.direction) - lightData.spotAngles.y) * lightData.spotAngles.z);
+			return output * output;
+		}
 
-			// Window function to ensure the light contribution fades out to 0 at attenuation radius
-			float getRadialAttenuation(float distance2, LightData lightData)
-			{
-				float radialAttenuation = distance2 * lightData.attRadiusSqrdInv;
-				radialAttenuation *= radialAttenuation;
-				radialAttenuation = saturate(1.0f - radialAttenuation);
-				radialAttenuation *= radialAttenuation;
-				
-				return radialAttenuation;
-			}			
-						
-			// Calculates illuminance from a non-area point light
-			float illuminancePointLight(float distance2, float NoL, LightData lightData)
-			{
-				return (lightData.luminance * NoL) / max(distance2, 0.01f*0.01f);
-			}
+		// Window function to ensure the light contribution fades out to 0 at attenuation radius
+		float getRadialAttenuation(float distance2, LightData lightData)
+		{
+			float radialAttenuation = distance2 * lightData.attRadiusSqrdInv;
+			radialAttenuation *= radialAttenuation;
+			radialAttenuation = saturate(1.0f - radialAttenuation);
+			radialAttenuation *= radialAttenuation;
 			
-			// Calculates illuminance scale for a sphere or a disc area light, while also handling the case when
-			// parts of the area light are below the horizon.
-			// Input NoL must be unclamped.
-			// Sphere solid angle = arcsin(r / d)
-			// Right disc solid angle = atan(r / d)
-			//   - To compensate for oriented discs, multiply by dot(diskNormal, -L)
-			float illuminanceScaleSphereDiskAreaLight(float unclampedNoL, float sinSolidAngleSqrd)
+			return radialAttenuation;
+		}			
+					
+		// Calculates illuminance from a non-area point light
+		float illuminancePointLight(float distance2, float NoL, LightData lightData)
+		{
+			return (lightData.luminance * NoL) / max(distance2, 0.01f*0.01f);
+		}
+		
+		// Calculates illuminance scale for a sphere or a disc area light, while also handling the case when
+		// parts of the area light are below the horizon.
+		// Input NoL must be unclamped.
+		// Sphere solid angle = arcsin(r / d)
+		// Right disc solid angle = atan(r / d)
+		//   - To compensate for oriented discs, multiply by dot(diskNormal, -L)
+		float illuminanceScaleSphereDiskAreaLight(float unclampedNoL, float sinSolidAngleSqrd)
+		{
+			// Handles parts of the area light below the surface horizon
+			// See https://seblagarde.files.wordpress.com/2015/07/course_notes_moving_frostbite_to_pbr_v32.pdf for reference
+			float sinSolidAngle = sqrt(sinSolidAngleSqrd);
+			if(unclampedNoL < sinSolidAngle)
 			{
-				// Handles parts of the area light below the surface horizon
-				// See https://seblagarde.files.wordpress.com/2015/07/course_notes_moving_frostbite_to_pbr_v32.pdf for reference
-				float sinSolidAngle = sqrt(sinSolidAngleSqrd);
-				if(unclampedNoL < sinSolidAngle)
-				{
-					// Hermite spline approximation (see reference for exact formula)
-					unclampedNoL = max(unclampedNoL, -sinSolidAngle);
-					return ((sinSolidAngle + unclampedNoL) * (sinSolidAngle + unclampedNoL)) / (4 * sinSolidAngle);
-				}
-				else
-					return PI * sinSolidAngleSqrd * saturate(unclampedNoL);
+				// Hermite spline approximation (see reference for exact formula)
+				unclampedNoL = max(unclampedNoL, -sinSolidAngle);
+				return ((sinSolidAngle + unclampedNoL) * (sinSolidAngle + unclampedNoL)) / (4 * sinSolidAngle);
 			}
+			else
+				return PI * sinSolidAngleSqrd * saturate(unclampedNoL);
+		}
 
-			// Calculates illuminance from a sphere area light.
-			float illuminanceSphereAreaLight(float unclampedNoL, float distToLight2, LightData lightData)
-			{
-				float radius2 = lightData.srcRadius * lightData.srcRadius;
-				
-				// Squared sine of the sphere solid angle
-				float sinSolidAngle2 = radius2 / distToLight2;
-
-				// Prevent divide by zero
-				sinSolidAngle2 = min(sinSolidAngle2, 0.9999f);
-				
-				return lightData.luminance * illuminanceScaleSphereDiskAreaLight(unclampedNoL, sinSolidAngle2);	
-			}
+		// Calculates illuminance from a sphere area light.
+		float illuminanceSphereAreaLight(float unclampedNoL, float distToLight2, LightData lightData)
+		{
+			float radius2 = lightData.srcRadius * lightData.srcRadius;
 			
-			// Calculates illuminance from a disc area light.
-			float illuminanceDiscAreaLight(float unclampedNoL, float distToLight2, float3 L, LightData lightData)
-			{
-				// Solid angle for right disk = atan (r / d)
-				//  atan (r / d) = asin((r / d)/sqrt((r / d)^2+1))
-				//  sinAngle = (r / d)/sqrt((r / d)^2 + 1)
-				//  sinAngle^2 = (r / d)^2 / (r / d)^2 + 1
-				//             = r^2 / (d^2 + r^2)
+			// Squared sine of the sphere solid angle
+			float sinSolidAngle2 = radius2 / distToLight2;
+
+			// Prevent divide by zero
+			sinSolidAngle2 = min(sinSolidAngle2, 0.9999f);
 			
-				float radius2 = lightData.srcRadius * lightData.srcRadius;
-				
-				// max() to prevent light penetrating object
-				float sinSolidAngle2 = saturate(radius2 / (radius2 + max(radius2, distToLight2)));
-				
-				// Multiply by extra term to somewhat handle the case of the oriented disc (formula above only works
-				// for right discs).
-				return lightData.luminance * illuminanceScaleSphereDiskAreaLight(unclampedNoL, sinSolidAngle2 * saturate(dot(lightData.direction, -L)));	
-			}
+			return lightData.luminance * illuminanceScaleSphereDiskAreaLight(unclampedNoL, sinSolidAngle2);	
+		}
 		
-			// With microfacet BRDF the BRDF lobe is not centered around the reflected (mirror) direction.
-			// Because of NoL and shadow-masking terms the lobe gets shifted toward the normal as roughness
-			// increases. This is called the "off-specular peak". We approximate it using this function.
-			float3 getSpecularDominantDir(float3 N, float3 R, float roughness)
-			{
-				// Note: Try this formula as well:
-				//  float smoothness = 1 - roughness;
-				//  return lerp(N, R, smoothness * (sqrt(smoothness) + roughness));
+		// Calculates illuminance from a disc area light.
+		float illuminanceDiscAreaLight(float unclampedNoL, float distToLight2, float3 L, LightData lightData)
+		{
+			// Solid angle for right disk = atan (r / d)
+			//  atan (r / d) = asin((r / d)/sqrt((r / d)^2+1))
+			//  sinAngle = (r / d)/sqrt((r / d)^2 + 1)
+			//  sinAngle^2 = (r / d)^2 / ((r / d)^2 + 1)
+			//             = r^2 / (d^2 + r^2)
+		
+			float radius2 = lightData.srcRadius * lightData.srcRadius;
 			
-				float r2 = roughness * roughness;
-				return normalize(lerp(N, R, (1 - r2) * (sqrt(1 - r2) + r2)));
-			}		
+			// max() to prevent light penetrating object
+			float sinSolidAngle2 = saturate(radius2 / (radius2 + max(radius2, distToLight2)));
 			
-			float3 getSurfaceShading(float3 V, float3 L, float specLobeEnergy, SurfaceData surfaceData)
-			{
-				float3 N = surfaceData.worldNormal.xyz;
+			// Multiply by extra term to somewhat handle the case of the oriented disc (formula above only works
+			// for right discs).
+			return lightData.luminance * illuminanceScaleSphereDiskAreaLight(unclampedNoL, sinSolidAngle2 * saturate(dot(lightData.direction, -L)));	
+		}
+	
+		// With microfacet BRDF the BRDF lobe is not centered around the reflected (mirror) direction.
+		// Because of NoL and shadow-masking terms the lobe gets shifted toward the normal as roughness
+		// increases. This is called the "off-specular peak". We approximate it using this function.
+		float3 getSpecularDominantDir(float3 N, float3 R, float roughness)
+		{
+			// Note: Try this formula as well:
+			//  float smoothness = 1 - roughness;
+			//  return lerp(N, R, smoothness * (sqrt(smoothness) + roughness));
+		
+			float r2 = roughness * roughness;
+			return normalize(lerp(N, R, (1 - r2) * (sqrt(1 - r2) + r2)));
+		}		
+		
+		float3 getSurfaceShading(float3 V, float3 L, float specLobeEnergy, SurfaceData surfaceData)
+		{
+			float3 N = surfaceData.worldNormal.xyz;
 
-				float3 H = normalize(V + L);
-				float LoH = saturate(dot(L, H));
-				float NoH = saturate(dot(N, H));
-				float NoV = saturate(dot(N, V));
-				float NoL = saturate(dot(N, L));
-				
-				float3 diffuseColor = lerp(surfaceData.albedo.rgb, float3(0.0f, 0.0f, 0.0f), 1.0f - surfaceData.metalness);
-				
-				// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
-				// For more customizability allow the user to provide separate albedo/specular colors for both types.
-				float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
-				
-				float3 diffuse = calcDiffuseLambert(diffuseColor);
-				
-				float roughness = max(surfaceData.roughness, 0.04f); // Prevent NaNs
-				float roughness2 = roughness * roughness;
-				float roughness4 = roughness2 * roughness2;
-				
-				float3 specular = calcMicrofacetFresnelShlick(specularColor, LoH) * 
-					calcMicrofacetDistGGX(roughness4, NoH) *
-					calcMicrofacetShadowingSmithGGX(roughness4, NoV, NoL);
-				
-				// Note: Need to add energy conservation between diffuse and specular terms?
-				return diffuse + specular * specLobeEnergy;
-			}	
+			float3 H = normalize(V + L);
+			float LoH = saturate(dot(L, H));
+			float NoH = saturate(dot(N, H));
+			float NoV = saturate(dot(N, V));
+			float NoL = saturate(dot(N, L));
+			
+			float3 diffuseColor = lerp(surfaceData.albedo.rgb, float3(0.0f, 0.0f, 0.0f), 1.0f - surfaceData.metalness);
+			
+			// Note: Using a fixed F0 value of 0.04 (plastic) for dielectrics, and using albedo as specular for conductors.
+			// For more customizability allow the user to provide separate albedo/specular colors for both types.
+			float3 specularColor = lerp(float3(0.04f, 0.04f, 0.04f), surfaceData.albedo.rgb, surfaceData.metalness);
 			
-			StructuredBuffer<LightData> gLights;
+			float3 diffuse = calcDiffuseLambert(diffuseColor);
 			
-			#ifdef USE_COMPUTE_INDICES
-				groupshared uint gLightIndices[MAX_LIGHTS];
-			#endif
-			#ifdef USE_LIGHT_GRID_INDICES
-				Buffer<uint> gLightIndices;
-			#endif
+			float roughness = max(surfaceData.roughness, 0.04f); // Prevent NaNs
+			float roughness2 = roughness * roughness;
+			float roughness4 = roughness2 * roughness2;
+			
+			float3 specular = calcMicrofacetFresnelShlick(specularColor, LoH) * 
+				calcMicrofacetDistGGX(roughness4, NoH) *
+				calcMicrofacetShadowingSmithGGX(roughness4, NoV, NoL);
+			
+			// Note: Need to add energy conservation between diffuse and specular terms?
+			return diffuse + specular * specLobeEnergy;
+		}	
+		
+		[internal] StructuredBuffer<LightData> gLights;
+		
+		#ifdef USE_COMPUTE_INDICES
+			groupshared uint gLightIndices[MAX_LIGHTS];
+		#endif
+		#ifdef USE_LIGHT_GRID_INDICES
+			[internal] Buffer<uint> gLightIndices;
+		#endif
+		
+		float4 getDirectLighting(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint4 lightOffsets)
+		{
+			float3 N = surfaceData.worldNormal.xyz;
+			float roughness2 = max(surfaceData.roughness, 0.08f);
+			roughness2 *= roughness2;
 			
-			float4 getDirectLighting(float3 worldPos, float3 V, float3 R, SurfaceData surfaceData, uint4 lightOffsets)
+			float3 outLuminance = 0;
+			float alpha = 0.0f;
+			if(surfaceData.worldNormal.w > 0.0f)
 			{
-				float3 N = surfaceData.worldNormal.xyz;
-				float roughness2 = max(surfaceData.roughness, 0.08f);
-				roughness2 *= roughness2;
-				
-				float3 outLuminance = 0;
-				float alpha = 0.0f;
-				if(surfaceData.worldNormal.w > 0.0f)
+				// Handle directional lights
+				for(uint i = 0; i < lightOffsets.x; ++i)
 				{
-					// Handle directional lights
-					for(uint i = 0; i < lightOffsets.x; ++i)
-					{
-						LightData lightData = gLights[i];
+					LightData lightData = gLights[i];
+				
+					float3 L = -lightData.direction;
+					float NoL = saturate(dot(N, L));
+					float specEnergy = 1.0f;
 					
-						float3 L = -lightData.direction;
-						float NoL = saturate(dot(N, L));
-						float specEnergy = 1.0f;
-						
-						// Distant disk area light. Calculate its contribution analytically by
-						// finding the most important (least error) point on the area light and
-						// use it as a form of importance sampling.
-						if(lightData.srcRadius > 0)
-						{
-							float diskRadius = sin(lightData.srcRadius);
-							float distanceToDisk = cos(lightData.srcRadius);
-							
-							// Closest point to disk (approximation for distant disks)
-							float DoR = dot(L, R);
-							float3 S = normalize(R - DoR * L);
-							L = DoR < distanceToDisk ? normalize(distanceToDisk * L + S * diskRadius) : R;
-						}
+					// Distant disk area light. Calculate its contribution analytically by
+					// finding the most important (least error) point on the area light and
+					// use it as a form of importance sampling.
+					if(lightData.srcRadius > 0)
+					{
+						float diskRadius = sin(lightData.srcRadius);
+						float distanceToDisk = cos(lightData.srcRadius);
 						
-						float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
-						float illuminance = lightData.luminance * NoL;
-						outLuminance += lightData.color * illuminance * surfaceShading;
+						// Closest point to disk (approximation for distant disks)
+						float DoR = dot(L, R);
+						float3 S = normalize(R - DoR * L);
+						L = DoR < distanceToDisk ? normalize(distanceToDisk * L + S * diskRadius) : R;
 					}
 					
-					// Handle radial lights
-                    for (uint i = lightOffsets.y; i < lightOffsets.z; ++i)
-                    {
-                        uint lightIdx = gLightIndices[i];
-						LightData lightData = gLights[lightIdx];
-                        
-						float3 toLight = lightData.position - worldPos;
-						float distToLightSqrd = dot(toLight, toLight);
-						float invDistToLight = rsqrt(distToLightSqrd);
-						
-						float3 L = toLight * invDistToLight;
-						float NoL = dot(N, L);
-						
-						float specEnergy = 1.0f;
-						float illuminance = 0.0f;
+					float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
+					float illuminance = lightData.luminance * NoL;
+					outLuminance += lightData.color * illuminance * surfaceShading;
+				}
+				
+				// Handle radial lights
+				for (uint i = lightOffsets.y; i < lightOffsets.z; ++i)
+				{
+					uint lightIdx = gLightIndices[i];
+					LightData lightData = gLights[lightIdx];
+					
+					float3 toLight = lightData.position - worldPos;
+					float distToLightSqrd = dot(toLight, toLight);
+					float invDistToLight = rsqrt(distToLightSqrd);
+					
+					float3 L = toLight * invDistToLight;
+					float NoL = dot(N, L);
+					
+					float specEnergy = 1.0f;
+					float illuminance = 0.0f;
 
-						// Sphere area light. Calculate its contribution analytically by
-						// finding the most important (least error) point on the area light and
-						// use it as a form of importance sampling.
-						if(lightData.srcRadius > 0)
-						{
-							// Calculate illuminance depending on source size, distance and angle
-							illuminance = illuminanceSphereAreaLight(NoL, distToLightSqrd, lightData);	
+					// Sphere area light. Calculate its contribution analytically by
+					// finding the most important (least error) point on the area light and
+					// use it as a form of importance sampling.
+					if(lightData.srcRadius > 0)
+					{
+						// Calculate illuminance depending on source size, distance and angle
+						illuminance = illuminanceSphereAreaLight(NoL, distToLightSqrd, lightData);	
 
-							// Energy conservation:
-							//    We are widening the specular distribution by the sphere's subtended angle, 
-							//    so we need to handle the increase in energy. It is not enough just to account
-							//    for the sphere solid angle, since the energy difference is highly dependent on
-							//    specular distribution. By accounting for this energy difference we ensure glossy
-							//    reflections have sharp edges, instead of being too blurry.
-							//    See http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf for reference
-							float sphereAngle = saturate(lightData.srcRadius * invDistToLight);
-							
-							specEnergy = roughness2 / saturate(roughness2 + 0.5f * sphereAngle);
-							specEnergy *= specEnergy;							
+						// Energy conservation:
+						//    We are widening the specular distribution by the sphere's subtended angle, 
+						//    so we need to handle the increase in energy. It is not enough just to account
+						//    for the sphere solid angle, since the energy difference is highly dependent on
+						//    specular distribution. By accounting for this energy difference we ensure glossy
+						//    reflections have sharp edges, instead of being too blurry.
+						//    See http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf for reference
+						float sphereAngle = saturate(lightData.srcRadius * invDistToLight);
 						
-							// Find closest point on sphere to ray
-							float3 closestPointOnRay = dot(toLight, R) * R;
-							float3 centerToRay = closestPointOnRay - toLight;
-							float invDistToRay = rsqrt(dot(centerToRay, centerToRay));
-							float3 closestPointOnSphere = toLight + centerToRay * saturate(lightData.srcRadius * invDistToRay);
-							
-							toLight = closestPointOnSphere;
-							L = normalize(toLight);
-						}
-						else
-						{
-							NoL = saturate(NoL);
-							illuminance = illuminancePointLight(distToLightSqrd, NoL, lightData);
-						}
+						specEnergy = roughness2 / saturate(roughness2 + 0.5f * sphereAngle);
+						specEnergy *= specEnergy;							
+					
+						// Find closest point on sphere to ray
+						float3 closestPointOnRay = dot(toLight, R) * R;
+						float3 centerToRay = closestPointOnRay - toLight;
+						float invDistToRay = rsqrt(dot(centerToRay, centerToRay));
+						float3 closestPointOnSphere = toLight + centerToRay * saturate(lightData.srcRadius * invDistToRay);
 						
-						float attenuation = getRadialAttenuation(distToLightSqrd, lightData);
-						float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
-							
-						outLuminance += lightData.color * illuminance * attenuation * surfaceShading;
-                    }
-
-					// Handle spot lights
-					for(uint i = lightOffsets.z; i < lightOffsets.w; ++i)
-                    {
-                        uint lightIdx = gLightIndices[i];
-						LightData lightData = gLights[lightIdx];
+						toLight = closestPointOnSphere;
+						L = normalize(toLight);
+					}
+					else
+					{
+						NoL = saturate(NoL);
+						illuminance = illuminancePointLight(distToLightSqrd, NoL, lightData);
+					}
+					
+					float attenuation = getRadialAttenuation(distToLightSqrd, lightData);
+					float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
 						
-						float3 toLight = lightData.position - worldPos;
-						float distToLightSqrd = dot(toLight, toLight);
-						float invDistToLight = rsqrt(distToLightSqrd);
+					outLuminance += lightData.color * illuminance * attenuation * surfaceShading;
+				}
+
+				// Handle spot lights
+				for(uint i = lightOffsets.z; i < lightOffsets.w; ++i)
+				{
+					uint lightIdx = gLightIndices[i];
+					LightData lightData = gLights[lightIdx];
+					
+					float3 toLight = lightData.position - worldPos;
+					float distToLightSqrd = dot(toLight, toLight);
+					float invDistToLight = rsqrt(distToLightSqrd);
+					
+					float3 L = toLight * invDistToLight;
+					float NoL = dot(N, L);
+					
+					float specEnergy = 1.0f;
+					float illuminance = 0.0f;
+					float spotAttenuation = 1.0f;
+					
+					// Disc area light. Calculate its contribution analytically by
+					// finding the most important (least error) point on the area light and
+					// use it as a form of importance sampling.
+					if(lightData.srcRadius > 0)
+					{
+						// Calculate illuminance depending on source size, distance and angle
+						illuminance = illuminanceDiscAreaLight(NoL, distToLightSqrd, L, lightData);	
+					
+						// Energy conservation: Similar case as with radial lights
+						float rightDiscAngle = saturate(lightData.srcRadius * invDistToLight);
 						
-						float3 L = toLight * invDistToLight;
-						float NoL = dot(N, L);
+						// Account for disc orientation somewhat
+						float discAngle = rightDiscAngle * saturate(dot(lightData.direction, -L));
 						
-						float specEnergy = 1.0f;
-						float illuminance = 0.0f;
-						float spotAttenuation = 1.0f;
+						specEnergy = roughness2 / saturate(roughness2 + 0.5f * discAngle);
+						specEnergy *= specEnergy;							
+					
+						// Find closest point on disc to ray
+						float3 discNormal = -lightData.direction;
+						float distAlongLightDir = max(dot(R, discNormal), 1e-6f);
+						float t = dot(toLight, discNormal) / distAlongLightDir;
+						float3 closestPointOnPlane = R * t; // Relative to shaded world point
 						
-						// Disc area light. Calculate its contribution analytically by
-						// finding the most important (least error) point on the area light and
-						// use it as a form of importance sampling.
-						if(lightData.srcRadius > 0)
-						{
-							// Calculate illuminance depending on source size, distance and angle
-							illuminance = illuminanceDiscAreaLight(NoL, distToLightSqrd, L, lightData);	
+						float3 centerToRay = closestPointOnPlane - toLight;
+						float invDistToRay = rsqrt(dot(centerToRay, centerToRay));
+						float3 closestPointOnDisc = toLight + centerToRay * saturate(lightData.srcRadius * invDistToRay);
+
+						toLight = closestPointOnDisc;
+						L = normalize(toLight);
 						
-							// Energy conservation: Similar case as with radial lights
-							float rightDiscAngle = saturate(lightData.srcRadius * invDistToLight);
-							
-							// Account for disc orientation somewhat
-							float discAngle = rightDiscAngle * saturate(dot(lightData.direction, -L));
-							
-							specEnergy = roughness2 / saturate(roughness2 + 0.5f * discAngle);
-							specEnergy *= specEnergy;							
+						// Expand spot attenuation by disc radius (not physically based)
+						float3 toSpotEdge = normalize(lightData.shiftedLightPosition - worldPos);
+						spotAttenuation = getSpotAttenuation(toSpotEdge, lightData);
 						
-							// Find closest point on disc to ray
-							float3 discNormal = -lightData.direction;
-							float distAlongLightDir = max(dot(R, discNormal), 1e-6f);
-							float t = dot(toLight, discNormal) / distAlongLightDir;
-							float3 closestPointOnPlane = R * t; // Relative to shaded world point
-							
-							float3 centerToRay = closestPointOnPlane - toLight;
-							float invDistToRay = rsqrt(dot(centerToRay, centerToRay));
-							float3 closestPointOnDisc = toLight + centerToRay * saturate(lightData.srcRadius * invDistToRay);
-
-							toLight = closestPointOnDisc;
-							L = normalize(toLight);
-							
-							// Expand spot attenuation by disc radius (not physically based)
-							float3 toSpotEdge = normalize(lightData.shiftedLightPosition - worldPos);
-							spotAttenuation = getSpotAttenuation(toSpotEdge, lightData);
-							
-							// TODO - Spot attenuation fades out the specular highlight in a noticeable way
-						}
-						else
-						{
-							NoL = saturate(NoL);
-							illuminance = illuminancePointLight(distToLightSqrd, NoL, lightData);
-							
-							spotAttenuation = getSpotAttenuation(L, lightData);
-						}
+						// TODO - Spot attenuation fades out the specular highlight in a noticeable way
+					}
+					else
+					{
+						NoL = saturate(NoL);
+						illuminance = illuminancePointLight(distToLightSqrd, NoL, lightData);
 						
-						float radialAttenuation = getRadialAttenuation(distToLightSqrd, lightData);
-						float attenuation = spotAttenuation * radialAttenuation;
-						float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
-							
-						outLuminance += lightData.color * illuminance * attenuation * surfaceShading;
-                    }
+						spotAttenuation = getSpotAttenuation(L, lightData);
+					}
 					
-					// Ambient term for in-editor visualization, not used in actual lighting
-					outLuminance += surfaceData.albedo.rgb * gAmbientFactor / PI;
-					alpha = 1.0f;
+					float radialAttenuation = getRadialAttenuation(distToLightSqrd, lightData);
+					float attenuation = spotAttenuation * radialAttenuation;
+					float3 surfaceShading = getSurfaceShading(V, L, specEnergy, surfaceData);
+						
+					outLuminance += lightData.color * illuminance * attenuation * surfaceShading;
 				}
 				
-				return float4(outLuminance, alpha);
+				// Ambient term for in-editor visualization, not used in actual lighting
+				outLuminance += surfaceData.albedo.rgb * gAmbientFactor / PI;
+				alpha = 1.0f;
 			}
-		};
+			
+			return float4(outLuminance, alpha);
+		}
 	};
 };

+ 78 - 85
Data/Raw/Engine/Includes/NormalVertexInput.bslinc

@@ -1,102 +1,95 @@
-Technique
 #ifdef USE_BLEND_SHAPES
-	 : base("MorphVertexInput") =
+mixin MorphVertexInput
 #else
-	 : base("NormalVertexInput") =
+mixin NormalVertexInput
 #endif
 {
-	Pass =
+	code
 	{
-		Common = 
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_Position;
-				float2 uv0 : TEXCOORD0;
-				float3 worldPosition : TEXCOORD1;
-				
-				float3 tangentToWorldZ : NORMAL; // Note: Half-precision could be used
-				float4 tangentToWorldX : TANGENT; // Note: Half-precision could be used
-			};
+			float4 position : SV_Position;
+			float2 uv0 : TEXCOORD0;
+			float3 worldPosition : TEXCOORD1;
+			
+			float3 tangentToWorldZ : NORMAL; // Note: Half-precision could be used
+			float4 tangentToWorldX : TANGENT; // Note: Half-precision could be used
 		};
 
-		Vertex =
+		struct VertexInput
+		{
+			float3 position : POSITION;
+			float3 normal : NORMAL; // Note: Half-precision could be used
+			float4 tangent : TANGENT; // Note: Half-precision could be used
+			float2 uv0 : TEXCOORD0;
+			
+			#ifdef USE_BLEND_SHAPES
+				float3 deltaPosition : POSITION1;
+				float4 deltaNormal : NORMAL1;
+			#endif				
+		};
+		
+		struct VertexIntermediate
+		{
+			float3 worldNormal; // Note: Half-precision could be used
+			float4 worldTangent; // Note: Half-precision could be used
+			
+			float tangentSign;
+		};
+		
+		float3x3 getTangentToLocal(VertexInput input, out float tangentSign)
 		{
-			struct VertexInput
-			{
-				float3 position : POSITION;
-				float3 normal : NORMAL; // Note: Half-precision could be used
-				float4 tangent : TANGENT; // Note: Half-precision could be used
-				float2 uv0 : TEXCOORD0;
-				
-				#ifdef USE_BLEND_SHAPES
-					float3 deltaPosition : POSITION1;
-					float4 deltaNormal : NORMAL1;
-				#endif				
-			};
+			float3 normal = input.normal * 2.0f - 1.0f;
+			float3 tangent = input.tangent.xyz * 2.0f - 1.0f;
 			
-			struct VertexIntermediate
-			{
-				float3 worldNormal; // Note: Half-precision could be used
-				float4 worldTangent; // Note: Half-precision could be used
-				
-				float tangentSign;
-			};
+			#ifdef USE_BLEND_SHAPES
+				float3 deltaNormal = (input.deltaNormal.xyz * 2.0f - 1.0f) * 2.0f;
+				normal = normalize(normal + deltaNormal * input.deltaNormal.w);
+				tangent = normalize(tangent - dot(tangent, normal) * normal);
+			#endif
 			
-			float3x3 getTangentToLocal(VertexInput input, out float tangentSign)
-			{
-				float3 normal = input.normal * 2.0f - 1.0f;
-				float3 tangent = input.tangent.xyz * 2.0f - 1.0f;
-				
-				#ifdef USE_BLEND_SHAPES
-					float3 deltaNormal = (input.deltaNormal.xyz * 2.0f - 1.0f) * 2.0f;
-					normal = normalize(normal + deltaNormal * input.deltaNormal.w);
-					tangent = normalize(tangent - dot(tangent, normal) * normal);
-				#endif
-				
-				float3 bitangent = cross(normal, tangent) * input.tangent.w;
-				tangentSign = input.tangent.w * gWorldDeterminantSign;
-				
-				// Note: Maybe it's better to store everything in row vector format?
-				float3x3 result = float3x3(tangent, bitangent, normal);
-				result = transpose(result);
-												
-				return result;
-			}
+			float3 bitangent = cross(normal, tangent) * input.tangent.w;
+			tangentSign = input.tangent.w * gWorldDeterminantSign;
 			
-			VertexIntermediate getVertexIntermediate(VertexInput input)
-			{
-				VertexIntermediate result;
-				
-				float tangentSign;
-				float3x3 tangentToLocal = getTangentToLocal(input, tangentSign);
-				float3x3 tangentToWorld = mul((float3x3)gMatWorldNoScale, tangentToLocal);
-				
-				// Note: Consider transposing these externally, for easier reads
-				result.worldNormal = float3(tangentToWorld[0][2], tangentToWorld[1][2], tangentToWorld[2][2]); // Normal basis vector
-				result.worldTangent = float4(tangentToWorld[0][0], tangentToWorld[1][0], tangentToWorld[2][0], tangentSign); // Tangent basis vector
-				
-				return result;
-			}
+			// Note: Maybe it's better to store everything in row vector format?
+			float3x3 result = float3x3(tangent, bitangent, normal);
+			result = transpose(result);
+											
+			return result;
+		}
+		
+		VertexIntermediate getVertexIntermediate(VertexInput input)
+		{
+			VertexIntermediate result;
 			
-			float4 getVertexWorldPosition(VertexInput input, VertexIntermediate intermediate)
-			{
-				#ifdef USE_BLEND_SHAPES
-					float4 position = float4(input.position + input.deltaPosition, 1.0f);
-				#else
-					float4 position = float4(input.position, 1.0f);
-				#endif			
+			float tangentSign;
+			float3x3 tangentToLocal = getTangentToLocal(input, tangentSign);
+			float3x3 tangentToWorld = mul((float3x3)gMatWorldNoScale, tangentToLocal);
 			
-				return mul(gMatWorld, position);
-			}
+			// Note: Consider transposing these externally, for easier reads
+			result.worldNormal = float3(tangentToWorld[0][2], tangentToWorld[1][2], tangentToWorld[2][2]); // Normal basis vector
+			result.worldTangent = float4(tangentToWorld[0][0], tangentToWorld[1][0], tangentToWorld[2][0], tangentSign); // Tangent basis vector
 			
-			void populateVertexOutput(VertexInput input, VertexIntermediate intermediate, inout VStoFS result)
-			{
-				result.uv0 = input.uv0;
-				
-				result.tangentToWorldZ = intermediate.worldNormal;
-				result.tangentToWorldX = intermediate.worldTangent;
-			}
-		};
+			return result;
+		}
+		
+		float4 getVertexWorldPosition(VertexInput input, VertexIntermediate intermediate)
+		{
+			#ifdef USE_BLEND_SHAPES
+				float4 position = float4(input.position + input.deltaPosition, 1.0f);
+			#else
+				float4 position = float4(input.position, 1.0f);
+			#endif			
+		
+			return mul(gMatWorld, position);
+		}
+		
+		void populateVertexOutput(VertexInput input, VertexIntermediate intermediate, inout VStoFS result)
+		{
+			result.uv0 = input.uv0;
+			
+			result.tangentToWorldZ = intermediate.worldNormal;
+			result.tangentToWorldX = intermediate.worldTangent;
+		}
 	};
 };

+ 24 - 27
Data/Raw/Engine/Includes/PPBase.bslinc

@@ -1,36 +1,33 @@
-Technique : base("PPBase") =
+mixin PPBase
 {
-	Pass =
+	depth
 	{
-		DepthWrite = false;
-		DepthRead = false;
-	
-		Common = 
+		write = false;
+		read = false;
+	};
+
+	code
+	{
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
 
-		Vertex =
+		struct VertexInput
 		{
-			struct VertexInput
-			{
-				float2 screenPos : POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
-			
-			VStoFS main(VertexInput input)
-			{
-				VStoFS output;
-			
-				output.position = float4(input.screenPos, 0, 1);
-				output.uv0 = input.uv0;
-
-				return output;
-			}			
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
+		
+		VStoFS vsmain(VertexInput input)
+		{
+			VStoFS output;
+		
+			output.position = float4(input.screenPos, 0, 1);
+			output.uv0 = input.uv0;
+
+			return output;
+		}			
 	};
 };

+ 97 - 100
Data/Raw/Engine/Includes/PPTonemapCommon.bslinc

@@ -1,114 +1,111 @@
-Technique : base("PPTonemapCommon") =
+mixin PPTonemapCommon
 {
-	Pass =
+	code
 	{
-		Common =
+		static const float3x3 sRGBToXYZMatrix =
 		{
-			static const float3x3 sRGBToXYZMatrix =
-			{
-				0.4124564f, 0.3575761f, 0.1804375f,
-				0.2126729f, 0.7151522f, 0.0721750f,
-				0.0193339f, 0.1191920f, 0.9503041f,
-			};
-			
-			static const float3x3 XYZTosRGBMatrix =
-			{
-				 3.2409699419f, -1.5373831776f, -0.4986107603f,
-				-0.9692436363f,  1.8759675015f,  0.0415550574f,
-				 0.0556300797f, -0.2039769589f,  1.0569715142f,
-			};
-			
-			static const float3x3 D65ToD60Matrix =
-			{
-				 1.01303,    0.00610531, -0.014971,
-				 0.00769823, 0.998165,   -0.00503203,
-				-0.00284131, 0.00468516,  0.924507,
-			};
+			0.4124564f, 0.3575761f, 0.1804375f,
+			0.2126729f, 0.7151522f, 0.0721750f,
+			0.0193339f, 0.1191920f, 0.9503041f,
+		};
+		
+		static const float3x3 XYZTosRGBMatrix =
+		{
+			 3.2409699419f, -1.5373831776f, -0.4986107603f,
+			-0.9692436363f,  1.8759675015f,  0.0415550574f,
+			 0.0556300797f, -0.2039769589f,  1.0569715142f,
+		};
+		
+		static const float3x3 D65ToD60Matrix =
+		{
+			 1.01303,    0.00610531, -0.014971,
+			 0.00769823, 0.998165,   -0.00503203,
+			-0.00284131, 0.00468516,  0.924507,
+		};
 
-			static const float3x3 D60ToD65Matrix =
-			{
-				 0.987224,   -0.00611327, 0.0159533,
-				-0.00759836,  1.00186,    0.00533002,
-				 0.00307257, -0.00509595, 1.08168,
-			};
-			
-			static const float3x3 XYZToACES2065Matrix =
-			{
-				 1.0498110175, 0.0000000000,-0.0000974845,
-				-0.4959030231, 1.3733130458, 0.0982400361,
-				 0.0000000000, 0.0000000000, 0.9912520182,
-			};
+		static const float3x3 D60ToD65Matrix =
+		{
+			 0.987224,   -0.00611327, 0.0159533,
+			-0.00759836,  1.00186,    0.00533002,
+			 0.00307257, -0.00509595, 1.08168,
+		};
+		
+		static const float3x3 XYZToACES2065Matrix =
+		{
+			 1.0498110175, 0.0000000000,-0.0000974845,
+			-0.4959030231, 1.3733130458, 0.0982400361,
+			 0.0000000000, 0.0000000000, 0.9912520182,
+		};
 
-			static const float3x3 XYZToACEScgMatrix =
-			{
-				 1.6410233797, -0.3248032942, -0.2364246952,
-				-0.6636628587,  1.6153315917,  0.0167563477,
-				 0.0117218943, -0.0082844420,  0.9883948585,
-			};
+		static const float3x3 XYZToACEScgMatrix =
+		{
+			 1.6410233797, -0.3248032942, -0.2364246952,
+			-0.6636628587,  1.6153315917,  0.0167563477,
+			 0.0117218943, -0.0082844420,  0.9883948585,
+		};
 
-			static const float3x3 ACEScgToXYZMatrix = 
-			{
-				 0.6624541811, 0.1340042065, 0.1561876870,
-				 0.2722287168, 0.6740817658, 0.0536895174,
-				-0.0055746495, 0.0040607335, 1.0103391003,
-			};
+		static const float3x3 ACEScgToXYZMatrix = 
+		{
+			 0.6624541811, 0.1340042065, 0.1561876870,
+			 0.2722287168, 0.6740817658, 0.0536895174,
+			-0.0055746495, 0.0040607335, 1.0103391003,
+		};
 
-			/**
-			 * Encodes a 10bit linear color into 8bits by converting it to log space.
-			 *
-			 * @param 	linearColor		Linear color.
-			 * @return					Encoded color in log space.
-			 */			
-			float3 LinearToLogColor(float3 linearColor)
-			{
-				float linearRange = 14.0f;
-				float linearGrey = 0.18f;
-				float exposureGrey = 444.0f;
+		/**
+		 * Encodes a 10-bit linear color into 8 bits by converting it to log space.
+		 *
+		 * @param 	linearColor		Linear color.
+		 * @return					Encoded color in log space.
+		 */			
+		float3 LinearToLogColor(float3 linearColor)
+		{
+			float linearRange = 14.0f;
+			float linearGrey = 0.18f;
+			float exposureGrey = 444.0f;
 
-				float3 logColor = log2(linearColor) / linearRange - log2(linearGrey) / linearRange + exposureGrey / 1023.0f;
-				return saturate(logColor);
-			}
+			float3 logColor = log2(linearColor) / linearRange - log2(linearGrey) / linearRange + exposureGrey / 1023.0f;
+			return saturate(logColor);
+		}
 
-			/**
-			 * Decodes a 8bit log encoded color back into linear space.
-			 *
-			 * @param 	logColor		Log space color.
-			 * @return					Color in linear space.
-			 */			
-			float3 LogToLinearColor(float3 logColor)
-			{
-				float linearRange = 14.0f;
-				float linearGrey = 0.18f;
-				float exposureGrey = 444.0f;
+		/**
+		 * Decodes an 8-bit log-encoded color back into linear space.
+		 *
+		 * @param 	logColor		Log space color.
+		 * @return					Color in linear space.
+		 */			
+		float3 LogToLinearColor(float3 logColor)
+		{
+			float linearRange = 14.0f;
+			float linearGrey = 0.18f;
+			float exposureGrey = 444.0f;
 
-				return exp2((logColor - exposureGrey / 1023.0f) * linearRange) * linearGrey;
-			}
+			return exp2((logColor - exposureGrey / 1023.0f) * linearRange) * linearGrey;
+		}
 
-			/**
-			 * Converts a linear color value in sRGB/Rec.709 color space into gamma space (applies Rec.709 transfer function). 
-			 * Rec.709 values are suitable for HDTVs and projectors.
-			 *
-			 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
-			 * @return					Gamma corrected color.
-			 */				
-			float3 LinearToGammaRec709(float3 linearColor) 
-			{
-				// TODO: Clamp lower end of linear color so it isn't denormalized?
-				return min(linearColor * 4.5f, pow(max(linearColor, 0.018f), 0.45f) * 1.099f - 0.099f);
-			}
+		/**
+		 * Converts a linear color value in sRGB/Rec.709 color space into gamma space (applies Rec.709 transfer function). 
+		 * Rec.709 values are suitable for HDTVs and projectors.
+		 *
+		 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
+		 * @return					Gamma corrected color.
+		 */				
+		float3 LinearToGammaRec709(float3 linearColor) 
+		{
+			// TODO: Clamp lower end of linear color so it isn't denormalized?
+			return min(linearColor * 4.5f, pow(max(linearColor, 0.018f), 0.45f) * 1.099f - 0.099f);
+		}
 
-			/**
-			 * Converts a linear color value in sRGB/Rec.709 color space into gamma space (applies sRGB transfer function). 
-			 * sRGB values are suitable for PC displays.
-			 *
-			 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
-			 * @return					Gamma corrected color.
-			 */		
-			float3 LinearToGammasRGB(float3 linearColor) 
-			{
-				// TODO: Clamp lower end of linear color so it isn't denormalized?
-				return min(linearColor * 12.92f, pow(max(linearColor, 0.00313067f), 1.0f/2.4f) * 1.055f - 0.055f);
-			}			
-		};
+		/**
+		 * Converts a linear color value in sRGB/Rec.709 color space into gamma space (applies sRGB transfer function). 
+		 * sRGB values are suitable for PC displays.
+		 *
+		 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
+		 * @return					Gamma corrected color.
+		 */		
+		float3 LinearToGammasRGB(float3 linearColor) 
+		{
+			// TODO: Clamp lower end of linear color so it isn't denormalized?
+			return min(linearColor * 12.92f, pow(max(linearColor, 0.00313067f), 1.0f/2.4f) * 1.055f - 0.055f);
+		}			
 	};
 };

+ 182 - 184
Data/Raw/Engine/Includes/PPWhiteBalance.bslinc

@@ -1,209 +1,207 @@
-Technique : base("PPWhiteBalance") =
+mixin PPWhiteBalance
 {
-	Pass =
+	code
 	{
-		Compute =
+		/**
+		 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.
+		 * Coordinates should be near the Planckian locus, otherwise the returned temperature becomes meaningless.
+		 *
+		 * McCamy's cubic is evaluated on the inverse slope n = (x - xe) / (y - ye) measured from the
+		 * epicenter (xe, ye) = (0.3320, 0.1858). The polynomial coefficients used below are the ones that
+		 * belong to this sign convention; as a sanity check, D65 (0.3127, 0.3290) evaluates to roughly 6500K.
+		 *
+		 * @param 	coords	CIE 1931 xy chromaticity coordinates.
+		 * @return			Correlated color temperature in degrees Kelvin.
+		 */
+		float CCT(float2 coords)
 		{
-			/**
-			 * Calculates correlated color temperature from chomaticity coordinates using the McCamy's formula.
-			 * Coordinates should be near the Planckian locus otherwise the returned temperature becomes meaningless.
-			 *
-			 * @param 	coords	CIE 1931 x chomaticity coordinates.
-			 * @return			Correlated color temperature in degrees Kelvin.
-			 */
-			float CCT(float2 coords)
-			{
-				float n = (coords.x - 0.3320f) / (0.1858f - coords.y);
-				float n2 = n * n;
-				float n3 = n2 * n;
-				
-				return -449.0f * n3 + 3525.0f * n2 - 6823.3f * n + 5520.33f;
-			}
+			// Inverse slope relative to the epicenter; the denominator must be (y - 0.1858) to match the
+			// signs of the odd-power coefficients below.
+			float n = (coords.x - 0.3320f) / (coords.y - 0.1858f);
+			float n2 = n * n;
+			float n3 = n2 * n;
+			
+			return -449.0f * n3 + 3525.0f * n2 - 6823.3f * n + 5520.33f;
+		}
 
-			/**
-			 * Calculates chromaticity coordinates from a correlated color temperature. Uses the Planckian locus formula
-			 * which works for values in range [1000K, 15000K].
-			 *
-			 * @param	T	Correlated color temperature in degrees Kelvin.
-			 * @return		CIE 1960 UCS chomaticity coordinates.
-			 */
-			float2 PlanckianLocusChromaticity(float T)
-			{
-				float T2 = T * T;
+		/**
+		 * Calculates chromaticity coordinates from a correlated color temperature. Uses the Planckian locus formula
+		 * which works for values in range [1000K, 15000K].
+		 *
+		 * @param	T	Correlated color temperature in degrees Kelvin.
+		 * @return		CIE 1960 UCS chromaticity coordinates.
+		 */
+		float2 PlanckianLocusChromaticity(float T)
+		{
+			float T2 = T * T;
 
-				// Calculates CIE 1960 UCS coordinates
-				float u = (0.860117757f + 1.54118254e-4f * T + 1.28641212e-7f * T2) / (1.0f + 8.42420235e-4f * T + 7.08145163e-7f * T2);
-				float v = (0.317398726f + 4.22806245e-5f * T + 4.20481691e-8f * T2) / (1.0f - 2.89741816e-5f * T + 1.61456053e-7f * T2);
-				
-				return float2(u, v);
-			}
+			// Calculates CIE 1960 UCS coordinates
+			float u = (0.860117757f + 1.54118254e-4f * T + 1.28641212e-7f * T2) / (1.0f + 8.42420235e-4f * T + 7.08145163e-7f * T2);
+			float v = (0.317398726f + 4.22806245e-5f * T + 4.20481691e-8f * T2) / (1.0f - 2.89741816e-5f * T + 1.61456053e-7f * T2);
+			
+			return float2(u, v);
+		}
 
-			/**
-			 * Calculates chromaticity coordinates from a correlated color temperature. Uses the formula for series
-			 * D standard illuminants (D55, D65, D75, etc.). Valid for values in range [4000K, 25000K].
-			 *
-			 * @param	T	Correlated color temperature in degrees Kelvin.
-			 * @return		CIE 1931 chomaticity coordinates.
-			 */
-			float2 DSeriesIlluminantChromaticity(float T)
-			{
-				float x = T <= 7000.0f 
-					? 0.244063f + (0.09911e3 + (2.9678e6 - 4.6070e9 / T) / T) / T 
-					: 0.237040f + (0.24748e3 + (1.9018e6 - 2.0064e9 / T) / T) / T;
-				
-				float y = -3.0f * x * x + 2.87f * x - 0.275f;
+		/**
+		 * Calculates chromaticity coordinates from a correlated color temperature. Uses the formula for series
+		 * D standard illuminants (D55, D65, D75, etc.). Valid for values in range [4000K, 25000K].
+		 *
+		 * @param	T	Correlated color temperature in degrees Kelvin.
+		 * @return		CIE 1931 chromaticity coordinates.
+		 */
+		float2 DSeriesIlluminantChromaticity(float T)
+		{
+			float x = T <= 7000.0f 
+				? 0.244063f + (0.09911e3 + (2.9678e6 - 4.6070e9 / T) / T) / T 
+				: 0.237040f + (0.24748e3 + (1.9018e6 - 2.0064e9 / T) / T) / T;
+			
+			float y = -3.0f * x * x + 2.87f * x - 0.275f;
 
-				return float2(x, y);
-			}
+			return float2(x, y);
+		}
 
-			/**
-			 * Converts chomaticity coordinates from CIE 1960 uniform color space to CIE 1931 color space.
-			 *
-			 * @param	uv	Chromaticity coordinates in CIE 1960 UCS.
-			 * @return		Chromaticity coordinates in CIE 1931.
-			 */
-			float2 CIE1960ToCIE1931(float2 uv)
-			{
-				float x = (3 * uv.x) / (2 * uv.x - 8 * uv.y + 4);
-				float y = (2 * uv.y) / (2 * uv.x - 8 * uv.y + 4);
+		/**
+		 * Converts chromaticity coordinates from CIE 1960 uniform color space to CIE 1931 color space.
+		 *
+		 * @param	uv	Chromaticity coordinates in CIE 1960 UCS.
+		 * @return		Chromaticity coordinates in CIE 1931.
+		 */
+		float2 CIE1960ToCIE1931(float2 uv)
+		{
+			float x = (3 * uv.x) / (2 * uv.x - 8 * uv.y + 4);
+			float y = (2 * uv.y) / (2 * uv.x - 8 * uv.y + 4);
 
-				return float2(x, y);
-			}
+			return float2(x, y);
+		}
 
-			/**
-			 * Adds the specified offset along the Planckian isothermal line and returns the chromaticity coordinates for the offset position.
-			 *
-			 * @param	uv		Chromaticity coordiantes in CIE 1960 UCS for the correlated color temperature along the Planckian locus.
-			 * @param	offset	Offset to be added along the isothermal. In range [-1, 1]. The actual offset in chromaticity
-			 *					coordinates is scaled to |0.05| since values farther than that usually aren't useful.
-			 * @return			CIE 1931 chomaticity coordinates.
-			 */
-			float2 PlanckianIsothermalOffset(float2 uv, float offset)
+		/**
+		 * Adds the specified offset along the Planckian isothermal line and returns the chromaticity coordinates for the offset position.
+		 *
+		 * @param	uv		Chromaticity coordinates in CIE 1960 UCS for the correlated color temperature along the Planckian locus.
+		 * @param	offset	Offset to be added along the isothermal. In range [-1, 1]. The actual offset in chromaticity
+		 *					coordinates is scaled to |0.05| since values farther than that usually aren't useful.
+		 * @return			CIE 1931 chromaticity coordinates.
+		 */
+		float2 PlanckianIsothermalOffset(float2 uv, float offset)
+		{
+			// Rotate uv by 90 degrees and normalize it to get the isotherm line
+			float2 isotherm = normalize(float2(-uv.y, uv.x));
+			
+			uv += isotherm * offset * 0.05f;
+			return CIE1960ToCIE1931(uv);
+		}
+		
+		/**
+		 * Converts from CIE 1931 xyY color space to XYZ color space.
+		 *
+		 * The y chromaticity is clamped to a minimum of 1e-10 before it is used as a divisor, so a zero
+		 * y does not produce a division by zero.
+		 *
+		 * @param	xyY		Coordinates in xyY color space: x and y chromaticity in the first two components,
+		 *					luminance Y in the third component.
+		 * @return			Coordinates in XYZ color space.
+		 */
+		float3 xyYToXYZ(float3 xyY)
+		{
+			float divisor = max(xyY.y, 1e-10f);
+		
+			float3 XYZ;
+			XYZ.x = (xyY.x * xyY.z) / divisor;
+			XYZ.y = xyY.z;  
+			XYZ.z = ((1.0 - xyY.x - xyY.y) * xyY.z) / divisor;
+
+			return XYZ;
+		}
+		
+		/**
+		 * Converts from CIE 1931 XYZ color space to xyY color space.
+		 *
+		 * If X + Y + Z is exactly zero the divisor is replaced with 1e-10 so the chromaticity
+		 * computation does not divide by zero.
+		 *
+		 * @param	XYZ		Coordinates in XYZ color space.
+		 * @return			Coordinates in xyY color space: x and y chromaticity in the first two components,
+		 *					luminance Y in the third component.
+		 */
+		float3 XYZToxyY(float3 XYZ)
+		{
+			float3 xyY;
+			float divisor = XYZ.x + XYZ.y + XYZ.z;
+			if (divisor == 0.0f) 
+				divisor = 1e-10f;
+			
+			xyY.x = XYZ.x / divisor;
+			xyY.y = XYZ.y / divisor;  
+			xyY.z = XYZ.y;
+		  
+			return xyY;
+		}			
+		
+		/**
+		 * Returns a matrix that transforms XYZ tristimulus values for a given white point to
+		 * a new white point.
+		 *
+		 * @param	orgWhite	Chromaticity coordinates in CIE 1931 for the original white point.
+		 * @param	newWhite	Chromaticity coordinates in CIE 1931 for the new white point.
+		 * @return				Matrix that transforms from the original to the new white point.
+		 */
+		float3x3 ChromaticAdaptation(float2 orgWhite, float2 newWhite)
+		{
+			// Convert xyY to XYZ
+			float3 orgWhite3 = xyYToXYZ(float3(orgWhite.xy, 1.0f));
+			float3 newWhite3 = xyYToXYZ(float3(newWhite.xy, 1.0f));
+			
+			// Convert to cone response domain using Bradford's matrix
+			const float3x3 coneResponse =
 			{
-				// Rotate uv by 90 degrees and normalize it to get the isotherm line
-				float2 isotherm = normalize(float2(-uv.y, uv.x));
-				
-				uv += isotherm * offset * 0.05f;
-				return CIE1960ToCIE1931(uv);
-			}
+				 0.8951f,  0.2664f, -0.1614f,
+				-0.7502f,  1.7135f,  0.0367f,
+				 0.0389f, -0.0685f,  1.0296f,
+			};
 			
-			/**
-			 * Converts from CIE 1931 xyY color space to XYZ color space.
-			 *
-			 * @param	xyY		Coordinates in xyY color space.
-			 * @return			Coordinates in XYZ color space.
-			 */
-			float3 xyYToXYZ(float3 xyY)
+			const float3x3 invConeResponse =
 			{
-				float divisor = max(xyY.y, 1e-10f);
+				 0.9870f, -0.1471f,  0.1600f,
+				 0.4323f,  0.5184f,  0.0493f,
+				-0.0085f,  0.0400f,  0.9685f,
+			};
 			
-				float3 XYZ;
-				XYZ.x = (xyY.x * xyY.z) / divisor;
-				XYZ.y = xyY.z;  
-				XYZ.z = ((1.0 - xyY.x - xyY.y) * xyY.z) / divisor;
-
-				return XYZ;
-			}
+			orgWhite3 = mul(coneResponse, orgWhite3);
+			newWhite3 = mul(coneResponse, newWhite3);
 			
-			/**
-			 * Converts from CIE 1931 XYZ color space to xyY color space.
-			 *
-			 * @param	XYZ		Coordinates in XYZ color space.
-			 * @return			Coordinates in xyY color space.
-			 */
-			float3 XYZToxyY(float3 XYZ)
+			// Generate transformation matrix
+			float3x3 adaptation =
 			{
-				float3 xyY;
-				float divisor = XYZ.x + XYZ.y + XYZ.z;
-				if (divisor == 0.0f) 
-					divisor = 1e-10f;
-				
-				xyY.x = XYZ.x / divisor;
-				xyY.y = XYZ.y / divisor;  
-				xyY.z = XYZ.y;
-			  
-				return xyY;
-			}			
+				newWhite3.x / orgWhite3.x, 0.0f, 0.0f,
+				0.0f, newWhite3.y / orgWhite3.y, 0.0f,
+				0.0f, 0.0f, newWhite3.z / orgWhite3.z
+			};
 			
-			/**
-			 * Returns a matrix that transform XYZ tristimulus values for a given white point to
-			 * a new white point.
-			 *
-			 * @param	orgWhite	Chromaticity coordinates in CIE 1931 for the original white point.
-			 * @param	newWhite	Chromaticity coordinates in CIE 1931 for the new white point.
-			 * @return				Matrix that transform from the original to new white point.
-			 */
-			float3x3 ChromaticAdaptation(float2 orgWhite, float2 newWhite)
+			return mul(invConeResponse, mul(adaptation, coneResponse));
+		}
+		
+		[internal]
+		cbuffer WhiteBalanceInput
+		{
+			float gWhiteTemp;
+			float gWhiteOffset;
+		}
+		
+		/**
+		 * Applies color balancing to the provided color. The color is transformed from its original white point
+		 * (provided by gWhiteTemp and gWhiteOffset) to a D65 white point.
+		 * 
+		 * @param	color 	Color in linear sRGB/Rec.709 color space.
+		 * @return			White balanced linear color.
+		 */
+		float3 WhiteBalance(float3 color)
+		{
+			float2 orgPlanckianUV = PlanckianLocusChromaticity(gWhiteTemp);
+			float2 orgWhiteXY;
+			if(gWhiteTemp < 4000)
 			{
-				// Convert xyY to XYZ
-				float3 orgWhite3 = xyYToXYZ(float3(orgWhite.xy, 1.0f));
-				float3 newWhite3 = xyYToXYZ(float3(newWhite.xy, 1.0f));
-				
-				// Convert to cone response domain using Bradford's matrix
-				const float3x3 coneResponse =
-				{
-					 0.8951f,  0.2664f, -0.1614f,
-					-0.7502f,  1.7135f,  0.0367f,
-					 0.0389f, -0.0685f,  1.0296f,
-				};
-				
-				const float3x3 invConeResponse =
-				{
-					 0.9870f, -0.1471f,  0.1600f,
-					 0.4323f,  0.5184f,  0.0493f,
-					-0.0085f,  0.0400f,  0.9685f,
-				};
-				
-				orgWhite3 = mul(coneResponse, orgWhite3);
-				newWhite3 = mul(coneResponse, newWhite3);
-				
-				// Generate transformation matrix
-				float3x3 adaptation =
-				{
-					newWhite3.x / orgWhite3.x, 0.0f, 0.0f,
-					0.0f, newWhite3.y / orgWhite3.y, 0.0f,
-					0.0f, 0.0f, newWhite3.z / orgWhite3.z
-				};
-				
-				return mul(invConeResponse, mul(adaptation, coneResponse));
+				orgWhiteXY = PlanckianIsothermalOffset(orgPlanckianUV, gWhiteOffset);
 			}
-			
-			cbuffer WhiteBalanceInput
+			else
 			{
-				float gWhiteTemp;
-				float gWhiteOffset;
+				orgWhiteXY = DSeriesIlluminantChromaticity(gWhiteTemp);
+				float2 offsetXY = PlanckianIsothermalOffset(orgPlanckianUV, gWhiteOffset) - CIE1960ToCIE1931(orgPlanckianUV);
+				
+				orgWhiteXY += offsetXY;
 			}
+		
+			float2 newWhiteXY = float2(0.3128f, 0.3290f); // D65 white point
 			
-			/**
-			 * Applies color balancing to the provided color. The color is transformed from its original white point
-			 * (provided by gWhiteTemp and gWhiteOffset) to a D65 white point.
-			 * 
-			 * @param	color 	Color in linear sRGB/Rec.709 color space.
-			 * @return			White balanced linear color.
-			 */
-			float3 WhiteBalance(float3 color)
-			{
-				float2 orgPlanckianUV = PlanckianLocusChromaticity(gWhiteTemp);
-				float2 orgWhiteXY;
-				if(gWhiteTemp < 4000)
-				{
-					orgWhiteXY = PlanckianIsothermalOffset(orgPlanckianUV, gWhiteOffset);
-				}
-				else
-				{
-					orgWhiteXY = DSeriesIlluminantChromaticity(gWhiteTemp);
-					float2 offsetXY = PlanckianIsothermalOffset(orgPlanckianUV, gWhiteOffset) - CIE1960ToCIE1931(orgPlanckianUV);
-					
-					orgWhiteXY += offsetXY;
-				}
-			
-				float2 newWhiteXY = float2(0.3128f, 0.3290f); // D65 white point
-				
-				float3x3 adaptation = ChromaticAdaptation(orgWhiteXY, newWhiteXY);
-				adaptation = mul(XYZTosRGBMatrix, mul(adaptation, sRGBToXYZMatrix));
+			float3x3 adaptation = ChromaticAdaptation(orgWhiteXY, newWhiteXY);
+			adaptation = mul(XYZTosRGBMatrix, mul(adaptation, sRGBToXYZMatrix));
 
-				return mul(adaptation, color);
-			}
-		};
+			return mul(adaptation, color);
+		}
 	};
 };

+ 43 - 68
Data/Raw/Engine/Includes/PerCameraData.bslinc

@@ -1,75 +1,50 @@
-Parameters =
+mixin PerCameraData
 {
-	float3 		gViewDir : auto("ViewDir");
-	float3 		gViewOrigin : auto("ViewOrigin");
-	mat4x4		gMatViewProj : auto("VP");	
-	mat4x4		gMatView : auto("V");
-	mat4x4		gMatProj : auto("P");
-	mat4x4		gMatInvProj : auto("IP");
-	mat4x4		gMatInvViewProj : auto("IVP");
-	
-	// Special inverse view-projection matrix that had projection entries that affect z and w eliminated.
-	// Used to transform a vector(clip_x, clip_y, view_z, view_w), where clip_x/clip_y are in clip space, 
-	// and view_z/view_w in view space, into world space
-	mat4x4      gMatScreenToWorld : auto("ScreenToWorld");
-	
-	// Converts device Z to world Z using this formula: worldZ = (1 / (deviceZ + y)) * x
-	float2		gDeviceZToWorldZ : auto("DeviceToWorldZ");
-	
-	// x - near plane distance, y - far plane distance
-	float2		gNearFar : auto("NearFar");
-	
-	// xy - Viewport offset in pixels
-	// zw - Viewport width & height in pixels
-	int4 		gViewportRectangle : auto("ViewportRect");
-	
-	// xy - (Viewport size in pixels / 2) / Target size in pixels
-	// zw - (Viewport offset in pixels + (Viewport size in pixels / 2) + Optional pixel center offset) / Target size in pixels	
-	float4 		gClipToUVScaleOffset : auto("ClipToUVScaleOffset");	
-
-	float		gAmbientFactor;
-};
-
-Blocks =
-{
-	Block PerCamera : auto("PerCamera");
-};
-
-Technique : base("PerCameraData") =
-{
-	Pass =
+	code 
 	{
-		Common = 
+		[internal]
+		cbuffer PerCamera
 		{
-			cbuffer PerCamera
-			{
-				float3	 gViewDir;
-				float3 	 gViewOrigin;
-				float4x4 gMatViewProj;
-				float4x4 gMatView;
-				float4x4 gMatProj;
-				float4x4 gMatInvProj;
-				float4x4 gMatInvViewProj;
-				float4x4 gMatScreenToWorld;
-				float2 	 gDeviceZToWorldZ;
-				float2	 gNDCZToWorldZ;
-				float2	 gNearFar;
-				int4 	 gViewportRectangle;
-				float4 	 gClipToUVScaleOffset;	
-				float	gAmbientFactor;
-			}
+			float3	 gViewDir;
+			float3 	 gViewOrigin;
+			float4x4 gMatViewProj;
+			float4x4 gMatView;
+			float4x4 gMatProj;
+			float4x4 gMatInvProj;
+			float4x4 gMatInvViewProj;
+			
+			// Special inverse view-projection matrix that has had the projection entries affecting z and w eliminated.
+			// Used to transform a vector(clip_x, clip_y, view_z, view_w), where clip_x/clip_y are in clip space, 
+			// and view_z/view_w in view space, into world space				
+			float4x4 gMatScreenToWorld;
+			
+			// Converts device Z to world Z using this formula: worldZ = (1 / (deviceZ + y)) * x
+			float2 	 gDeviceZToWorldZ;
+			float2	 gNDCZToWorldZ;
 			
-			/** Converts Z value in range [0,1] into Z value in view space. */
-			float convertFromDeviceZ(float deviceZ)
-			{
-				return (1.0f / (deviceZ + gDeviceZToWorldZ.y)) * gDeviceZToWorldZ.x;
-			}
+			// x - near plane distance, y - far plane distance
+			float2	 gNearFar;
 			
-			/** Converts Z value from view space to NDC space. */
-			float convertToNDCZ(float viewZ)
-			{
-				return -gNDCZToWorldZ.y + (gNDCZToWorldZ.x / viewZ);
-			}
-		};
+			// xy - Viewport offset in pixels
+			// zw - Viewport width & height in pixels
+			int4 	 gViewportRectangle;
+			
+			// xy - (Viewport size in pixels / 2) / Target size in pixels
+			// zw - (Viewport offset in pixels + (Viewport size in pixels / 2) + Optional pixel center offset) / Target size in pixels	
+			float4 	 gClipToUVScaleOffset;	
+			float	gAmbientFactor;
+		}
+		
+		/** Converts Z value in range [0,1] into Z value in view space. */
+		float convertFromDeviceZ(float deviceZ)
+		{
+			return (1.0f / (deviceZ + gDeviceZToWorldZ.y)) * gDeviceZToWorldZ.x;
+		}
+		
+		/** Converts Z value from view space to NDC space. */
+		float convertToNDCZ(float viewZ)
+		{
+			return -gNDCZToWorldZ.y + (gNDCZToWorldZ.x / viewZ);
+		}
 	};
 };

+ 15 - 32
Data/Raw/Engine/Includes/PerObjectData.bslinc

@@ -1,38 +1,21 @@
-Parameters =
+mixin PerObjectData
 {
-	mat4x4		gMatWorldViewProj : auto("WVP");
-	mat4x4		gMatWorld : auto("W");
-	mat4x4		gMatInvWorld : auto("IW");
-	mat4x4		gMatWorldNoScale : auto("WNoScale");
-	mat4x4		gMatInvWorldNoScale : auto("IWNoScale");
-	float		gWorldDeterminantSign : auto("WorldDeterminantSign");
-};
-
-Blocks =
-{
-	Block PerObject : auto("PerObject");
-	Block PerCall : auto("PerCall");
-};
-
-Technique : base("PerObjectData") =
-{
-	Pass =
+	code
 	{
-		Common = 
+		[internal]
+		cbuffer PerObject
 		{
-			cbuffer PerObject
-			{
-				float4x4 gMatWorld;
-				float4x4 gMatInvWorld;
-				float4x4 gMatWorldNoScale;
-				float4x4 gMatInvWorldNoScale;
-				float gWorldDeterminantSign;
-			}	
+			float4x4 gMatWorld;
+			float4x4 gMatInvWorld;
+			float4x4 gMatWorldNoScale;
+			float4x4 gMatInvWorldNoScale;
+			float gWorldDeterminantSign;
+		}	
 
-			cbuffer PerCall
-			{
-				float4x4 gMatWorldViewProj;
-			}			
-		};
+		[internal]
+		cbuffer PerCall
+		{
+			float4x4 gMatWorldViewProj;
+		}			
 	};
 };

+ 56 - 59
Data/Raw/Engine/Includes/ReflectionCubemapCommon.bslinc

@@ -1,66 +1,63 @@
-Technique : base("ReflectionCubemapCommon") =
+mixin ReflectionCubemapCommon
 {
-	Pass =
+	code
 	{
-		Common = 
+		float3 getDirFromCubeFace(uint cubeFace, float2 uv)
 		{
-			float3 getDirFromCubeFace(uint cubeFace, float2 uv)
-			{
-				float3 dir;
-				if(cubeFace == 0)
-					dir = float3(1.0f, -uv.y, -uv.x);
-				else if(cubeFace == 1)
-					dir = float3(-1.0f, -uv.y, uv.x);
-				else if(cubeFace == 2)
-					dir = float3(uv.x, 1.0f, uv.y);
-				else if(cubeFace == 3)
-					dir = float3(uv.x, -1.0f, -uv.y);
-				else if(cubeFace == 4)
-					dir = float3(uv.x, -uv.y, 1.0f);
-				else
-					dir = float3(-uv.x, -uv.y, -1.0f);
-					
-				return dir;
-			}
-			
-			/**
-			 * Calculates a mip level to sample from based on roughness value.
-			 *
-			 * @param 	roughness	Roughness in range [0, 1]. Higher values yield more roughness.
-			 * @param	numMips		Total number of mip-map levels in the texture we'll be sampling from.
-			 * @return				Index of the mipmap level to sample.
-			 */					
-			float mapRoughnessToMipLevel(float roughness, int numMips)
-			{
-				// We use the following equation:
-				//    mipLevel = log10(1 - roughness) / log10(dropPercent)
-				//
-				// Where dropPercent represent by what % to drop the roughness with each mip level.
-				// We convert to log2 and a assume a drop percent value of 0.7. This gives us:
-				//    mipLevel = -2.8 * log2(1 - roughness);
-				
-				// Note: Another value that could be used is drop 0.6, which yields a multiply by -1.35692. 
-				// This more accurately covers the mip range, but early mip levels end up being too smooth,
-				// and benefits from our cubemap importance sampling strategy seem to be lost as most samples
-				// fall within one pixel, resulting in same effect as just trivially downsampling. With 0.7 drop
-				// the roughness increases too early and higher mip levels don't cover the full [0, 1] range. Which
-				// is better depends on what looks better.
+			float3 dir;
+			if(cubeFace == 0)
+				dir = float3(1.0f, -uv.y, -uv.x);
+			else if(cubeFace == 1)
+				dir = float3(-1.0f, -uv.y, uv.x);
+			else if(cubeFace == 2)
+				dir = float3(uv.x, 1.0f, uv.y);
+			else if(cubeFace == 3)
+				dir = float3(uv.x, -1.0f, -uv.y);
+			else if(cubeFace == 4)
+				dir = float3(uv.x, -uv.y, 1.0f);
+			else
+				dir = float3(-uv.x, -uv.y, -1.0f);
 				
-				return max(0, -2.8f * log2(1.0f - roughness));
-			}
+			return dir;
+		}
+		
+		/**
+		 * Calculates a mip level to sample from based on roughness value.
+		 *
+		 * @param 	roughness	Roughness in range [0, 1]. Higher values yield more roughness.
+		 * @param	numMips		Total number of mip-map levels in the texture we'll be sampling from.
+		 * @return				Index of the mipmap level to sample.
+		 */					
+		float mapRoughnessToMipLevel(float roughness, int numMips)
+		{
+			// We use the following equation:
+			//    mipLevel = log10(1 - roughness) / log10(dropPercent)
+			//
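+			// Where dropPercent represents by what % to drop the roughness with each mip level.
+			// We convert to log2 and assume a drop percent value of 0.7. This gives us:
+			//    mipLevel = -2.8 * log2(1 - roughness);
+			// NOTE(review): 1 / log2(0.7) is about -1.94, whereas -2.8 is about 1 / ln(0.7) - confirm which was intended.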
 			
-			/**
-			 * Calculates a roughness value from the provided mip level.
-			 *
-			 * @param 	mipLevel	Mip level to determine roughness for.
-			 * @param	numMips		Total number of mip-map levels in the texture we'll be sampling from.
-			 * @return				Roughness value for the specific mip level.
-			 */				
-			float mapMipLevelToRoughness(int mipLevel, int numMips)
-			{
-				// mapRoughnessToMipLevel() solved for roughness
-				return 1 - exp2((float)mipLevel / -2.8f);
-			}	
-		};
+			// Note: Another value that could be used is drop 0.6, which yields a multiply by -1.35692. 
+			// This more accurately covers the mip range, but early mip levels end up being too smooth,
+			// and benefits from our cubemap importance sampling strategy seem to be lost as most samples
+			// fall within one pixel, resulting in same effect as just trivially downsampling. With 0.7 drop
+			// the roughness increases too early and higher mip levels don't cover the full [0, 1] range. Which
+			// is better depends on what looks better.
+			
+			return max(0, -2.8f * log2(1.0f - roughness));
+		}
+		
+		/**
+		 * Calculates a roughness value from the provided mip level.
+		 *
+		 * This is the inverse of the mapping used by mapRoughnessToMipLevel():
+		 *    roughness = 1 - 2^(mipLevel / -2.8)
+		 *
+		 * @param 	mipLevel	Mip level to determine roughness for.
+		 * @param	numMips		Total number of mip-map levels in the texture we'll be sampling from. Currently not
+		 *						referenced by the function body.
+		 * @return				Roughness value for the specific mip level.
+		 */				
+		float mapMipLevelToRoughness(int mipLevel, int numMips)
+		{
+			// mapRoughnessToMipLevel() solved for roughness; the -2.8 constant must stay in sync with that function
+			return 1 - exp2((float)mipLevel / -2.8f);
+		}	
 	};
 };

+ 54 - 69
Data/Raw/Engine/Includes/ResolveCommon.bslinc

@@ -1,81 +1,66 @@
-Parameters =
-{		
-#ifdef ENABLE_MSAA
-	Texture2DMS 	gSource;
-	int				gNumSamples;
-#else
-	Texture2D 	gSource;
-#endif
-};
-
-Technique =
+technique Resolve
 {
-	Pass =
+	depth
 	{
-		DepthRead = false;
-		DepthWrite = false;
-	
-		Common = 
+		read = false;
+		write = false;
+	};
+
+	code
+	{
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
-	
-		Vertex =
-		{
-			struct VertexInput
-			{
-				float2 screenPos : POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
-			
-			VStoFS main(VertexInput input)
-			{
-				VStoFS output;
-			
-				output.position = float4(input.screenPos, 0, 1);
-				output.uv0 = input.uv0;
 
-				return output;
-			}			
+		struct VertexInput
+		{
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
 		
-		Fragment = 
+		VStoFS vsmain(VertexInput input)
 		{
-			#ifdef ENABLE_MSAA
-			
-			cbuffer FragParams
-			{
-				int gNumSamples;
-			};
-			
-			Texture2DMS<float4> gSource : register(t0);
-			
-			float4 main(VStoFS input) : SV_Target0
-			{
-				int2 iUV = trunc(input.uv0);
-			
-				float4 sum = float4(0, 0, 0, 0);
-				for(int i = 0; i < gNumSamples; i++)
-					sum += gSource.Load(iUV, i);
-					
-				return sum / gNumSamples;
-			}
-			
-			#else
-			
-			Texture2D<float4> gSource : register(t0);
+			VStoFS output;
+		
+			output.position = float4(input.screenPos, 0, 1);
+			output.uv0 = input.uv0;
+
+			return output;
+		}			
+
+		#ifdef ENABLE_MSAA
 		
-			float4 main(VStoFS input) : SV_Target0
-			{
-				int2 iUV = trunc(input.uv0);
-				return gSource.Load(int3(iUV.xy, 0));
-			}
-			
-			#endif
+		[internal]
+		cbuffer FragParams
+		{
+			int gNumSamples;
 		};
+		
+		[internal] Texture2DMS<float4> gSource;
+		
+		float4 fsmain(VStoFS input) : SV_Target0 // MSAA path: returns the average of all gNumSamples samples of one texel
+		{
+			int2 iUV = trunc(input.uv0); // uv0 is truncated and used directly as integer texel coordinates
+		
+			float4 sum = float4(0, 0, 0, 0);
+			for(int i = 0; i < gNumSamples; i++)
+				sum += gSource.Load(iUV, i); // Second argument selects the sample index
+				
+			return sum / gNumSamples; // Note: no guard for gNumSamples == 0
+		}
+		
+		#else
+		
+		[internal] Texture2D<float4> gSource;
+	
+		float4 fsmain(VStoFS input) : SV_Target0 // Non-MSAA path: returns a single unfiltered texel of gSource
+		{
+			int2 iUV = trunc(input.uv0); // uv0 is truncated and used directly as integer texel coordinates
+			return gSource.Load(int3(iUV.xy, 0)); // Third component (0) selects the mip level
+		}
+		
+		#endif
 	};
 };

+ 114 - 117
Data/Raw/Engine/Includes/SHCommon.bslinc

@@ -1,131 +1,128 @@
-Technique : base("SHCommon") =
+mixin SHCommon
 {
-	Pass =
+	code
 	{
-		Common = 
+		struct SHVector5
 		{
-			struct SHVector5
-			{
-				float4 v0;
-				float4 v1;
-				float4 v2;
-				float4 v3;
-				float4 v4;
-				float4 v5;
-				float v6;
-			};
-			
-			struct SHVector5RGB
-			{
-				SHVector5 R;
-				SHVector5 G;
-				SHVector5 B;
-			};
-			
-			void SHZero(inout SHVector5 v)
-			{
-				v.v0 = 0;
-				v.v1 = 0;
-				v.v2 = 0;
-				v.v3 = 0;
-				v.v4 = 0;
-				v.v5 = 0;
-				v.v6 = 0;
-			}
+			float4 v0;
+			float4 v1;
+			float4 v2;
+			float4 v3;
+			float4 v4;
+			float4 v5;
+			float v6;
+		};
+		
+		struct SHVector5RGB
+		{
+			SHVector5 R;
+			SHVector5 G;
+			SHVector5 B;
+		};
+		
+		void SHZero(inout SHVector5 v) // Resets every coefficient of v to zero, in place
+		{
+			v.v0 = 0;
+			v.v1 = 0;
+			v.v2 = 0;
+			v.v3 = 0;
+			v.v4 = 0;
+			v.v5 = 0;
+			v.v6 = 0; // v6 is a single float, unlike the float4 members v0-v5
+		}
 
-			void SHMultiplyAdd(inout SHVector5 lhs, SHVector5 rhs, float c)
-			{
-				lhs.v0 += rhs.v0 * c;
-				lhs.v1 += rhs.v1 * c;
-				lhs.v2 += rhs.v2 * c;
-				lhs.v3 += rhs.v3 * c;
-				lhs.v4 += rhs.v4 * c;
-				lhs.v5 += rhs.v5 * c;
-				lhs.v6 += rhs.v6 * c;
-			}
-			
-			void SHAdd(inout SHVector5 lhs, SHVector5 rhs)
-			{
-				lhs.v0 += rhs.v0;
-				lhs.v1 += rhs.v1;
-				lhs.v2 += rhs.v2;
-				lhs.v3 += rhs.v3;
-				lhs.v4 += rhs.v4;
-				lhs.v5 += rhs.v5;
-				lhs.v6 += rhs.v6;
-			}
-			
-			void SHMultiply(inout SHVector5 lhs, SHVector5 rhs)
-			{
-				lhs.v0 *= rhs.v0;
-				lhs.v1 *= rhs.v1;
-				lhs.v2 *= rhs.v2;
-				lhs.v3 *= rhs.v3;
-				lhs.v4 *= rhs.v4;
-				lhs.v5 *= rhs.v5;
-				lhs.v6 *= rhs.v6;
-			}	
-			
-			void SHMultiply(inout SHVector5 lhs, float rhs)
-			{
-				lhs.v0 *= rhs;
-				lhs.v1 *= rhs;
-				lhs.v2 *= rhs;
-				lhs.v3 *= rhs;
-				lhs.v4 *= rhs;
-				lhs.v5 *= rhs;
-				lhs.v6 *= rhs;
-			}				
-			
-			SHVector5 SHBasis5(float3 dir)
-			{
-				float x = dir.x;
-				float y = dir.y;
-				float z = dir.z;
+		void SHMultiplyAdd(inout SHVector5 lhs, SHVector5 rhs, float c) // In place: lhs += rhs * c, for every coefficient
+		{
+			lhs.v0 += rhs.v0 * c;
+			lhs.v1 += rhs.v1 * c;
+			lhs.v2 += rhs.v2 * c;
+			lhs.v3 += rhs.v3 * c;
+			lhs.v4 += rhs.v4 * c;
+			lhs.v5 += rhs.v5 * c;
+			lhs.v6 += rhs.v6 * c;
+		}
+		
+		void SHAdd(inout SHVector5 lhs, SHVector5 rhs) // In place: lhs += rhs, for every coefficient
+		{
+			lhs.v0 += rhs.v0;
+			lhs.v1 += rhs.v1;
+			lhs.v2 += rhs.v2;
+			lhs.v3 += rhs.v3;
+			lhs.v4 += rhs.v4;
+			lhs.v5 += rhs.v5;
+			lhs.v6 += rhs.v6;
+		}
+		
+		void SHMultiply(inout SHVector5 lhs, SHVector5 rhs) // In place: multiplies each coefficient of lhs by the matching coefficient of rhs
+		{
+			lhs.v0 *= rhs.v0;
+			lhs.v1 *= rhs.v1;
+			lhs.v2 *= rhs.v2;
+			lhs.v3 *= rhs.v3;
+			lhs.v4 *= rhs.v4;
+			lhs.v5 *= rhs.v5;
+			lhs.v6 *= rhs.v6;
+		}	
+		
+		void SHMultiply(inout SHVector5 lhs, float rhs) // In place: scales every coefficient of lhs by the scalar rhs
+		{
+			lhs.v0 *= rhs;
+			lhs.v1 *= rhs;
+			lhs.v2 *= rhs;
+			lhs.v3 *= rhs;
+			lhs.v4 *= rhs;
+			lhs.v5 *= rhs;
+			lhs.v6 *= rhs;
+		}				
+		
+		SHVector5 SHBasis5(float3 dir)
+		{
+			float x = dir.x;
+			float y = dir.y;
+			float z = dir.z;
 
-				float x2 = x*x;
-				float y2 = y*y;
-				float z2 = z*z;
+			float x2 = x*x;
+			float y2 = y*y;
+			float z2 = z*z;
 
-				float z3 = z2 * z;
+			float z3 = z2 * z;
 
-				float x4 = x2 * x2;
-				float y4 = y2 * y2;
-				float z4 = z2 * z2;
+			float x4 = x2 * x2;
+			float y4 = y2 * y2;
+			float z4 = z2 * z2;
 
-				SHVector5 o;
-				o.v0[0] = 0.282095f;
+			SHVector5 o;
+			o.v0[0] = 0.282095f;
 
-				o.v0[1] = -0.488603f * y;
-				o.v0[2] =  0.488603f * z;
-				o.v0[3] = -0.488603f * x;
+			o.v0[1] = -0.488603f * y;
+			o.v0[2] =  0.488603f * z;
+			o.v0[3] = -0.488603f * x;
 
-				o.v1[0] =  1.092548f * x * y;
-				o.v1[1] = -1.092548f * y * z;
-				o.v1[2] =  0.315392f * (3.0f * z2 - 1.0f);
-				o.v1[3] = -1.092548f * x * z;
-				o.v2[0] =  0.546274f * (x2 - y2);
+			o.v1[0] =  1.092548f * x * y;
+			o.v1[1] = -1.092548f * y * z;
+			o.v1[2] =  0.315392f * (3.0f * z2 - 1.0f);
+			o.v1[3] = -1.092548f * x * z;
+			o.v2[0] =  0.546274f * (x2 - y2);
 
-				o.v2[1]  = -0.590043f * y * (3.0f * x2 - y2);
-				o.v2[2] =  2.890611f * y * x * z;
-				o.v2[3] = -0.646360f * y * (-1.0f + 5.0f * z2);
-				o.v3[0] =  0.373176f *(5.0f * z3 - 3.0f * z);
-				o.v3[1] = -0.457045f * x * (-1.0f + 5.0f * z2);
-				o.v3[2] =  1.445306f *(x2 - y2) * z;
-				o.v3[3] = -0.590043f * x * (x2 - 3.0f * y2);
+			o.v2[1]  = -0.590043f * y * (3.0f * x2 - y2);
+			o.v2[2] =  2.890611f * y * x * z;
+			o.v2[3] = -0.646360f * y * (-1.0f + 5.0f * z2);
+			o.v3[0] =  0.373176f *(5.0f * z3 - 3.0f * z);
+			o.v3[1] = -0.457045f * x * (-1.0f + 5.0f * z2);
+			o.v3[2] =  1.445306f *(x2 - y2) * z;
+			o.v3[3] = -0.590043f * x * (x2 - 3.0f * y2);
 
-				o.v4[0] =  2.503340f * x * y * (x2 - y2);
-				o.v4[1] = -1.770130f * y * z * (3.0f * x2 - y2);
-				o.v4[2] =  0.946175f * y * x * (-1.0f + 7.0f * z2);
-				o.v4[3] = -0.669046f * y * z * (-3.0f + 7.0f * z2);
-				o.v5[0] =  (105.0f * z4 - 90.0f * z2 + 9.0f) / 28.359261f;
-				o.v5[1] = -0.669046f * x * z * (-3.0f + 7.0f * z2);
-				o.v5[2] =  0.473087f * (x2 - y2) * (-1.0f + 7.0f * z2);
-				o.v5[3] = -1.770130f * x * z * (x2 - 3.0f * y2);
-				o.v6 =  0.625836f * (x4 - 6.0f * y2 * x2 + y4);
-				
-				return o;
-			}
-		};
+			o.v4[0] =  2.503340f * x * y * (x2 - y2);
+			o.v4[1] = -1.770130f * y * z * (3.0f * x2 - y2);
+			o.v4[2] =  0.946175f * y * x * (-1.0f + 7.0f * z2);
+			o.v4[3] = -0.669046f * y * z * (-3.0f + 7.0f * z2);
+			o.v5[0] =  (105.0f * z4 - 90.0f * z2 + 9.0f) / 28.359261f;
+			o.v5[1] = -0.669046f * x * z * (-3.0f + 7.0f * z2);
+			o.v5[2] =  0.473087f * (x2 - y2) * (-1.0f + 7.0f * z2);
+			o.v5[3] = -1.770130f * x * z * (x2 - 3.0f * y2);
+			o.v6 =  0.625836f * (x4 - 6.0f * y2 * x2 + y4);
+			
+			return o;
+		}
 	};
 };

+ 103 - 115
Data/Raw/Engine/Includes/SkinnedVertexInput.bslinc

@@ -1,137 +1,125 @@
-Parameters =
-{
-	StructBuffer boneMatrices : auto("BoneMatrices");
-};
-
-Technique 
 #ifdef USE_BLEND_SHAPES
-	: base("SkinnedMorphVertexInput") =
+mixin SkinnedMorphVertexInput
 #else
-	: base("SkinnedVertexInput") =
+mixin SkinnedVertexInput
 #endif
 {
-	Pass =
+	code
 	{
-		Common = 
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_Position;
-				float2 uv0 : TEXCOORD0;
-				float3 worldPosition : TEXCOORD1;
-				
-				float3 tangentToWorldZ : NORMAL; // Note: Half-precision could be used
-				float4 tangentToWorldX : TANGENT; // Note: Half-precision could be used
-			};
+			float4 position : SV_Position;
+			float2 uv0 : TEXCOORD0;
+			float3 worldPosition : TEXCOORD1;
+			
+			float3 tangentToWorldZ : NORMAL; // Note: Half-precision could be used
+			float4 tangentToWorldX : TANGENT; // Note: Half-precision could be used
 		};
 
-		Vertex =
+		Buffer<float4> boneMatrices;
+	
+		struct VertexInput
+		{
+			float3 position : POSITION;
+			float3 normal : NORMAL; // Note: Half-precision could be used
+			float4 tangent : TANGENT; // Note: Half-precision could be used
+			float2 uv0 : TEXCOORD0;
+			uint4 blendIndices : BLENDINDICES;
+			float4 blendWeights : BLENDWEIGHT;
+			
+			#ifdef USE_BLEND_SHAPES
+				float3 deltaPosition : POSITION1;
+				float4 deltaNormal : NORMAL1;
+			#endif
+		};
+		
+		struct VertexIntermediate
+		{
+			float3x4 blendMatrix;
+		
+			float3 worldNormal; // Note: Half-precision could be used
+			float4 worldTangent; // Note: Half-precision could be used
+		};
+		
+		float3x4 getBoneMatrix(uint idx)
 		{
-			Buffer<float4> boneMatrices;
+			float4 row0 = boneMatrices[idx * 3 + 0];
+			float4 row1 = boneMatrices[idx * 3 + 1];
+			float4 row2 = boneMatrices[idx * 3 + 2];
+			
+			return float3x4(row0, row1, row2);
+		}
 		
-			struct VertexInput
-			{
-				float3 position : POSITION;
-				float3 normal : NORMAL; // Note: Half-precision could be used
-				float4 tangent : TANGENT; // Note: Half-precision could be used
-				float2 uv0 : TEXCOORD0;
-				uint4 blendIndices : BLENDINDICES;
-				float4 blendWeights : BLENDWEIGHT;
-				
-				#ifdef USE_BLEND_SHAPES
-					float3 deltaPosition : POSITION1;
-					float4 deltaNormal : NORMAL1;
-				#endif
-			};
+		float3x4 getBlendMatrix(VertexInput input)
+		{
+			float3x4 result = input.blendWeights.x * getBoneMatrix(input.blendIndices.x);
+			result += input.blendWeights.y * getBoneMatrix(input.blendIndices.y);
+			result += input.blendWeights.z * getBoneMatrix(input.blendIndices.z);
+			result += input.blendWeights.w * getBoneMatrix(input.blendIndices.w);
 			
-			struct VertexIntermediate
-			{
-				float3x4 blendMatrix;
+			return result;
+		}
+		
+		float3x3 getSkinnedTangentToLocal(VertexInput input, float3x4 blendMatrix, out float tangentSign)
+		{
+			tangentSign = input.tangent.w * 2.0f - 1.0f;
+		
+			float3 normal = input.normal * 2.0f - 1.0f;
+			float3 tangent = input.tangent.xyz * 2.0f - 1.0f;
 			
-				float3 worldNormal; // Note: Half-precision could be used
-				float4 worldTangent; // Note: Half-precision could be used
-			};
+			#ifdef USE_BLEND_SHAPES
+				float3 deltaNormal = (input.deltaNormal.xyz * 2.0f - 1.0f) * 2.0f;
+				normal = normalize(normal + deltaNormal * input.deltaNormal.w);
+				tangent = normalize(tangent - dot(tangent, normal) * normal);
+			#endif
 			
-			float3x4 getBoneMatrix(uint idx)
-			{
-				float4 row0 = boneMatrices[idx * 3 + 0];
-				float4 row1 = boneMatrices[idx * 3 + 1];
-				float4 row2 = boneMatrices[idx * 3 + 2];
-				
-				return float3x4(row0, row1, row2);
-			}
+			normal = mul(blendMatrix, float4(normal, 0.0f)).xyz;
+			tangent = mul(blendMatrix, float4(tangent, 0.0f)).xyz;
 			
-			float3x4 getBlendMatrix(VertexInput input)
-			{
-				float3x4 result = input.blendWeights.x * getBoneMatrix(input.blendIndices.x);
-				result += input.blendWeights.y * getBoneMatrix(input.blendIndices.y);
-				result += input.blendWeights.z * getBoneMatrix(input.blendIndices.z);
-				result += input.blendWeights.w * getBoneMatrix(input.blendIndices.w);
-				
-				return result;
-			}
+			float3 bitangent = cross(normal, tangent) * tangentSign;
+			tangentSign *= gWorldDeterminantSign;
 			
-			float3x3 getSkinnedTangentToLocal(VertexInput input, float3x4 blendMatrix, out float tangentSign)
-			{
-				tangentSign = input.tangent.w * 2.0f - 1.0f;
+			float3x3 result = float3x3(tangent, bitangent, normal);
+			result = transpose(result);
+											
+			return result;
+		}
+		
+		VertexIntermediate getVertexIntermediate(VertexInput input)
+		{
+			VertexIntermediate result;
 			
-				float3 normal = input.normal * 2.0f - 1.0f;
-				float3 tangent = input.tangent.xyz * 2.0f - 1.0f;
-				
-				#ifdef USE_BLEND_SHAPES
-					float3 deltaNormal = (input.deltaNormal.xyz * 2.0f - 1.0f) * 2.0f;
-					normal = normalize(normal + deltaNormal * input.deltaNormal.w);
-					tangent = normalize(tangent - dot(tangent, normal) * normal);
-				#endif
-				
-				normal = mul(blendMatrix, float4(normal, 0.0f)).xyz;
-				tangent = mul(blendMatrix, float4(tangent, 0.0f)).xyz;
-				
-				float3 bitangent = cross(normal, tangent) * tangentSign;
-				tangentSign *= gWorldDeterminantSign;
-				
-				float3x3 result = float3x3(tangent, bitangent, normal);
-				result = transpose(result);
-												
-				return result;
-			}
+			result.blendMatrix = getBlendMatrix(input);
 			
-			VertexIntermediate getVertexIntermediate(VertexInput input)
-			{
-				VertexIntermediate result;
-				
-				result.blendMatrix = getBlendMatrix(input);
-				
-				float tangentSign;
-				float3x3 tangentToLocal = getSkinnedTangentToLocal(input, result.blendMatrix, tangentSign);
-				float3x3 tangentToWorld = mul((float3x3)gMatWorldNoScale, tangentToLocal);
-				
-				// Note: Consider transposing these externally, for easier reads
-				result.worldNormal = float3(tangentToWorld[0][2], tangentToWorld[1][2], tangentToWorld[2][2]); // Normal basis vector
-				result.worldTangent = float4(tangentToWorld[0][0], tangentToWorld[1][0], tangentToWorld[2][0], tangentSign); // Tangent basis vector
-				
-				return result;
-			}
+			float tangentSign;
+			float3x3 tangentToLocal = getSkinnedTangentToLocal(input, result.blendMatrix, tangentSign);
+			float3x3 tangentToWorld = mul((float3x3)gMatWorldNoScale, tangentToLocal);
 			
-			float4 getVertexWorldPosition(VertexInput input, VertexIntermediate intermediate)
-			{
-				#ifdef USE_BLEND_SHAPES
-					float4 position = float4(input.position + input.deltaPosition, 1.0f);
-				#else
-					float4 position = float4(input.position, 1.0f);
-				#endif
+			// Note: Consider transposing these externally, for easier reads
+			result.worldNormal = float3(tangentToWorld[0][2], tangentToWorld[1][2], tangentToWorld[2][2]); // Normal basis vector
+			result.worldTangent = float4(tangentToWorld[0][0], tangentToWorld[1][0], tangentToWorld[2][0], tangentSign); // Tangent basis vector
 			
-				position = float4(mul(intermediate.blendMatrix, position), 1.0f);
-				return mul(gMatWorld, position);
-			}
+			return result;
+		}
+		
+		float4 getVertexWorldPosition(VertexInput input, VertexIntermediate intermediate) // Skins the vertex position, then transforms it by gMatWorld
+		{
+			#ifdef USE_BLEND_SHAPES
+				float4 position = float4(input.position + input.deltaPosition, 1.0f); // Blend shape offset is applied before skinning
+			#else
+				float4 position = float4(input.position, 1.0f);
+			#endif
+		
+			position = float4(mul(intermediate.blendMatrix, position), 1.0f); // blendMatrix is float3x4, so w is re-appended as 1
+			return mul(gMatWorld, position); // gMatWorld is not declared in this mixin
+		}
+		
+		void populateVertexOutput(VertexInput input, VertexIntermediate intermediate, inout VStoFS result)
+		{
+			result.uv0 = input.uv0;
 			
-			void populateVertexOutput(VertexInput input, VertexIntermediate intermediate, inout VStoFS result)
-			{
-				result.uv0 = input.uv0;
-				
-				result.tangentToWorldZ = intermediate.worldNormal;
-				result.tangentToWorldX = intermediate.worldTangent;
-			}
-		};
+			result.tangentToWorldZ = intermediate.worldNormal;
+			result.tangentToWorldX = intermediate.worldTangent;
+		}
 	};
 };

+ 34 - 56
Data/Raw/Engine/Includes/SpriteImage.bslinc

@@ -1,67 +1,45 @@
-Parameters =
+mixin SpriteImage
 {
-	mat4x4 	gWorldTransform;
-	float	gInvViewportWidth;
-	float	gInvViewportHeight;
-	float	gViewportYFlip;
-	color	gTint;
-	
-	Sampler2D	gMainTexSamp : alias("gMainTexture");
-	Texture2D	gMainTexture;
-};
-
-Blocks = 
-{
-	Block GUIParams : auto("GUIParams");
-};
+	depth
+	{
+		read = false;
+		write = false;
+	};
 
-Technique : base("SpriteImage") =
-{
-	Pass =
+	code
 	{
-		DepthRead = false;
-		DepthWrite = false;
-		
-		Common =
+		[internal]
+		cbuffer GUIParams
 		{
-			cbuffer GUIParams
-			{
-				float4x4 gWorldTransform;
-				float gInvViewportWidth;
-				float gInvViewportHeight;
-				float gViewportYFlip;
-				float4 gTint;
-			}	
-		};
-		
-		Vertex =
+			float4x4 gWorldTransform;
+			float gInvViewportWidth;
+			float gInvViewportHeight;
+			float gViewportYFlip;
+			float4 gTint;
+		}	
+
+		void vsmain(
+			in float3 inPos : POSITION,
+			in float2 uv : TEXCOORD0,
+			out float4 oPosition : SV_Position,
+			out float2 oUv : TEXCOORD0)
 		{
-			void main(
-				in float3 inPos : POSITION,
-				in float2 uv : TEXCOORD0,
-				out float4 oPosition : SV_Position,
-				out float2 oUv : TEXCOORD0)
-			{
-				float4 tfrmdPos = mul(gWorldTransform, float4(inPos.xy, 0, 1));
+			float4 tfrmdPos = mul(gWorldTransform, float4(inPos.xy, 0, 1));
+
+			float tfrmdX = -1.0f + (tfrmdPos.x * gInvViewportWidth);
+			float tfrmdY = (1.0f - (tfrmdPos.y * gInvViewportHeight)) * gViewportYFlip;
 
-				float tfrmdX = -1.0f + (tfrmdPos.x * gInvViewportWidth);
-				float tfrmdY = (1.0f - (tfrmdPos.y * gInvViewportHeight)) * gViewportYFlip;
+			oPosition = float4(tfrmdX, tfrmdY, 0, 1);
+			oUv = uv;
+		}
 
-				oPosition = float4(tfrmdX, tfrmdY, 0, 1);
-				oUv = uv;
-			}
-		};
+		SamplerState gMainTexSamp;
+		Texture2D gMainTexture;
 		
-		Fragment =
+		float4 fsmain(in float4 inPos : SV_Position, float2 uv : TEXCOORD0) : SV_Target
 		{
-			SamplerState gMainTexSamp : register(s0);
-			Texture2D gMainTexture : register(t0);
-			
-			float4 main(in float4 inPos : SV_Position, float2 uv : TEXCOORD0) : SV_Target
-			{
-				float4 color = gMainTexture.Sample(gMainTexSamp, uv);
-				return color * gTint;
-			}
-		};
+			float4 color = gMainTexture.Sample(gMainTexSamp, uv);
+			return color * gTint;
+		}
 	};
 };

+ 18 - 15
Data/Raw/Engine/Includes/Surface.bslinc

@@ -1,26 +1,29 @@
-Technique 
- : inherits("BasePass")
- : inherits("Surface") =
+technique Surface1
 {
+	mixin BasePass;
+	mixin Surface;
 };
 
-Technique 
- : inherits("BasePassSkinned")
- : inherits("Surface") =
+technique Surface2
 {
-	Tags = { "Skinned" };
+	mixin BasePassSkinned;
+	mixin Surface;
+
+	tags = { "Skinned" };
 };
 
-Technique 
- : inherits("BasePassMorph")
- : inherits("Surface") =
+technique Surface3
 {
-	Tags = { "Morph" };
+	mixin BasePassMorph;
+	mixin Surface;
+
+	tags = { "Morph" };
 };
 
-Technique 
- : inherits("BasePassSkinnedMorph")
- : inherits("Surface") =
+technique Surface4
 {
-	Tags = { "SkinnedMorph" };
+	mixin BasePassSkinnedMorph;
+	mixin Surface;
+
+	tags = { "SkinnedMorph" };
 };

+ 27 - 30
Data/Raw/Engine/Includes/SurfaceData.bslinc

@@ -1,42 +1,39 @@
-Technique : base("SurfaceData") =
+mixin SurfaceData
 {
-	Pass =
+	code
 	{
-		Common = 
+		struct SurfaceData
 		{
-			struct SurfaceData
-			{
-				float4 albedo;
-				float4 worldNormal;
-				float depth;
-				float roughness;
-				float metalness;
-			};
-			
+			float4 albedo;
+			float4 worldNormal;
+			float depth;
+			float roughness;
+			float metalness;
+		};
+		
 #if MSAA_COUNT > 1
-			bool needsPerSampleShading(SurfaceData samples[MSAA_COUNT])
+		bool needsPerSampleShading(SurfaceData samples[MSAA_COUNT])
+		{
+			float3 albedo = samples[0].albedo.xyz;
+			float3 normal = samples[0].worldNormal.xyz;
+			float depth = samples[0].depth;
+
+			[unroll]
+			for(int i = 1; i < MSAA_COUNT; i++)
 			{
-				float3 albedo = samples[0].albedo.xyz;
-				float3 normal = samples[0].worldNormal.xyz;
-				float depth = samples[0].depth;
+				float3 otherAlbedo = samples[i].albedo.xyz;
+				float3 otherNormal = samples[i].worldNormal.xyz;
+				float otherDepth = samples[i].depth;
 
-				[unroll]
-				for(int i = 1; i < MSAA_COUNT; i++)
+				[branch]
+				if(abs(depth - otherDepth) > 0.1f || abs(dot(abs(normal - otherNormal), float3(1, 1, 1))) > 0.1f || abs(dot(albedo - otherAlbedo, float3(1, 1, 1))) > 0.1f)
 				{
-					float3 otherAlbedo = samples[i].albedo.xyz;
-					float3 otherNormal = samples[i].worldNormal.xyz;
-					float otherDepth = samples[i].depth;
-
-					[branch]
-					if(abs(depth - otherDepth) > 0.1f || abs(dot(abs(normal - otherNormal), float3(1, 1, 1))) > 0.1f || abs(dot(albedo - otherAlbedo, float3(1, 1, 1))) > 0.1f)
-					{
-						return true;
-					}
+					return true;
 				}
-				
-				return false;
 			}
+			
+			return false;
+		}
 #endif			
-		};
 	};
 };

+ 53 - 59
Data/Raw/Engine/Includes/VolumeRenderBase.bslinc

@@ -1,72 +1,66 @@
-Technique : base("VolumeRenderBase") =
+mixin VolumeRenderBase
 {
-	Pass =
+	depth
 	{
-		DepthWrite = false;
-		DepthRead = false;
-	
-		Common = 
+		read = false;
+		write = false;
+	};
+
+	code
+	{
+		struct VStoGS
 		{
-			struct VStoGS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-				uint layerIdx : TEXCOORD1;
-			};
-			
-			struct GStoFS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-				uint layerIdx : SV_RenderTargetArrayIndex;
-			};			
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
+			uint layerIdx : TEXCOORD1;
 		};
+		
+		struct GStoFS
+		{
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
+			uint layerIdx : SV_RenderTargetArrayIndex;
+		};			
 
-		Vertex =
+		struct VertexInput
 		{
-			struct VertexInput
-			{
-				float2 screenPos : POSITION;
-				float2 uv0 : TEXCOORD0;
-				uint layerIdx : SV_InstanceID;
-			};
-			
-			VStoGS main(VertexInput input)
-			{
-				VStoGS output;
-			
-				output.position = float4(input.screenPos, 0, 1);
-				output.uv0 = input.uv0;
-				output.layerIdx = input.layerIdx;
-				
-				return output;
-			}
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
+			uint layerIdx : SV_InstanceID;
 		};
 		
-		Geometry = 
-		{		
-			[maxvertexcount(3)]
-			void main(triangle VStoGS input[3], inout TriangleStream<GStoFS> outStream)
-			{
-				GStoFS vert0;
-				vert0.position = input[0].position;
-				vert0.uv0 = input[0].uv0;
-				vert0.layerIdx = input[0].layerIdx;
+		VStoGS vsmain(VertexInput input) // Pass-through vertex stage: forwards position, UV and layer index unchanged
+		{
+			VStoGS output;
+		
+			output.position = float4(input.screenPos, 0, 1); // screenPos is written to the output position as-is, with z = 0 and w = 1
+			output.uv0 = input.uv0;
+			output.layerIdx = input.layerIdx; // Input layerIdx is bound to SV_InstanceID in VertexInput
+			
+			return output;
+		}
+	
+		[maxvertexcount(3)]
+		void gsmain(triangle VStoGS input[3], inout TriangleStream<GStoFS> outStream)
+		{
+			GStoFS vert0;
+			vert0.position = input[0].position;
+			vert0.uv0 = input[0].uv0;
+			vert0.layerIdx = input[0].layerIdx;
 
-				GStoFS vert1;
-				vert1.position = input[1].position;
-				vert1.uv0 = input[1].uv0;
-				vert1.layerIdx = input[1].layerIdx;
+			GStoFS vert1;
+			vert1.position = input[1].position;
+			vert1.uv0 = input[1].uv0;
+			vert1.layerIdx = input[1].layerIdx;
 
-				GStoFS vert2;
-				vert2.position = input[2].position;
-				vert2.uv0 = input[2].uv0;
-				vert2.layerIdx = input[2].layerIdx;
+			GStoFS vert2;
+			vert2.position = input[2].position;
+			vert2.uv0 = input[2].uv0;
+			vert2.layerIdx = input[2].layerIdx;
 
-				outStream.Append(vert0);
-				outStream.Append(vert1);
-				outStream.Append(vert2);
-			}
-		};
+			outStream.Append(vert0);
+			outStream.Append(vert1);
+			outStream.Append(vert2);
+		}
 	};
 };

+ 15 - 18
Data/Raw/Engine/Shaders/Default.bsl

@@ -1,26 +1,23 @@
 #include "$ENGINE$\BasePass.bslinc"
 #include "$ENGINE$\Surface.bslinc"
 
-Technique : base("Surface") =
+mixin Surface
 {
-	Pass =
+	code
 	{
-		Fragment =
+		void fsmain(
+			in VStoFS input, 
+			out float4 OutGBufferA : SV_Target0,
+			out float4 OutGBufferB : SV_Target1,
+			out float2 OutGBufferC : SV_Target2)
 		{
-			void main(
-				in VStoFS input, 
-				out float4 OutGBufferA : SV_Target0,
-				out float4 OutGBufferB : SV_Target1,
-				out float2 OutGBufferC : SV_Target2)
-			{
-				SurfaceData surfaceData;
-				surfaceData.albedo = float4(0.05f, 0.05f, 0.05f, 1.0f);
-				surfaceData.worldNormal.xyz = input.tangentToWorldZ;
-				surfaceData.roughness = 1.0f;
-				surfaceData.metalness = 0.0f;
-				
-				encodeGBuffer(surfaceData, OutGBufferA, OutGBufferB, OutGBufferC);
-			}	
-		};
+			SurfaceData surfaceData;
+			surfaceData.albedo = float4(0.05f, 0.05f, 0.05f, 1.0f);
+			surfaceData.worldNormal.xyz = input.tangentToWorldZ;
+			surfaceData.roughness = 1.0f;
+			surfaceData.metalness = 0.0f;
+			
+			encodeGBuffer(surfaceData, OutGBufferA, OutGBufferB, OutGBufferC);
+		}	
 	};
 };

+ 27 - 43
Data/Raw/Engine/Shaders/Diffuse.bsl

@@ -1,52 +1,36 @@
 #include "$ENGINE$\BasePass.bslinc"
 #include "$ENGINE$\Surface.bslinc"
 
-Parameters =
+mixin Surface
 {
-	Sampler2D 	gAlbedoSamp : alias("gAlbedoTex");
-	Sampler2D 	gNormalSamp : alias("gNormalTex");
-	Sampler2D	gRoughnessSamp : alias("gRoughnessTex");
-	Sampler2D	gMetalnessSamp : alias("gMetalnessTex");
-	
-	Texture2D 	gAlbedoTex;
-	Texture2D	gNormalTex = "normal";
-	Texture2D	gRoughnessTex = "white";
-	Texture2D	gMetalnessTex = "black";
-};
-
-Technique : base("Surface") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		SamplerState gAlbedoSamp;
+		SamplerState gNormalSamp;
+		SamplerState gRoughnessSamp;
+		SamplerState gMetalnessSamp;
+		
+		Texture2D gAlbedoTex;
+		Texture2D gNormalTex;
+		Texture2D gRoughnessTex;
+		Texture2D gMetalnessTex;
+		
+		void fsmain(
+			in VStoFS input, 
+			out float4 OutGBufferA : SV_Target0,
+			out float4 OutGBufferB : SV_Target1,
+			out float2 OutGBufferC : SV_Target2)
 		{
-			SamplerState gAlbedoSamp : register(s0);
-			SamplerState gNormalSamp : register(s1);
-			SamplerState gRoughnessSamp : register(s2);
-			SamplerState gMetalnessSamp : register(s3);
-			
-			Texture2D gAlbedoTex : register(t0);
-			Texture2D gNormalTex : register(t1);
-			Texture2D gRoughnessTex : register(t2);
-			Texture2D gMetalnessTex : register(t3);
-			
-			void main(
-				in VStoFS input, 
-				out float4 OutGBufferA : SV_Target0,
-				out float4 OutGBufferB : SV_Target1,
-				out float2 OutGBufferC : SV_Target2)
-			{
-				float3 normal = normalize(gNormalTex.Sample(gNormalSamp, input.uv0) * 2.0f - float3(1, 1, 1));
-				float3 worldNormal = calcWorldNormal(input, normal);
+			float3 normal = normalize(gNormalTex.Sample(gNormalSamp, input.uv0) * 2.0f - float3(1, 1, 1));
+			float3 worldNormal = calcWorldNormal(input, normal);
+		
+			SurfaceData surfaceData;
+			surfaceData.albedo = gAlbedoTex.Sample(gAlbedoSamp, input.uv0);
+			surfaceData.worldNormal.xyz = worldNormal;
+			surfaceData.roughness = gRoughnessTex.Sample(gRoughnessSamp, input.uv0).x;
+			surfaceData.metalness = gMetalnessTex.Sample(gMetalnessSamp, input.uv0).x;
 			
-				SurfaceData surfaceData;
-				surfaceData.albedo = gAlbedoTex.Sample(gAlbedoSamp, input.uv0);
-				surfaceData.worldNormal.xyz = worldNormal;
-				surfaceData.roughness = gRoughnessTex.Sample(gRoughnessSamp, input.uv0).x;
-				surfaceData.metalness = gMetalnessTex.Sample(gMetalnessSamp, input.uv0).x;
-				
-				encodeGBuffer(surfaceData, OutGBufferA, OutGBufferB, OutGBufferC);
-			}	
-		};
+			encodeGBuffer(surfaceData, OutGBufferA, OutGBufferB, OutGBufferC);
+		}	
 	};
 };

+ 41 - 46
Data/Raw/Engine/Shaders/FlatFramebufferToTexture.bsl

@@ -1,60 +1,55 @@
-Technique =
+technique FlatFBToTexture
 {
-	Pass =
+	depth
 	{
-		DepthRead = false;
-		DepthWrite = false;
-		
-		Common = 
+		read = false;
+		write = false;
+	};
+
+	code
+	{
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
-	
-		Vertex =
-		{
-			struct VertexInput
-			{
-				float2 screenPos : POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
-			
-			VStoFS main(VertexInput input)
-			{
-				VStoFS output;
-			
-				output.position = float4(input.screenPos, 0, 1);
-				output.uv0 = input.uv0;
 
-				return output;
-			}			
+		struct VertexInput
+		{
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
 		};
 		
-		Fragment = 
+		VStoFS vsmain(VertexInput input)
 		{
-			cbuffer Params : register(b0)
-			{
-				uint2 gFramebufferSize;
-				uint gSampleCount;
-			}		
+			VStoFS output;
 		
-			Buffer<float4> gInput : register(t0);
+			output.position = float4(input.screenPos, 0, 1);
+			output.uv0 = input.uv0;
 
-			uint getLinearAddress(uint2 coord, uint sampleIndex)
-			{
-				return (coord.y * gFramebufferSize.x + coord.x) * gSampleCount + sampleIndex;
-			}			
+			return output;
+		}			
 
-			float4 main(VStoFS input, uint sampleIndex : SV_SampleIndex) : SV_Target0
-			{
-				int2 pixelPos = trunc(input.uv0);
-				uint sourceIdx = getLinearAddress(pixelPos, sampleIndex);
+		[internal]
+		cbuffer Params : register(b0)
+		{
+			uint2 gFramebufferSize;
+			uint gSampleCount;
+		}		
+	
+		Buffer<float4> gInput : register(t0);
 
-				return gInput[sourceIdx];
-			}
-		};
+		uint getLinearAddress(uint2 coord, uint sampleIndex) // Flat buffer index for sample 'sampleIndex' of the pixel at 'coord'.
+		{
+			return (coord.y * gFramebufferSize.x + coord.x) * gSampleCount + sampleIndex; // Row-major pixel index (row width gFramebufferSize.x), gSampleCount consecutive entries per pixel.
+		}			
+
+		float4 fsmain(VStoFS input, uint sampleIndex : SV_SampleIndex) : SV_Target0 // Returns the gInput entry addressed by this fragment's uv0 and sample index.
+		{
+			int2 pixelPos = trunc(input.uv0); // NOTE(review): truncation implies uv0 carries pixel coordinates rather than [0, 1] UVs - confirm against caller.
+			uint sourceIdx = getLinearAddress(pixelPos, sampleIndex);
+
+			return gInput[sourceIdx];
+		}
 	};
 };

+ 113 - 114
Data/Raw/Engine/Shaders/IrradianceComputeSH.bsl

@@ -1,135 +1,134 @@
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 #include "$ENGINE$\SHCommon.bslinc"
 
-Technique
- : inherits("ReflectionCubemapCommon")
- : inherits("SHCommon") =
+technique IrradianceComputeSH
 {
-	Pass =
+	mixin ReflectionCubemapCommon;
+	mixin SHCommon;
+
+	code
 	{
-		Compute = 
+		struct SHCoeffsAndWeight
 		{
-			struct SHCoeffsAndWeight
-			{
-				SHVector5RGB coeffs;
-				float weight;
-			};
+			SHVector5RGB coeffs;
+			float weight;
+		};
+	
+		SamplerState gInputSamp;
+		TextureCube gInputTex;
+	
+		RWStructuredBuffer<SHCoeffsAndWeight> gOutput;
 		
-			SamplerState gInputSamp;
-			TextureCube gInputTex;
+		[internal]
+		cbuffer Params
+		{
+			uint gCubeFace;
+			uint gFaceSize;
+			uint2 gDispatchSize;
+		}			
 		
-			RWStructuredBuffer<SHCoeffsAndWeight> gOutput;
-			
-			cbuffer Params
-			{
-				uint gCubeFace;
-				uint gFaceSize;
-				uint2 gDispatchSize;
-			}			
-			
-			groupshared SHCoeffsAndWeight sCoeffs[TILE_WIDTH * TILE_HEIGHT];
+		groupshared SHCoeffsAndWeight sCoeffs[TILE_WIDTH * TILE_HEIGHT];
 
-			/** 
-			 * Integrates area of a cube face projected onto the surface of the sphere, from [0, 0] to [u, v]. 
-			 * u & v expected in [-1, -1] to [1, 1] range.
-			 *
-			 * See http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/ for derivation.
-			 */
-			float integrateProjectedCubeArea(float u, float v)
-			{
-				return atan2(u * v, sqrt(u * u + v * v + 1.0f));
-			}
-			
-			/** Calculates solid angle of a texel projected onto a sphere. */
-			float texelSolidAngle(float u, float v, float invFaceSize)
-			{
-				float x0 = u - invFaceSize;
-				float x1 = u + invFaceSize;
-				float y0 = v - invFaceSize;
-				float y1 = v + invFaceSize;
+		/** 
+		 * Integrates the area of a cube face projected onto the surface of a sphere, from [0, 0] to [u, v].
+		 * Evaluated in closed form as atan2(u * v, sqrt(u * u + v * v + 1)).
+		 *
+		 * See http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/ for derivation.
+		 *
+		 * @param	u	First face coordinate, expected in [-1, 1] range.
+		 * @param	v	Second face coordinate, expected in [-1, 1] range.
+		 * @return		Value of the integral at (u, v).
+		 */
+		float integrateProjectedCubeArea(float u, float v)
+		{
+			return atan2(u * v, sqrt(u * u + v * v + 1.0f));
+		}
+		
+		/** Calculates solid angle of a texel projected onto a sphere. */
+		float texelSolidAngle(float u, float v, float invFaceSize)
+		{
+			float x0 = u - invFaceSize;
+			float x1 = u + invFaceSize;
+			float y0 = v - invFaceSize;
+			float y1 = v + invFaceSize;
 
-				return   integrateProjectedCubeArea(x1, y1)
-                       - integrateProjectedCubeArea(x0, y1)
-                       - integrateProjectedCubeArea(x1, y0)
-                       + integrateProjectedCubeArea(x0, y0);
-			}
+			return   integrateProjectedCubeArea(x1, y1)
+				   - integrateProjectedCubeArea(x0, y1)
+				   - integrateProjectedCubeArea(x1, y0)
+				   + integrateProjectedCubeArea(x0, y0);
+		}
+		
+		[numthreads(TILE_WIDTH, TILE_HEIGHT, 1)]
+		void csmain(
+			uint groupIdx : SV_GroupIndex,
+			uint3 groupId : SV_GroupID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
+		{
+			SHCoeffsAndWeight data;
+			data.weight = 0;
 			
-			[numthreads(TILE_WIDTH, TILE_HEIGHT, 1)]
-			void main(
-				uint groupIdx : SV_GroupIndex,
-				uint3 groupId : SV_GroupID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
-			{
-				SHCoeffsAndWeight data;
-				data.weight = 0;
-				
-				SHZero(data.coeffs.R);
-				SHZero(data.coeffs.G);
-				SHZero(data.coeffs.B);
-				
-				float invFaceSize = 1.0f / gFaceSize;
+			SHZero(data.coeffs.R);
+			SHZero(data.coeffs.G);
+			SHZero(data.coeffs.B);
 			
-				uint2 pixelCoords = dispatchThreadId.xy * PIXELS_PER_THREAD;
-				uint2 pixelCoordsEnd = pixelCoords + uint2(PIXELS_PER_THREAD, PIXELS_PER_THREAD);
-				for(uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
+			float invFaceSize = 1.0f / gFaceSize;
+		
+			uint2 pixelCoords = dispatchThreadId.xy * PIXELS_PER_THREAD;
+			uint2 pixelCoordsEnd = pixelCoords + uint2(PIXELS_PER_THREAD, PIXELS_PER_THREAD);
+			for(uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
+			{
+				for(uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
 				{
-					for(uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
-					{
-						// Ignore pixels out of valid range
-						if (x >= gFaceSize || y >= gFaceSize)
-							break;
-							
-						// Map from [0, size-1] to [-1.0 + invSize, 1.0 - invSize].
-						// (+0.5 in order to sample center of texel)
-                        float u = 2.0f * (x + 0.5f) * invFaceSize - 1.0f;
-                        float v = 2.0f * (y + 0.5f) * invFaceSize - 1.0f;
-						
-						float3 dir = getDirFromCubeFace(gCubeFace, float2(u, v));
-						dir = normalize(dir);
-						
-						// Need to calculate solid angle (weight) of the texel, as cube face corners have
-						// much smaller solid angle, meaning many of them occupy the same area when projected
-						// on a sphere. Without weighing that area would look too bright.
-						float weight = texelSolidAngle(u, v, invFaceSize);
-						
-						SHVector5 shBasis = SHBasis5(dir);
-						float3 radiance = gInputTex.SampleLevel(gInputSamp, dir, 0).rgb;
-						
-						SHMultiplyAdd(data.coeffs.R, shBasis, radiance.r * weight);
-						SHMultiplyAdd(data.coeffs.G, shBasis, radiance.g * weight);
-						SHMultiplyAdd(data.coeffs.B, shBasis, radiance.b * weight);
+					// Ignore pixels out of valid range
+					if (x >= gFaceSize || y >= gFaceSize)
+						break;
 						
-						data.weight += weight;
-					}
+					// Map from [0, size-1] to [-1.0 + invSize, 1.0 - invSize].
+					// (+0.5 in order to sample center of texel)
+					float u = 2.0f * (x + 0.5f) * invFaceSize - 1.0f;
+					float v = 2.0f * (y + 0.5f) * invFaceSize - 1.0f;
+					
+					float3 dir = getDirFromCubeFace(gCubeFace, float2(u, v));
+					dir = normalize(dir);
+					
+					// Need to calculate solid angle (weight) of the texel, as cube face corners have
+					// much smaller solid angle, meaning many of them occupy the same area when projected
+					// on a sphere. Without weighting, that area would look too bright.
+					float weight = texelSolidAngle(u, v, invFaceSize);
+					
+					SHVector5 shBasis = SHBasis5(dir);
+					float3 radiance = gInputTex.SampleLevel(gInputSamp, dir, 0).rgb;
+					
+					SHMultiplyAdd(data.coeffs.R, shBasis, radiance.r * weight);
+					SHMultiplyAdd(data.coeffs.G, shBasis, radiance.g * weight);
+					SHMultiplyAdd(data.coeffs.B, shBasis, radiance.b * weight);
+					
+					data.weight += weight;
 				}
-				
-				sCoeffs[groupIdx] = data;
-				
-				GroupMemoryBarrierWithGroupSync();
-				
-				int numThreads = TILE_WIDTH * TILE_HEIGHT;
-				[unroll]
-				for(int tc = numThreads / 2; tc > 0; tc >>= 1)
+			}
+			
+			sCoeffs[groupIdx] = data;
+			
+			GroupMemoryBarrierWithGroupSync();
+			
+			int numThreads = TILE_WIDTH * TILE_HEIGHT;
+			[unroll]
+			for(int tc = numThreads / 2; tc > 0; tc >>= 1)
+			{
+				if(groupIdx < tc)
 				{
-					if(groupIdx < tc)
-					{
-						SHAdd(sCoeffs[groupIdx].coeffs.R, sCoeffs[groupIdx + tc].coeffs.R);
-						SHAdd(sCoeffs[groupIdx].coeffs.G, sCoeffs[groupIdx + tc].coeffs.G);
-						SHAdd(sCoeffs[groupIdx].coeffs.B, sCoeffs[groupIdx + tc].coeffs.B);
+					SHAdd(sCoeffs[groupIdx].coeffs.R, sCoeffs[groupIdx + tc].coeffs.R);
+					SHAdd(sCoeffs[groupIdx].coeffs.G, sCoeffs[groupIdx + tc].coeffs.G);
+					SHAdd(sCoeffs[groupIdx].coeffs.B, sCoeffs[groupIdx + tc].coeffs.B);
 
-						sCoeffs[groupIdx].weight += sCoeffs[groupIdx + tc].weight;
-					}
-				
-					GroupMemoryBarrierWithGroupSync();
-				}
-				
-				if(groupIdx == 0)
-				{
-					uint faceOffset = gDispatchSize.x * gDispatchSize.y * gCubeFace;
-					uint outputIdx = faceOffset + groupId.y * gDispatchSize.x + groupId.x;
-					gOutput[outputIdx] = sCoeffs[0];
+					sCoeffs[groupIdx].weight += sCoeffs[groupIdx + tc].weight;
 				}
+			
+				GroupMemoryBarrierWithGroupSync();
 			}
-		};
+			
+			if(groupIdx == 0)
+			{
+				uint faceOffset = gDispatchSize.x * gDispatchSize.y * gCubeFace;
+				uint outputIdx = faceOffset + groupId.y * gDispatchSize.x + groupId.x;
+				gOutput[outputIdx] = sCoeffs[0];
+			}
+		}
 	};
 };

+ 67 - 68
Data/Raw/Engine/Shaders/IrradianceProjectSH.bsl

@@ -2,78 +2,77 @@
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 #include "$ENGINE$\SHCommon.bslinc"
 
-Technique 
- : inherits("PPBase")
- : inherits("ReflectionCubemapCommon")
- : inherits("SHCommon") =
+technique IrradianceProjectSH
 {
-	Pass =
+	mixin PPBase;
+	mixin ReflectionCubemapCommon;
+	mixin SHCommon;
+
+	code
 	{
-		Fragment =
+		[internal]
+		cbuffer Params
 		{
-			cbuffer Params
-			{
-				int gCubeFace;
-			}	
-		
-			StructuredBuffer<SHVector5RGB> gSHCoeffs;
+			int gCubeFace;
+		}	
+	
+		StructuredBuffer<SHVector5RGB> gSHCoeffs;
 
-			float evaluateLambert(SHVector5 coeffs)
-			{
-				// Multiply irradiance SH coefficients by cosine lobe (Lambert diffuse) and evaluate resulting SH
-				// See: http://cseweb.ucsd.edu/~ravir/papers/invlamb/josa.pdf for derivation of the
-				// cosine lobe factors
-				float output = 0.0f;
-				
-				// Band 0 (factor 1.0)
-				output += coeffs.v0[0];
-				
-				// Band 1 (factor 2/3)
-				float f = (2.0f/3.0f);
-				float4 f4 = float4(f, f, f, f);
-				
-				output += dot(coeffs.v0.gba, f4.rgb);
-				
-                // Band 2 (factor 1/4)
-				f = (1.0f/4.0f);
-				f4 = float4(f, f, f, f);
-				
-				output += dot(coeffs.v1, f4);
-				output += coeffs.v2.r * f;
-				
-				// Band 3 (factor 0)
-				
-				// Band 4 (factor -1/24)
-				f = (-1.0f/24.0f);
-				f4 = float4(f, f, f, f);
-				
-				output += dot(coeffs.v4, f4);
-				output += dot(coeffs.v5, f4);
-				output += coeffs.v6 * f;
-				
-				return output;
-			}
+		float evaluateLambert(SHVector5 coeffs) // Returns the sum of the coefficients read below, each scaled by its band's cosine lobe factor.
+		{
+			// Multiply irradiance SH coefficients by the cosine lobe (Lambert diffuse) and evaluate the resulting SH.
+			// See: http://cseweb.ucsd.edu/~ravir/papers/invlamb/josa.pdf for derivation of the
+			// cosine lobe factors. Evaluation is a plain weighted sum of the provided coefficients.
+			float output = 0.0f;
+			
+			// Band 0 (factor 1.0)
+			output += coeffs.v0[0]; // Single band-0 coefficient, added unscaled.
+			
+			// Band 1 (factor 2/3)
+			float f = (2.0f/3.0f);
+			float4 f4 = float4(f, f, f, f); // Factor replicated so a whole vector can be scaled and summed with one dot().
+			
+			output += dot(coeffs.v0.gba, f4.rgb); // Remaining three components of v0.
+			
+			// Band 2 (factor 1/4)
+			f = (1.0f/4.0f);
+			f4 = float4(f, f, f, f);
+			
+			output += dot(coeffs.v1, f4);
+			output += coeffs.v2.r * f; // Only the first component of v2 is read for this band.
+			
+			// Band 3 (factor 0) - nothing is added; v2.gba and v3 are not read here.
+			
+			// Band 4 (factor -1/24)
+			f = (-1.0f/24.0f);
+			f4 = float4(f, f, f, f);
+			
+			output += dot(coeffs.v4, f4);
+			output += dot(coeffs.v5, f4);
+			output += coeffs.v6 * f;
+			
+			return output;
+		}
+		
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			float2 scaledUV = input.uv0 * 2.0f - 1.0f;
+			float3 dir = getDirFromCubeFace(gCubeFace, scaledUV);
+			dir = normalize(dir);
+			
+			SHVector5 shBasis = SHBasis5(dir);
+							
+			SHVector5RGB coeffs = gSHCoeffs[0];
+			SHMultiply(coeffs.R, shBasis);
+			SHMultiply(coeffs.G, shBasis);
+			SHMultiply(coeffs.B, shBasis);
+			
+			float3 output = 0;
+			output.r = evaluateLambert(coeffs.R);
+			output.g = evaluateLambert(coeffs.G);
+			output.b = evaluateLambert(coeffs.B);
 			
-			float4 main(VStoFS input) : SV_Target0
-			{
-				float2 scaledUV = input.uv0 * 2.0f - 1.0f;
-				float3 dir = getDirFromCubeFace(gCubeFace, scaledUV);
-				dir = normalize(dir);
-				
-				SHVector5 shBasis = SHBasis5(dir);
-								
-				SHVector5RGB coeffs = gSHCoeffs[0];
-				SHMultiply(coeffs.R, shBasis);
-				SHMultiply(coeffs.G, shBasis);
-				SHMultiply(coeffs.B, shBasis);
-				
-				float3 output = 0;
-				output.r = evaluateLambert(coeffs.R);
-				output.g = evaluateLambert(coeffs.G);
-				output.b = evaluateLambert(coeffs.B);
-				
-				return float4(output.rgb, 1.0f);
-			}	
-		};
+			return float4(output.rgb, 1.0f);
+		}	
 	};
 };

+ 48 - 49
Data/Raw/Engine/Shaders/IrradianceReduceSH.bsl

@@ -1,63 +1,62 @@
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 #include "$ENGINE$\SHCommon.bslinc"
 
-Technique
- : inherits("ReflectionCubemapCommon")
- : inherits("SHCommon") =
+technique IrradianceReduceSH
 {
-	Pass =
+	mixin ReflectionCubemapCommon;
+	mixin SHCommon;
+
+	code
 	{
-		Compute = 
+		#define PI 3.1415926
+	
+		struct SHCoeffsAndWeight
 		{
-			#define PI 3.1415926
-		
-			struct SHCoeffsAndWeight
-			{
-				SHVector5RGB coeffs;
-				float weight;
-			};
+			SHVector5RGB coeffs;
+			float weight;
+		};
 
-			StructuredBuffer<SHCoeffsAndWeight> gInput;
-			RWStructuredBuffer<SHVector5RGB> gOutput;
+		StructuredBuffer<SHCoeffsAndWeight> gInput;
+		RWStructuredBuffer<SHVector5RGB> gOutput;
+		
+		[internal]
+		cbuffer Params
+		{
+			uint gNumEntries;
+		}			
+		
+		[numthreads(1, 1, 1)]
+		void csmain(
+			uint groupIdx : SV_GroupIndex,
+			uint groupId : SV_GroupID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
+		{
+			SHVector5RGB coeffs;
+			float weight = 0;
 			
-			cbuffer Params
-			{
-				uint gNumEntries;
-			}			
+			SHZero(coeffs.R);
+			SHZero(coeffs.G);
+			SHZero(coeffs.B);
 			
-			[numthreads(1, 1, 1)]
-			void main(
-				uint groupIdx : SV_GroupIndex,
-				uint groupId : SV_GroupID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
+			// Note: There shouldn't be many entries, so we add them all in one thread. Otherwise we should do parallel reduction.
+			for(uint i = 0; i < gNumEntries; i++)
 			{
-				SHVector5RGB coeffs;
-				float weight = 0;
-				
-				SHZero(coeffs.R);
-				SHZero(coeffs.G);
-				SHZero(coeffs.B);
-				
-				// Note: There shouldn't be many entries, so we add them all in one thread. Otherwise we should do parallel reduction.
-				for(uint i = 0; i < gNumEntries; i++)
-				{
-					SHCoeffsAndWeight current = gInput[i];
-				
-					SHAdd(coeffs.R, current.coeffs.R);
-					SHAdd(coeffs.G, current.coeffs.G);
-					SHAdd(coeffs.B, current.coeffs.B);
+				SHCoeffsAndWeight current = gInput[i];
+			
+				SHAdd(coeffs.R, current.coeffs.R);
+				SHAdd(coeffs.G, current.coeffs.G);
+				SHAdd(coeffs.B, current.coeffs.B);
 
-					weight += current.weight;
-				}
-				
-				// Normalize
-				float normFactor = (4 * PI) / weight;
-				SHMultiply(coeffs.R, normFactor);
-				SHMultiply(coeffs.G, normFactor);
-				SHMultiply(coeffs.B, normFactor);
-					
-				gOutput[0] = coeffs;
+				weight += current.weight;
 			}
-		};
+			
+			// Normalize
+			float normFactor = (4 * PI) / weight;
+			SHMultiply(coeffs.R, normFactor);
+			SHMultiply(coeffs.G, normFactor);
+			SHMultiply(coeffs.B, normFactor);
+				
+			gOutput[0] = coeffs;
+		}
 	};
 };

+ 126 - 134
Data/Raw/Engine/Shaders/LightGridLLCreation.bsl

@@ -4,165 +4,157 @@
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 #include "$ENGINE$\LightGridCommon.bslinc"
 
-Blocks =
+technique LightGridLLCreation
 {
-	Block PerCamera : auto("PerCamera");
-	Block GridParams : auto("GridParams");
-};
+	mixin PerCameraData;
+	mixin LightingCommon;
+	mixin LightGridCommon;
+	mixin ImageBasedLighting;
 
-Technique
- : inherits("PerCameraData")
- : inherits("LightingCommon")
- : inherits("LightGridCommon")
- : inherits("ImageBasedLighting") =
-{
-	Pass =
+	code
 	{
-		Compute = 
+		[layout(r32ui)]
+		RWBuffer<uint> gLightsCounter;
+		[layout(r32ui)]
+		RWBuffer<uint> gLightsLLHeads;
+		RWBuffer<uint4> gLightsLL;
+		
+		[layout(r32ui)]
+		RWBuffer<uint> gProbesCounter;
+		[layout(r32ui)]
+		RWBuffer<uint> gProbesLLHeads;
+		RWBuffer<uint2> gProbesLL;
+			
+		// Generates an axis aligned bounding box in NDC and transforms it to view space.
+		// Note: This will overlap other cells, so it might be better to use frustum planes
+		// instead of AABB, although frustum testing procedure could result in more false positives.
+		void calcCellAABB(uint3 cellIdx, out float3 center, out float3 extent)
 		{
-			[layout(r32ui)]
-			RWBuffer<uint> gLightsCounter;
-			[layout(r32ui)]
-			RWBuffer<uint> gLightsLLHeads;
-			RWBuffer<uint4> gLightsLL;
+			// Note: AABB calculation in tiled deferred image based lighting shader uses fewer instructions than this,
+			// see if it can be applied here.
+		
+			// Convert grid XY coordinates to clip coordinates
+			float2 a = 2.0f / gGridSize.xy;
+		
+			float3 ndcMin;
+			float3 ndcMax;
+		
+			ndcMin.xy = cellIdx.xy * a - float2(1.0f, 1.0f);
+			ndcMax.xy = (cellIdx.xy + 1) * a - float2(1.0f, 1.0f);
+		
+			// Flip Y depending on the render API, i.e. on whether Y in NDC is facing up or down
+			// (We negate the value because we want NDC with Y flipped, so origin is top left)
+			float flipY = -sign(gMatProj[1][1]);
+			ndcMin.y *= flipY;
+			ndcMax.y *= flipY;
+		
+			// Because we're viewing along negative Z, farther end is the minimum
+			float viewZMin = calcViewZFromCellZ(cellIdx.z + 1);
+			float viewZMax = calcViewZFromCellZ(cellIdx.z);
+		
+			ndcMin.z = convertToNDCZ(viewZMax);
+			ndcMax.z = convertToNDCZ(viewZMin);
+		
+			float4 corner[8];
+			// Near
+			corner[0] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMin.z, 1.0f));
+			corner[1] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMin.z, 1.0f));
+			corner[2] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMin.z, 1.0f));
+			corner[3] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMin.z, 1.0f));
+		
+			// Far
+			corner[4] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMax.z, 1.0f));
+			corner[5] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMax.z, 1.0f));
+			corner[6] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMax.z, 1.0f));
+			corner[7] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMax.z, 1.0f));
+		
+			[unroll]
+			for(uint i = 0; i < 8; ++i)
+				corner[i].xy /= corner[i].w;
+		
+			float3 viewMin = float3(corner[0].xy, viewZMin);
+			float3 viewMax = float3(corner[0].xy, viewZMax);
 			
-			[layout(r32ui)]
-			RWBuffer<uint> gProbesCounter;
-			[layout(r32ui)]
-			RWBuffer<uint> gProbesLLHeads;
-			RWBuffer<uint2> gProbesLL;
-				
-			// Generates a an axis aligned bounding box in NDC and transforms it to view space.
-			// Note: This will overlap other cells, so it might be better to use frustum planes
-			// instead of AABB, although frustum testing procedure could result in more false positive
-			void calcCellAABB(uint3 cellIdx, out float3 center, out float3 extent)
+			[unroll]
+			for(uint i = 1; i < 8; ++i)
 			{
-				// Note:: AABB calculation in tiled deferred image based lighting shader uses less instructions than this,
-				// see if it can be applied here.
-			
-				// Convert grid XY coordinates to clip coordinates
-				float2 a = 2.0f / gGridSize.xy;
-			
-				float3 ndcMin;
-				float3 ndcMax;
-			
-				ndcMin.xy = cellIdx.xy * a - float2(1.0f, 1.0f);
-				ndcMax.xy = (cellIdx.xy + 1) * a - float2(1.0f, 1.0f);
-			
-				// Flip Y depending on render API, depending if Y in NDC is facing up or down
-				// (We negate the value because we want NDC with Y flipped, so origin is top left)
-				float flipY = -sign(gMatProj[1][1]);
-				ndcMin.y *= flipY;
-				ndcMax.y *= flipY;
-			
-				// Because we're viewing along negative Z, farther end is the minimum
-				float viewZMin = calcViewZFromCellZ(cellIdx.z + 1);
-				float viewZMax = calcViewZFromCellZ(cellIdx.z);
-			
-				ndcMin.z = convertToNDCZ(viewZMax);
-				ndcMax.z = convertToNDCZ(viewZMin);
-			
-				float4 corner[8];
-				// Near
-				corner[0] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMin.z, 1.0f));
-				corner[1] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMin.z, 1.0f));
-				corner[2] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMin.z, 1.0f));
-				corner[3] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMin.z, 1.0f));
+				viewMin.xy = min(viewMin.xy, corner[i].xy);
+				viewMax.xy = max(viewMax.xy, corner[i].xy);
+			}
 			
-				// Far
-				corner[4] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMax.z, 1.0f));
-				corner[5] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMax.z, 1.0f));
-				corner[6] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMax.z, 1.0f));
-				corner[7] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMax.z, 1.0f));
+			extent = (viewMax - viewMin) * 0.5f;
+			center = viewMin + extent;
+		}
+	
+		[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
+		void csmain(
+			uint3 groupId : SV_GroupID,
+			uint3 groupThreadId : SV_GroupThreadID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
+		{
+			// Ignore threads whose XY coordinates fall outside the grid
+			if (any(dispatchThreadId.xy >= gGridSize.xy))
+				return;
+				
+			uint maxNumLinks = gNumCells * gMaxNumLightsPerCell;	
+			uint cellIdx = (dispatchThreadId.z * gGridSize.y + dispatchThreadId.y) * gGridSize.x + dispatchThreadId.x;
 			
-				[unroll]
-				for(uint i = 0; i < 8; ++i)
-					corner[i].xy /= corner[i].w;
+			float3 cellCenter;
+			float3 cellExtent;
+			calcCellAABB(dispatchThreadId, cellCenter, cellExtent);
 			
-				float3 viewMin = float3(corner[0].xy, viewZMin);
-				float3 viewMax = float3(corner[0].xy, viewZMax);
-				
-				[unroll]
-				for(uint i = 1; i < 8; ++i)
-				{
-					viewMin.xy = min(viewMin.xy, corner[i].xy);
-					viewMax.xy = max(viewMax.xy, corner[i].xy);
-				}
-				
-				extent = (viewMax - viewMin) * 0.5f;
-				center = viewMin + extent;
-			}
-		
-			[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
-			void main(
-				uint3 groupId : SV_GroupID,
-				uint3 groupThreadId : SV_GroupThreadID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
+			for(uint type = 1; type < 3; ++type)
 			{
-				// Ignore pixels out of valid range
-				if (any(dispatchThreadId.xy >= gGridSize.xy))
-					return;
-					
-				uint maxNumLinks = gNumCells * gMaxNumLightsPerCell;	
-				uint cellIdx = (dispatchThreadId.z * gGridSize.y + dispatchThreadId.y) * gGridSize.x + dispatchThreadId.x;
-				
-				float3 cellCenter;
-				float3 cellExtent;
-				calcCellAABB(dispatchThreadId, cellCenter, cellExtent);
-				
-				for(uint type = 1; type < 3; ++type)
+				uint lightOffset = gLightOffsets[type - 1];
+				uint lightEnd = gLightOffsets[type];
+				for(uint i = lightOffset; i < lightEnd; ++i)
 				{
-					uint lightOffset = gLightOffsets[type - 1];
-					uint lightEnd = gLightOffsets[type];
-					for(uint i = lightOffset; i < lightEnd; ++i)
-					{
-						float4 lightPosition = mul(gMatView, float4(gLights[i].position, 1.0f));
-						float lightRadius = gLights[i].attRadius;
-						
-						// Calculate distance from box to light
-						float3 distances = max(abs(lightPosition - cellCenter) - cellExtent, 0);
-						float distSqrd = dot(distances, distances);
-						
-						if(distSqrd <= (lightRadius * lightRadius))
-						{
-							uint nextLink;
-							InterlockedAdd(gLightsCounter[0], 1U, nextLink);
-							
-							if(nextLink < maxNumLinks)
-							{
-								uint prevLink;
-								InterlockedExchange(gLightsLLHeads[cellIdx], nextLink, prevLink);
-								
-								gLightsLL[nextLink] = uint4(i, type, prevLink, 0);
-							}
-						}
-					}
-				}
-				
-				for(uint i = 0; i < gNumReflProbes; ++i)
-				{
-					float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
-					float probeRadius = gReflectionProbes[i].radius;
+					float4 lightPosition = mul(gMatView, float4(gLights[i].position, 1.0f));
+					float lightRadius = gLights[i].attRadius;
 					
 					// Calculate distance from box to light
-					float3 distances = max(abs(probePosition - cellCenter) - cellExtent, 0);
+					float3 distances = max(abs(lightPosition - cellCenter) - cellExtent, 0);
 					float distSqrd = dot(distances, distances);
 					
-					if(distSqrd <= (probeRadius * probeRadius))
+					if(distSqrd <= (lightRadius * lightRadius))
 					{
 						uint nextLink;
-						InterlockedAdd(gProbesCounter[0], 1U, nextLink);
+						InterlockedAdd(gLightsCounter[0], 1U, nextLink);
 						
 						if(nextLink < maxNumLinks)
 						{
 							uint prevLink;
-							InterlockedExchange(gProbesLLHeads[cellIdx], nextLink, prevLink);
+							InterlockedExchange(gLightsLLHeads[cellIdx], nextLink, prevLink);
 							
-							gProbesLL[nextLink] = uint2(i, prevLink);
+							gLightsLL[nextLink] = uint4(i, type, prevLink, 0);
 						}
 					}
 				}
 			}
-		};
+			
+			for(uint i = 0; i < gNumReflProbes; ++i)
+			{
+				float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
+				float probeRadius = gReflectionProbes[i].radius;
+				
+				// Calculate distance from box to probe
+				float3 distances = max(abs(probePosition - cellCenter) - cellExtent, 0);
+				float distSqrd = dot(distances, distances);
+				
+				if(distSqrd <= (probeRadius * probeRadius))
+				{
+					uint nextLink;
+					InterlockedAdd(gProbesCounter[0], 1U, nextLink);
+					
+					if(nextLink < maxNumLinks)
+					{
+						uint prevLink;
+						InterlockedExchange(gProbesLLHeads[cellIdx], nextLink, prevLink);
+						
+						gProbesLL[nextLink] = uint2(i, prevLink);
+					}
+				}
+			}
+		}
 	};
 };

+ 89 - 91
Data/Raw/Engine/Shaders/LightGridLLReduction.bsl

@@ -1,107 +1,105 @@
 #include "$ENGINE$\PerCameraData.bslinc"
 #include "$ENGINE$\LightGridCommon.bslinc"
 
-Technique 
- : inherits("PerCameraData")
- : inherits("LightGridCommon") = 
+technique LightGridLLReduction
 {
-	Pass =
+	mixin PerCameraData;
+	mixin LightGridCommon; 
+
+	code
 	{
-		Compute = 
+		Buffer<uint> gLightsLLHeads;
+		Buffer<uint4> gLightsLL;
+					
+		Buffer<uint> gProbesLLHeads;
+		Buffer<uint2> gProbesLL;
+		
+		[layout(r32ui)]
+		RWBuffer<uint> gGridDataCounter;
+		
+		RWBuffer<uint4> gGridLightOffsetAndSize;
+		RWBuffer<uint> gGridLightIndices;
+
+		RWBuffer<uint2> gGridProbeOffsetAndSize;
+		RWBuffer<uint> gGridProbeIndices;
+		
+		[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
+		void csmain(
+			uint3 groupId : SV_GroupID,
+			uint3 groupThreadId : SV_GroupThreadID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
 		{
-			Buffer<uint> gLightsLLHeads;
-			Buffer<uint4> gLightsLL;
-						
-			Buffer<uint> gProbesLLHeads;
-			Buffer<uint2> gProbesLL;
+			// Ignore threads whose XY coordinates fall outside the grid
+			if (any(dispatchThreadId.xy >= gGridSize.xy))
+				return;
+				
+			uint maxNumLinks = gNumCells * gMaxNumLightsPerCell;	
+			uint cellIdx = (dispatchThreadId.z * gGridSize.y + dispatchThreadId.y) * gGridSize.x + dispatchThreadId.x;
 			
-			[layout(r32ui)]
-			RWBuffer<uint> gGridDataCounter;
+			// Reduce lights
+			//// First count total number of lights affecting the tile
+			uint currentIdx = gLightsLLHeads[cellIdx];
+			uint numRadialLights = 0;
+			uint numSpotLights = 0;
+			while(currentIdx != 0xFFFFFFFF)
+			{
+				uint4 entry = gLightsLL[currentIdx];
 			
-			RWBuffer<uint4> gGridLightOffsetAndSize;
-			RWBuffer<uint> gGridLightIndices;
+				if(entry.y == 1) // Radial
+					numRadialLights++;
+				else // Spot
+					numSpotLights++;
 
-			RWBuffer<uint2> gGridProbeOffsetAndSize;
-			RWBuffer<uint> gGridProbeIndices;
+				currentIdx = entry.z;
+			}
 			
-			[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
-			void main(
-				uint3 groupId : SV_GroupID,
-				uint3 groupThreadId : SV_GroupThreadID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
+			//// Allocate enough room and remember the offset to indices
+			uint numLights = numRadialLights + numSpotLights;
+			uint indicesStart;
+			InterlockedAdd(gGridDataCounter[0], numLights, indicesStart);
+			gGridLightOffsetAndSize[cellIdx] = uint4(indicesStart, numRadialLights, numSpotLights, 0);
+			
+			//// Actually write light indices (reverse order, so that radial lights come first, as is the convention)
+			currentIdx = gLightsLLHeads[cellIdx];
+			uint lightIdx = 0;
+			while(currentIdx != 0xFFFFFFFF)
 			{
-				// Ignore pixels out of valid range
-				if (any(dispatchThreadId.xy >= gGridSize.xy))
-					return;
-					
-				uint maxNumLinks = gNumCells * gMaxNumLightsPerCell;	
-				uint cellIdx = (dispatchThreadId.z * gGridSize.y + dispatchThreadId.y) * gGridSize.x + dispatchThreadId.x;
-				
-				// Reduce lights
-				//// First count total number of lights affecting the tile
-				uint currentIdx = gLightsLLHeads[cellIdx];
-				uint numRadialLights = 0;
-				uint numSpotLights = 0;
-				while(currentIdx != 0xFFFFFFFF)
-				{
-					uint4 entry = gLightsLL[currentIdx];
-				
-					if(entry.y == 1) // Radial
-						numRadialLights++;
-					else // Spot
-						numSpotLights++;
-
-					currentIdx = entry.z;
-				}
-				
-				//// Allocate enough room and remember the offset to indices
-				uint numLights = numRadialLights + numSpotLights;
-				uint indicesStart;
-				InterlockedAdd(gGridDataCounter[0], numLights, indicesStart);
-				gGridLightOffsetAndSize[cellIdx] = uint4(indicesStart, numRadialLights, numSpotLights, 0);
-				
-				//// Actually write light indices (reverse order, so that radial lights come first, as is the convention)
-				currentIdx = gLightsLLHeads[cellIdx];
-				uint lightIdx = 0;
-				while(currentIdx != 0xFFFFFFFF)
-				{
-					uint4 entry = gLightsLL[currentIdx];
-				
-					gGridLightIndices[indicesStart + numLights - 1 - lightIdx] = entry.x;
-					
-					currentIdx = entry.z;
-					lightIdx++;
-				}
-				
-				// Reduce probes
-				//// First count total number of probes affecting the tile
-				currentIdx = gProbesLLHeads[cellIdx];
-				uint numProbes = 0;
-				while(currentIdx != 0xFFFFFFFF)
-				{
-					uint2 entry = gProbesLL[currentIdx];
-				
-					numProbes++;
-					currentIdx = entry.y;
-				}
-				
-				//// Allocate enough room and remember the offset to indices
-				InterlockedAdd(gGridDataCounter[1], numProbes, indicesStart);
-				gGridProbeOffsetAndSize[cellIdx] = uint2(indicesStart, numProbes);
+				uint4 entry = gLightsLL[currentIdx];
+			
+				gGridLightIndices[indicesStart + numLights - 1 - lightIdx] = entry.x;
 				
-				//// Actually write probe indices (reverse order, in order to restore original order since LL was formed in reverse)
-				currentIdx = gProbesLLHeads[cellIdx];
-				uint probeIdx = 0;
-				while(currentIdx != 0xFFFFFFFF)
-				{
-					uint2 entry = gProbesLL[currentIdx];
+				currentIdx = entry.z;
+				lightIdx++;
+			}
+			
+			// Reduce probes
+			//// First count total number of probes affecting the tile
+			currentIdx = gProbesLLHeads[cellIdx];
+			uint numProbes = 0;
+			while(currentIdx != 0xFFFFFFFF)
+			{
+				uint2 entry = gProbesLL[currentIdx];
+			
+				numProbes++;
+				currentIdx = entry.y;
+			}
+			
+			//// Allocate enough room and remember the offset to indices
+			InterlockedAdd(gGridDataCounter[1], numProbes, indicesStart);
+			gGridProbeOffsetAndSize[cellIdx] = uint2(indicesStart, numProbes);
+			
+			//// Actually write probe indices (reverse order, in order to restore original order since LL was formed in reverse)
+			currentIdx = gProbesLLHeads[cellIdx];
+			uint probeIdx = 0;
+			while(currentIdx != 0xFFFFFFFF)
+			{
+				uint2 entry = gProbesLL[currentIdx];
+			
+				gGridProbeIndices[indicesStart + numProbes - 1 - probeIdx] = entry.x;
 				
-					gGridProbeIndices[indicesStart + numProbes - 1 - probeIdx] = entry.x;
-					
-					currentIdx = entry.y;
-					probeIdx++;
-				}
+				currentIdx = entry.y;
+				probeIdx++;
 			}
-		};
+		}
 	};
 };

+ 99 - 100
Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl

@@ -1,115 +1,114 @@
 #include "$ENGINE$\PPTonemapCommon.bslinc"
 #include "$ENGINE$\PPWhiteBalance.bslinc"
 
-Technique
- : inherits("PPTonemapCommon")
- : inherits("PPWhiteBalance") =
+technique PPCreateTonemapLUT
 {
-	Pass =
+	mixin PPTonemapCommon;
+	mixin PPWhiteBalance;
+
+	code
 	{
-		Compute =
+		[internal]
+		cbuffer Input
 		{
-			cbuffer Input
-			{
-				// [0]: x - shoulder strength, y - linear strength, z - linear angle, w - toe strength
-				// [1]: x - toe numerator, y - toe denominator, z - linear white point, w - unused
-				float4 gTonemapParams[2];
-				
-				float gGammaAdjustment;
-				// 0 - sRGB, 1 - Rec.709, 2 - 2.2 gamma
-				uint gGammaCorrectionType;
-				
-				float3 gSaturation;
-				float3 gContrast;
-				float3 gGain;
-				float3 gOffset;
-			};
-		
-			/**
-			 * Filmic curve used for tonemapping.
-			 *
-			 * @param 	linearColor		Linear color.
-			 * @return					Transformed color.
-			 */			
-			float3 FilmicCurve(float3 color)
-			{
-				// Formula from John Hable's Uncharted 2 presentation
-				float3 a = color * (gTonemapParams[0].x * color + gTonemapParams[0].y * gTonemapParams[0].z);
-				float b = gTonemapParams[0].w * gTonemapParams[1].x;
-				float3 c = color * (gTonemapParams[0].x * color + gTonemapParams[0].y);
-				float d = gTonemapParams[0].w * gTonemapParams[1].y;
-				
-				return (a + b)/(c + d) - gTonemapParams[1].x / gTonemapParams[1].y;
-			}
+			// [0]: x - shoulder strength, y - linear strength, z - linear angle, w - toe strength
+			// [1]: x - toe numerator, y - toe denominator, z - linear white point, w - unused
+			float4 gTonemapParams[2];
+			
+			float gGammaAdjustment;
+			// 0 - sRGB, 1 - Rec.709, 2 - 2.2 gamma
+			uint gGammaCorrectionType;
 			
-			/**
-			 * Applies filmic curve tonemapping to the provided color.
-			 *
-			 * @param 	linearColor		Linear color in ACEScg color space.
-			 * @return					Tonemapped color in ACEScg color space.
-			 */		
-			float3 FilmicTonemapping(float3 color)
-			{
-				return FilmicCurve(color) / FilmicCurve(gTonemapParams[1].z);
-			}
+			float3 gSaturation;
+			float3 gContrast;
+			float3 gGain;
+			float3 gOffset;
+		};
+	
+		/**
+		 * Filmic curve used for tonemapping.
+		 *
+		 * @param 	color			Linear color.
+		 * @return					Transformed color.
+		 */			
+		float3 FilmicCurve(float3 color)
+		{
+			// Formula from John Hable's Uncharted 2 presentation
+			float3 a = color * (gTonemapParams[0].x * color + gTonemapParams[0].y * gTonemapParams[0].z);
+			float b = gTonemapParams[0].w * gTonemapParams[1].x;
+			float3 c = color * (gTonemapParams[0].x * color + gTonemapParams[0].y);
+			float d = gTonemapParams[0].w * gTonemapParams[1].y;
 			
-			/**
-			 * Applies color grading to the provided color.
-			 *
-			 * @param 	linearColor		Linear color in ACEScg color space.
-			 * @return					Graded color in ACEScg color space.
-			 */				
-			float3 ColorGrading(float3 color)
-			{
-				const float3 RGBToY = float3(0.2722287168f, 0.6740817658f, 0.0536895174f);
+			return (a + b)/(c + d) - gTonemapParams[1].x / gTonemapParams[1].y;
+		}
+		
+		/**
+		 * Applies filmic curve tonemapping to the provided color.
+		 *
+		 * @param 	color			Linear color in ACEScg color space.
+		 * @return					Tonemapped color in ACEScg color space.
+		 */		
+		float3 FilmicTonemapping(float3 color)
+		{
+			return FilmicCurve(color) / FilmicCurve(gTonemapParams[1].z);
+		}
+		
+		/**
+		 * Applies color grading to the provided color.
+		 *
+		 * @param 	color			Linear color in ACEScg color space.
+		 * @return					Graded color in ACEScg color space.
+		 */				
+		float3 ColorGrading(float3 color)
+		{
+			const float3 RGBToY = float3(0.2722287168f, 0.6740817658f, 0.0536895174f);
+		
+			float luminance = dot(color, RGBToY);
 			
-				float luminance = dot(color, RGBToY);
-				
-				color = max(0, lerp(luminance.xxx, color, gSaturation));
-				color = pow(color * (1.0f / 0.18f), gContrast) * 0.18f;
-				color = color * gGain + gOffset;
+			color = max(0, lerp(luminance.xxx, color, gSaturation));
+			color = pow(color * (1.0f / 0.18f), gContrast) * 0.18f;
+			color = color * gGain + gOffset;
 
-				return color;
-			}		
+			return color;
+		}		
+		
+		RWTexture3D<float4> gOutputTex;
+		
+		[numthreads(8, 8, 1)]
+		void csmain(
+			uint3 dispatchThreadId : SV_DispatchThreadID,
+			uint threadIndex : SV_GroupIndex)
+		{
+			// Constants
+			const float3x3 sRGBToACES2065Matrix = mul(XYZToACES2065Matrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
+			const float3x3 sRGBToACEScgMatrix = mul(XYZToACEScgMatrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
+			const float3x3 ACEScgTosRGBMatrix = mul(XYZTosRGBMatrix, mul(D60ToD65Matrix, ACEScgToXYZMatrix));
 			
-			RWTexture3D<float4> gOutputTex;
+			float3 logColor = float3(dispatchThreadId.xyz / (float)(LUT_SIZE - 1));
+			float3 linearColor = LogToLinearColor(logColor);
 			
-			[numthreads(8, 8, 1)]
-			void main(
-				uint3 dispatchThreadId : SV_DispatchThreadID,
-				uint threadIndex : SV_GroupIndex)
-			{
-				// Constants
-				const float3x3 sRGBToACES2065Matrix = mul(XYZToACES2065Matrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
-				const float3x3 sRGBToACEScgMatrix = mul(XYZToACEScgMatrix, mul(D65ToD60Matrix, sRGBToXYZMatrix));
-				const float3x3 ACEScgTosRGBMatrix = mul(XYZTosRGBMatrix, mul(D60ToD65Matrix, ACEScgToXYZMatrix));
-				
-				float3 logColor = float3(dispatchThreadId.xyz / (float)(LUT_SIZE - 1));
-				float3 linearColor = LogToLinearColor(logColor);
-				
-				linearColor = WhiteBalance(linearColor);
-				linearColor = mul(sRGBToACEScgMatrix, linearColor);
-				linearColor = ColorGrading(linearColor);
-				
-				// Note: Improve this so it's closer to the ACES curve?
-				linearColor = FilmicTonemapping(linearColor);
-				// TODO - Does the white point provided in filmic curve conflict with the white balancing?
-				
-				linearColor = mul(ACEScgTosRGBMatrix, linearColor);
-				
-				// Transform to gamma space
-				float3 gammaColor = pow(linearColor, gGammaAdjustment); // User adjustment, usually 1.0f
-					
-				if(gGammaCorrectionType == 0)
-					gammaColor = LinearToGammasRGB(gammaColor);
-				else if(gGammaCorrectionType == 1)
-					gammaColor = LinearToGammaRec709(gammaColor);
-				else
-					gammaColor = pow(gammaColor, 1.0f/2.2f);
+			linearColor = WhiteBalance(linearColor);
+			linearColor = mul(sRGBToACEScgMatrix, linearColor);
+			linearColor = ColorGrading(linearColor);
+			
+			// Note: Improve this so it's closer to the ACES curve?
+			linearColor = FilmicTonemapping(linearColor);
+			// TODO - Does the white point provided in filmic curve conflict with the white balancing?
+			
+			linearColor = mul(ACEScgTosRGBMatrix, linearColor);
+			
+			// Transform to gamma space
+			float3 gammaColor = pow(linearColor, gGammaAdjustment); // User adjustment, usually 1.0f
 				
-				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
-				gOutputTex[dispatchThreadId] = float4(gammaColor, 1.0f);	
-			}	
-		};
+			if(gGammaCorrectionType == 0)
+				gammaColor = LinearToGammasRGB(gammaColor);
+			else if(gGammaCorrectionType == 1)
+				gammaColor = LinearToGammaRec709(gammaColor);
+			else
+				gammaColor = pow(gammaColor, 1.0f/2.2f);
+			
+			// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
+			gOutputTex[dispatchThreadId] = float4(gammaColor, 1.0f);	
+		}	
 	};
 };

+ 25 - 37
Data/Raw/Engine/Shaders/PPDownsample.bsl

@@ -1,50 +1,38 @@
 #include "$ENGINE$\PPBase.bslinc"
 
-Parameters =
+technique PPDownsample
 {
-	float2		gInvTexSize;
-	Sampler2D 	gInputSamp : alias("gInputTex");
-	Texture2D 	gInputTex;
-};
+	mixin PPBase;
 
-Blocks =
-{
-	Block Input;
-};
-
-Technique : inherits("PPBase") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		[internal]
+		cbuffer Input
 		{
-			cbuffer Input
-			{
-				float2 gInvTexSize;
-			}		
-		
-			SamplerState gInputSamp;
-			Texture2D gInputTex;
+			float2 gInvTexSize;
+		}		
+	
+		SamplerState gInputSamp;
+		Texture2D gInputTex;
 
-			float4 main(VStoFS input) : SV_Target0
-			{
-				float2 UV[4];
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			float2 UV[4];
 
-				// Blur using a 4x4 kernel. It's assumed current position is right in the middle of a 2x2 kernel (because the output
-				// texture should be 1/2 the size of the output texture), and moving by one in each direction will sample areas
-				// between a 2x2 kernel as well if bilinear filtering is enabled.
-				UV[0] = input.uv0 + gInvTexSize * float2(-1, -1);
-				UV[1] = input.uv0 + gInvTexSize * float2( 1, -1);
-				UV[2] = input.uv0 + gInvTexSize * float2(-1,  1);
-				UV[3] = input.uv0 + gInvTexSize * float2( 1,  1);
+			// Blur using a 4x4 kernel. It's assumed current position is right in the middle of a 2x2 kernel (because the output
+			// texture should be 1/2 the size of the input texture), and moving by one in each direction will sample areas
+			// between a 2x2 kernel as well if bilinear filtering is enabled.
+			UV[0] = input.uv0 + gInvTexSize * float2(-1, -1);
+			UV[1] = input.uv0 + gInvTexSize * float2( 1, -1);
+			UV[2] = input.uv0 + gInvTexSize * float2(-1,  1);
+			UV[3] = input.uv0 + gInvTexSize * float2( 1,  1);
 
-				float4 samples[4];
+			float4 samples[4];
 
-				for(uint i = 0; i < 4; i++)
-					samples[i] = gInputTex.Sample(gInputSamp, UV[i]);
+			for(uint i = 0; i < 4; i++)
+				samples[i] = gInputTex.Sample(gInputSamp, UV[i]);
 
-				return (samples[0] + samples[1] + samples[2] + samples[3]) * 0.25f;
-			}	
-		};
+			return (samples[0] + samples[1] + samples[2] + samples[3]) * 0.25f;
+		}	
 	};
 };

+ 84 - 97
Data/Raw/Engine/Shaders/PPEyeAdaptHistogram.bsl

@@ -1,119 +1,106 @@
-Parameters =
-{
-	Texture2D 	gSceneColorTex;
-	RWTexture2D gOutputTex;
-};
-
-Blocks =
-{
-	Block Input;
-};
-
 #define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
 
-Technique =
+technique PPEyeAdaptHistogram
 {
-	Pass =
-	{
-		Compute =
-		{	
-			cbuffer Input
-			{
-				// xy - offset, zw - size
-				uint4 gPixelOffsetAndSize;
-			
-				// x - histogram scale, y - histogram offset
-				float2 gHistogramParams;
-				uint2 gThreadGroupCount;
-			}
+	code
+	{	
+		[internal]
+		cbuffer Input
+		{
+			// xy - offset, zw - size
+			uint4 gPixelOffsetAndSize;
+		
+			// x - histogram scale, y - histogram offset
+			float2 gHistogramParams;
+			uint2 gThreadGroupCount;
+		}
+	
+		Texture2D gSceneColorTex;
+		RWTexture2D<float4> gOutputTex;
 		
-			Texture2D gSceneColorTex;
-			RWTexture2D<float4> gOutputTex;
+		// Keep elements in this order as it ensures coalesced memory operations for non-random ops
+		groupshared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
+		
+		float calcHistogramPos(float luminance)
+		{
+			return saturate(log2(luminance) * gHistogramParams.x + gHistogramParams.y);
+		}			
+		
+		[numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
+		void csmain(
+			uint3 groupId : SV_GroupID,
+			uint3 groupThreadId : SV_GroupThreadID,
+			uint3 dispatchThreadId : SV_DispatchThreadID,
+			uint threadIndex : SV_GroupIndex)
+		{
+			// Clear everything
+			for(uint i = 0; i < NUM_BUCKETS; i++)
+				sharedData[i][groupThreadId.x][groupThreadId.y] = 0.0f;
+				
+			GroupMemoryBarrierWithGroupSync();
 			
-			// Keep elements in this order as it ensures coalesced memory operations for non-random ops
-			groupshared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
+			// Sort all pixel luminance for the current thread into histogram buckets
+			uint2 tileSize = uint2(LOOP_COUNT_X, LOOP_COUNT_Y);
+			uint2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
 			
-			float calcHistogramPos(float luminance)
+			uint2 tileStart = dispatchThreadId.xy * tileSize + gPixelOffsetAndSize.xy;
+			for(uint y = 0; y < LOOP_COUNT_Y; y++)
 			{
-				return saturate(log2(luminance) * gHistogramParams.x + gHistogramParams.y);
-			}			
+				uint2 texelPos = tileStart + uint2(0, y);
+				if(texelPos.y > maxExtent.y)
+					break;
 			
-			[numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
-			void main(
-				uint3 groupId : SV_GroupID,
-				uint3 groupThreadId : SV_GroupThreadID,
-				uint3 dispatchThreadId : SV_DispatchThreadID,
-				uint threadIndex : SV_GroupIndex)
-			{
-				// Clear everything
-				for(uint i = 0; i < NUM_BUCKETS; i++)
-					sharedData[i][groupThreadId.x][groupThreadId.y] = 0.0f;
-					
-				GroupMemoryBarrierWithGroupSync();
-				
-				// Sort all pixel luminance for the current thread into histogram buckets
-				uint2 tileSize = uint2(LOOP_COUNT_X, LOOP_COUNT_Y);
-				uint2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
-				
-				uint2 tileStart = dispatchThreadId.xy * tileSize + gPixelOffsetAndSize.xy;
-				for(uint y = 0; y < LOOP_COUNT_Y; y++)
+				for(uint x = 0; x < LOOP_COUNT_X; x++)
 				{
-					uint2 texelPos = tileStart + uint2(0, y);
-					if(texelPos.y > maxExtent.y)
+					if(texelPos.x > maxExtent.x)
 						break;
 				
-					for(uint x = 0; x < LOOP_COUNT_X; x++)
-					{
-						if(texelPos.x > maxExtent.x)
-							break;
+					float4 hdrColor = gSceneColorTex.Load(int3(texelPos, 0));
+					float luminance = dot(hdrColor.rgb, float3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
 					
-						float4 hdrColor = gSceneColorTex.Load(int3(texelPos, 0));
-						float luminance = dot(hdrColor.rgb, float3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
-						
-						float histogramPos = calcHistogramPos(luminance);
-						float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
-					
-						uint bucketAIdx = (uint)bucket;
-						uint bucketBIdx = bucketAIdx + 1;
-						
-						float weightB = frac(bucket);
-						float weightA = 1.0f - weightB;
-						
-						if(bucketAIdx != 0)
-							sharedData[bucketAIdx][groupThreadId.x][groupThreadId.y] += weightA;
+					float histogramPos = calcHistogramPos(luminance);
+					float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
+				
+					uint bucketAIdx = (uint)bucket;
+					uint bucketBIdx = bucketAIdx + 1;
 					
-						sharedData[bucketBIdx][groupThreadId.x][groupThreadId.y] += weightB;
+					float weightB = frac(bucket);
+					float weightA = 1.0f - weightB;
 					
-						texelPos.x++;
-					}
-				}
+					if(bucketAIdx != 0)
+						sharedData[bucketAIdx][groupThreadId.x][groupThreadId.y] += weightA;
 				
-				GroupMemoryBarrierWithGroupSync();
+					sharedData[bucketBIdx][groupThreadId.x][groupThreadId.y] += weightB;
 				
-				// Accumulate bucketed values from all threads in the group
-				if(threadIndex < (NUM_BUCKETS / 4))
+					texelPos.x++;
+				}
+			}
+			
+			GroupMemoryBarrierWithGroupSync();
+			
+			// Accumulate bucketed values from all threads in the group
+			if(threadIndex < (NUM_BUCKETS / 4))
+			{
+				float4 sum = 0.0f;
+				for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
 				{
-					float4 sum = 0.0f;
-					for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
+					for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
 					{
-						for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
-						{
-							sum += float4(
-								sharedData[threadIndex * 4 + 0][x][y],
-								sharedData[threadIndex * 4 + 1][x][y],
-								sharedData[threadIndex * 4 + 2][x][y],
-								sharedData[threadIndex * 4 + 3][x][y]
-							);
-						}
+						sum += float4(
+							sharedData[threadIndex * 4 + 0][x][y],
+							sharedData[threadIndex * 4 + 1][x][y],
+							sharedData[threadIndex * 4 + 2][x][y],
+							sharedData[threadIndex * 4 + 3][x][y]
+						);
 					}
-					
-					// Normalize and output histogram for the group (single line per group)
-					float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
-
-					gOutputTex[uint2(threadIndex, groupId.x + groupId.y * gThreadGroupCount.x)] = sum / groupArea;					
-					
 				}
-			}	
-		};
+				
+				// Normalize and output histogram for the group (single line per group)
+				float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
+
+				gOutputTex[uint2(threadIndex, groupId.x + groupId.y * gThreadGroupCount.x)] = sum / groupArea;					
+				
+			}
+		}	
 	};
 };

+ 28 - 39
Data/Raw/Engine/Shaders/PPEyeAdaptHistogramReduce.bsl

@@ -1,50 +1,39 @@
 #include "$ENGINE$\PPBase.bslinc"
 
-Parameters =
+technique PPEyeAdaptHistogramReduce
 {
-	Texture2D 	gHistogramTex;
-	Texture2D 	gEyeAdaptationTex;
-};
+	mixin PPBase;
 
-Blocks =
-{
-	Block Input;
-};
-
-Technique : inherits("PPBase") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		[internal]
+		cbuffer Input
 		{
-			cbuffer Input
-			{
-				uint gThreadGroupCount;
-			}		
-		
-			Texture2D gHistogramTex;
-			Texture2D gEyeAdaptationTex;
+			uint gThreadGroupCount;
+		}		
+	
+		Texture2D gHistogramTex;
+		Texture2D gEyeAdaptationTex;
 
-			float4 main(VStoFS input) : SV_Target0
-			{
-				int2 iUV = trunc(input.uv0);
-				float4 outputValue = 0.0f;
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			int2 iUV = trunc(input.uv0);
+			float4 outputValue = 0.0f;
 
-				// Output texture only has two rows, store histogram on the first
-				if(input.uv0.y < 1.0f)
-				{
-					// TODO - Potentially optimize using bilinear filtering
-					for(uint i = 0; i < gThreadGroupCount; i++)
-						outputValue += gHistogramTex.Load(int3(iUV.x, i, 0));
+			// Output texture only has two rows, store histogram on the first
+			if(input.uv0.y < 1.0f)
+			{
+				// TODO - Potentially optimize using bilinear filtering
+				for(uint i = 0; i < gThreadGroupCount; i++)
+					outputValue += gHistogramTex.Load(int3(iUV.x, i, 0));
 
-					return outputValue / gThreadGroupCount;
-				}
-				else
-				{
-					// Store eye adaptation from last frame in the second row of the texture
-					return gEyeAdaptationTex.Load(int3(0, 0, 0)).x;
-				}
-			}	
-		};
+				return outputValue / gThreadGroupCount;
+			}
+			else
+			{
+				// Store eye adaptation from last frame in the second row of the texture
+				return gEyeAdaptationTex.Load(int3(0, 0, 0)).x;
+			}
+		}	
 	};
 };

+ 141 - 151
Data/Raw/Engine/Shaders/PPEyeAdaptation.bsl

@@ -1,171 +1,161 @@
 #include "$ENGINE$\PPBase.bslinc"
 
-Parameters =
+technique PPEyeAdaptation
 {
-	Texture2D 	gHistogramTex;
-};
+	mixin PPBase;
 
-Blocks =
-{
-	Block Input;
-};
-
-Technique : inherits("PPBase") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		#define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
+	
+		[internal]
+		cbuffer Input
 		{
-			#define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
+			// [0]: x - histogram scale, y - histogram offset, z - histogram percent low, w - histogram percent high
+			// [1]: x - min adaptation, y - max adaptation, z - adaptation speed up, w - adaptation speed down
+			// [2]: x - exposure scale, y - frame time delta, zw - nothing
+			float4 gEyeAdaptationParams[3];
+		}		
 		
-			cbuffer Input
-			{
-				// [0]: x - histogram scale, y - histogram offset, z - histogram percent low, w - histogram percent high
-				// [1]: x - min adaptation, y - max adaptation, z - adaptation speed up, w - adaptation speed down
-				// [2]: x - exposure scale, y - frame time delta, zw - nothing
-				float4 gEyeAdaptationParams[3];
-			}		
-			
-			Texture2D gHistogramTex;
-			
-			/** 
-			 * Returns luminance of the histogram bucket.
-			 *
-			 * @param pos	Position of the histogram bucket in range [0, 1].
-			 * @return		Luminance of the bucket.
-			 */
-			float calcHistogramLuminance(float pos)
-			{
-				return exp2((pos - gEyeAdaptationParams[0].y) / gEyeAdaptationParams[0].x);
-			}	
+		Texture2D gHistogramTex;
+		
+		/** 
+		 * Returns luminance of the histogram bucket.
+		 *
+		 * @param pos	Position of the histogram bucket in range [0, 1].
+		 * @return		Luminance of the bucket.
+		 */
+		float calcHistogramLuminance(float pos)
+		{
+			return exp2((pos - gEyeAdaptationParams[0].y) / gEyeAdaptationParams[0].x);
+		}	
+		
+		/**
+		 * Returns value of the histogram bucket.
+		 *
+		 * @param	histogram	Texture containing the histogram buckets in the first row.
+		 * @param	bucketIdx	Index of the bucket. Caller must ensure it is in valid range.
+		 * @return				Value of the needed histogram bucket.
+		 */
+		float getHistogramValue(Texture2D histogram, uint bucketIdx)
+		{
+			uint texelIdx = bucketIdx / 4;
 			
-			/**
-			 * Returns value of the histogram bucket.
-			 *
-			 * @param	histogram	Texture containing the histogram buckets in the first row.
-			 * @param	bucketIdx	Index of the bucket. Caller must ensure it is in valid range.
-			 * @return				Value of the needed histogram bucket.
-			 */
-			float getHistogramValue(Texture2D histogram, uint bucketIdx)
-			{
-				uint texelIdx = bucketIdx / 4;
-				
-				float4 packedValue = histogram.Load(int3(texelIdx, 0, 0));
-				float4 mask = float4(
-					(bucketIdx % 4) == 0,
-					(bucketIdx % 4) == 1,
-					(bucketIdx % 4) == 2,
-					(bucketIdx % 4) == 3);
-
-				return dot(packedValue, mask);	
-			}
+			float4 packedValue = histogram.Load(int3(texelIdx, 0, 0));
+			float4 mask = float4(
+				(bucketIdx % 4) == 0,
+				(bucketIdx % 4) == 1,
+				(bucketIdx % 4) == 2,
+				(bucketIdx % 4) == 3);
+
+			return dot(packedValue, mask);	
+		}
+
+		/** 
+		 * Calculates the sum of all values in the histogram.
+		 *
+		 * @param	histogram	Texture containing the histogram buckets in the first row.
+		 * @return				Sum of all the values in the histogram.
+		 */
+		float calcHistogramSum(Texture2D histogram)
+		{
+			float sum = 0;
+
+			for(uint i = 0; i < NUM_BUCKETS; i++)
+				sum += getHistogramValue(histogram, i);
+
+			return sum;
+		}	
+
+		/**
+		 * Calculates the average luminance in the histogram, while ignoring the outlier values that may skew the result.
+		 *
+		 * @param	histogram	Texture containing the histogram buckets in the first row.
+		 * @param	low			Sum below which to ignore values (removing lower end outliers), in range [0, histogramSum].
+		 * @param	high		Sum above which to ignore values (removing higher end outliers), in range [0, histogramSum]. 
+		 *                      Must be higher than @low.
+		 * @return				Average luminance in the histogram.
+		 */
+		float calcHistogramAverageLuminance(Texture2D histogram, float low, float high)
+		{
+			float2 sumAndWeight = float2(0.0f, 0.0f);
 
-			/** 
-			 * Calculates the sum of all values in the histogram.
-			 *
-			 * @param	histogram	Texture containing the histogram buckets in the first row.
-			 * @return				Sum of all the values in the histogram.
-			 */
-			float calcHistogramSum(Texture2D histogram)
-			{
-				float sum = 0;
-
-				for(uint i = 0; i < NUM_BUCKETS; i++)
-					sum += getHistogramValue(histogram, i);
-
-				return sum;
-			}	
-
-			/**
-			 * Calculates the average luminance in the histogram, while ignoring the outlier values that may skew the result.
-			 *
-			 * @param	histogram	Texture containing the histogram buckets in the first row.
-			 * @param	low			Sum below which to ignore values (removing lower end outliers), in range [0, histogramSum].
-			 * @param	high		Sum above which to ignore values (removing higher end outliers), in range [0, histogramSum]. 
-			 *                      Must be higher than @low.
-			 * @return				Average luminance in the histogram.
-			 */
-			float calcHistogramAverageLuminance(Texture2D histogram, float low, float high)
+			for(uint i = 0; i < NUM_BUCKETS; i++)
 			{
-				float2 sumAndWeight = float2(0.0f, 0.0f);
-
-				for(uint i = 0; i < NUM_BUCKETS; i++)
-				{
-					float value = getHistogramValue(histogram, i);
-
-					// Ignore any values below the @low parameter, and then shift the valid range
-					// by the amount we ignored. Eventually the low end of the range reaches zero
-					// and values are no longer ignored.
-					float offset = min(value, low);
-					value = value - offset;
-					low -= offset;
-					high -= offset;
-
-					// Ignore any values above the @high parameter, and then shift the valid range.
-					value = min(value, high);
-					high -= value;
-
-					float histogramPos = i / (float)NUM_BUCKETS;
-					float luminance = calcHistogramLuminance(histogramPos);
-					
-					sumAndWeight += float2(luminance, 1) * value;
-				}
+				float value = getHistogramValue(histogram, i);
+
+				// Ignore any values below the @low parameter, and then shift the valid range
+				// by the amount we ignored. Eventually the low end of the range reaches zero
+				// and values are no longer ignored.
+				float offset = min(value, low);
+				value = value - offset;
+				low -= offset;
+				high -= offset;
+
+				// Ignore any values above the @high parameter, and then shift the valid range.
+				value = min(value, high);
+				high -= value;
+
+				float histogramPos = i / (float)NUM_BUCKETS;
+				float luminance = calcHistogramLuminance(histogramPos);
 				
-				return sumAndWeight.x / max(0.0001f, sumAndWeight.y);
+				sumAndWeight += float2(luminance, 1) * value;
 			}
 			
-			/**
-			 * Calculates the eye adaptation from the luminance in the provided histogram. Eye adaptation value will be 
-			 * used for automatically scaling expsure based on scene brightness.
-			 *
-			 * @param	histogram	Texture containing the histogram buckets in the first row.
-			 * @return				Ideal eye adaptation value for the provided luminance.
-			 */
-			float calcEyeAdaptation(Texture2D histogram)
-			{
-				float sum = calcHistogramSum(histogram);
-				float lowRange = gEyeAdaptationParams[0].z * sum;
-				float highRange = gEyeAdaptationParams[0].w * sum;
-				
-				float avgLuminance = calcHistogramAverageLuminance(histogram, lowRange, highRange);
-				avgLuminance = clamp(avgLuminance, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
-
-				return avgLuminance;
-			}
+			return sumAndWeight.x / max(0.0001f, sumAndWeight.y);
+		}
+		
+		/**
+		 * Calculates the eye adaptation from the luminance in the provided histogram. Eye adaptation value will be 
+		 * used for automatically scaling exposure based on scene brightness.
+		 *
+		 * @param	histogram	Texture containing the histogram buckets in the first row.
+		 * @return				Ideal eye adaptation value for the provided luminance.
+		 */
+		float calcEyeAdaptation(Texture2D histogram)
+		{
+			float sum = calcHistogramSum(histogram);
+			float lowRange = gEyeAdaptationParams[0].z * sum;
+			float highRange = gEyeAdaptationParams[0].w * sum;
 			
-			/** 
-			 * Smooths out eye adaptation changes over multiple frames so they aren't as jarring.
-			 *
-			 * @param	old			Eye adaptation value from the previous frame.
-			 * @param	target		Ideal eye adaptation value for this frame.
-			 * @param	frameDelta	Time difference between this and last frame, in seconds.
-			 * @return				Smoothed eye adaptation.
-			 */
-			float smoothEyeAdaptation(float old, float target, float frameDelta)
-			{
-				float diff = target - old;
+			float avgLuminance = calcHistogramAverageLuminance(histogram, lowRange, highRange);
+			avgLuminance = clamp(avgLuminance, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
 
-				float speedUp = gEyeAdaptationParams[1].z;
-				float speedDown = gEyeAdaptationParams[1].w;
+			return avgLuminance;
+		}
+		
+		/** 
+		 * Smooths out eye adaptation changes over multiple frames so they aren't as jarring.
+		 *
+		 * @param	old			Eye adaptation value from the previous frame.
+		 * @param	target		Ideal eye adaptation value for this frame.
+		 * @param	frameDelta	Time difference between this and last frame, in seconds.
+		 * @return				Smoothed eye adaptation.
+		 */
+		float smoothEyeAdaptation(float old, float target, float frameDelta)
+		{
+			float diff = target - old;
 
-				float adaptionSpeed = (diff > 0) ? speedUp : speedDown;
-				float scale = 1.0f - exp2(-frameDelta * adaptionSpeed);
+			float speedUp = gEyeAdaptationParams[1].z;
+			float speedDown = gEyeAdaptationParams[1].w;
 
-				return clamp(old + diff * scale, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
-			}
+			float adaptionSpeed = (diff > 0) ? speedUp : speedDown;
+			float scale = 1.0f - exp2(-frameDelta * adaptionSpeed);
+
+			return clamp(old + diff * scale, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
+		}
+		
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			float exposureScale = gEyeAdaptationParams[2].x;
+
+			float targetAdaptation = calcEyeAdaptation(gHistogramTex);
+			float oldExposure = gHistogramTex.Load(int3(0, 1, 0)).x;
+			float oldAdaptation = exposureScale / oldExposure; // Assuming same exposure scale as last frame
+			float frameDelta = gEyeAdaptationParams[2].y;
 			
-			float4 main(VStoFS input) : SV_Target0
-			{
-				float exposureScale = gEyeAdaptationParams[2].x;
-	
-				float targetAdaptation = calcEyeAdaptation(gHistogramTex);
-				float oldExposure = gHistogramTex.Load(int3(0, 1, 0)).x;
-				float oldAdaptation = exposureScale / oldExposure; // Assuming same exposure scale as last frame
-				float frameDelta = gEyeAdaptationParams[2].y;
-				
-				float smoothAdaptation = smoothEyeAdaptation(oldAdaptation, targetAdaptation, frameDelta);
-				return exposureScale / smoothAdaptation; // Returns exposure
-			}	
-		};
+			float smoothAdaptation = smoothEyeAdaptation(oldAdaptation, targetAdaptation, frameDelta);
+			return exposureScale / smoothAdaptation; // Returns exposure
+		}	
 	};
 };

+ 64 - 81
Data/Raw/Engine/Shaders/PPTonemapping.bsl

@@ -1,99 +1,82 @@
 #include "$ENGINE$\PPTonemapCommon.bslinc"
 
-Parameters =
+technique PPTonemapping
 {
-	Sampler2D 	gInputSamp : alias("gInputTex");
-	Texture2D 	gInputTex;
-	Sampler3D 	gColorLUTSamp : alias("gColorLUT");
-	Texture3D 	gColorLUT;	
-	Texture2D	gEyeAdaptationTex;
-};
+	mixin PPTonemapCommon;
 
-Blocks =
-{
-	Block Input;
-};
+	depth
+	{
+		read = false;
+		write = false;
+	};
 
-Technique : inherits("PPTonemapCommon") =
-{
-	Pass =
+	code
 	{
-		DepthWrite = false;
-		DepthRead = false;
-	
-		Common = 
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_POSITION;
-				float2 uv0 : TEXCOORD0;
-				float exposureScale : TEXCOORD1;
-			};
+			float4 position : SV_POSITION;
+			float2 uv0 : TEXCOORD0;
+			float exposureScale : TEXCOORD1;
 		};
 
-		Vertex =
+		struct VertexInput
 		{
-			struct VertexInput
-			{
-				float2 screenPos : POSITION;
-				float2 uv0 : TEXCOORD0;
-			};
-			
-			Texture2D gEyeAdaptationTex;
-			
-			VStoFS main(VertexInput input)
-			{
-				VStoFS output;
-			
-				output.position = float4(input.screenPos, 0, 1);
-				output.uv0 = input.uv0;
-				output.exposureScale = gEyeAdaptationTex.Load(int3(0, 0, 0)).r;
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
+		};
+		
+		Texture2D gEyeAdaptationTex;
+		
+		VStoFS vsmain(VertexInput input)
+		{
+			VStoFS output;
+		
+			output.position = float4(input.screenPos, 0, 1);
+			output.uv0 = input.uv0;
+			output.exposureScale = gEyeAdaptationTex.Load(int3(0, 0, 0)).r;
+
+			return output;
+		}			
 
-				return output;
-			}			
-		};	
-	
-		Fragment =
+		SamplerState gInputSamp;
+		Texture2D gInputTex;
+		
+		SamplerState gColorLUTSamp;
+		Texture3D gColorLUT;
+		
+		[internal]
+		cbuffer Input
 		{
-			SamplerState gInputSamp;
-			Texture2D gInputTex;
+			float gRawGamma;
+			float gManualExposureScale;
+		}				
+		
+		float3 ColorLookupTable(float3 linearColor)
+		{
+			float3 logColor = LinearToLogColor(linearColor);
+			float3 UVW = logColor * ((LUT_SIZE - 1) / (float)LUT_SIZE) + (0.5f / LUT_SIZE);
 			
-			SamplerState gColorLUTSamp;
-			Texture3D gColorLUT;
+			float3 gradedColor = gColorLUT.Sample(gColorLUTSamp, UVW).rgb;
+			return gradedColor;
+		}
+					
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			float4 sceneColor = gInputTex.Sample(gInputSamp, input.uv0);
 			
-			cbuffer Input
-			{
-				float gRawGamma;
-				float gManualExposureScale;
-			}				
+			#if AUTO_EXPOSURE
+				sceneColor.rgb = sceneColor.rgb * input.exposureScale;
+			#else
+				sceneColor.rgb = sceneColor.rgb * gManualExposureScale;
+			#endif
 			
-			float3 ColorLookupTable(float3 linearColor)
-			{
-				float3 logColor = LinearToLogColor(linearColor);
-				float3 UVW = logColor * ((LUT_SIZE - 1) / (float)LUT_SIZE) + (0.5f / LUT_SIZE);
-				
-				float3 gradedColor = gColorLUT.Sample(gColorLUTSamp, UVW).rgb;
-				return gradedColor;
-			}
-						
-			float4 main(VStoFS input) : SV_Target0
-			{
-				float4 sceneColor = gInputTex.Sample(gInputSamp, input.uv0);
-				
-				#if AUTO_EXPOSURE
-					sceneColor.rgb = sceneColor.rgb * input.exposureScale;
-				#else
-					sceneColor.rgb = sceneColor.rgb * gManualExposureScale;
-				#endif
-				
-				#if GAMMA_ONLY
-					sceneColor.rgb = pow(sceneColor.rgb, gRawGamma);				
-				#else
-					sceneColor.rgb = ColorLookupTable(sceneColor.rgb);
-				#endif
+			#if GAMMA_ONLY
+				sceneColor.rgb = pow(sceneColor.rgb, gRawGamma);				
+			#else
+				sceneColor.rgb = ColorLookupTable(sceneColor.rgb);
+			#endif
 
-				return sceneColor;
-			}	
-		};
+			return sceneColor;
+		}	
 	};
 };

+ 18 - 31
Data/Raw/Engine/Shaders/ReflectionCubeDownsample.bsl

@@ -1,41 +1,28 @@
 #include "$ENGINE$\PPBase.bslinc"
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 
-Parameters =
+technique ReflectionCubeDownsample
 {
-	int			gCubeFace;
-	SamplerCUBE	gInputSamp : alias("gInputTex");
-	TextureCUBE gInputTex;
-};
+	mixin PPBase;
+	mixin ReflectionCubemapCommon;
 
-Blocks =
-{
-	Block Input;
-};
-
-Technique 
- : inherits("PPBase")
- : inherits("ReflectionCubemapCommon") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		[internal]
+		cbuffer Input
 		{
-			cbuffer Input
-			{
-				int gCubeFace;
-			}	
-		
-			SamplerState gInputSamp;
-			TextureCube gInputTex;
+			int gCubeFace;
+		}	
+	
+		SamplerState gInputSamp;
+		TextureCube gInputTex;
 
-			float4 main(VStoFS input) : SV_Target0
-			{
-				float2 scaledUV = input.uv0 * 2.0f - 1.0f;
-				float3 dir = getDirFromCubeFace(gCubeFace, scaledUV);
-				
-				return gInputTex.Sample(gInputSamp, dir);
-			}	
-		};
+		// Outputs the input cubemap sampled along the direction that corresponds to this pixel on face gCubeFace.
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			// Scale and bias the UV before converting it to a direction (presumably [0, 1] -> [-1, 1]; confirm against PPBase)
+			float2 faceCoord = input.uv0 * 2.0f - 1.0f;
+			float3 sampleDir = getDirFromCubeFace(gCubeFace, faceCoord);
+
+			float4 color = gInputTex.Sample(gInputSamp, sampleDir);
+			return color;
+		}	
 	};
 };

+ 109 - 122
Data/Raw/Engine/Shaders/ReflectionCubeImportanceSample.bsl

@@ -1,142 +1,129 @@
 #include "$ENGINE$\PPBase.bslinc"
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 
-Parameters =
+technique ReflectionCubeImportanceSample
 {
-	int			gCubeFace;
-	SamplerCUBE	gInputSamp : alias("gInputTex");
-	TextureCUBE gInputTex;
-};
+	mixin PPBase;
+	mixin ReflectionCubemapCommon;
 
-Blocks =
-{
-	Block Input;
-};
-
-Technique
- : inherits("PPBase")
- : inherits("ReflectionCubemapCommon") =
-{
-	Pass =
+	code
 	{
-		Fragment =
+		#define PI 3.1415926
+	
+		// From Hacker's Delight
+		float reverseBits(uint bits)  
 		{
-			#define PI 3.1415926
-		
-			// From Hacker's Delight
-			float reverseBits(uint bits)  
-			{
-				bits = (bits << 16u) | (bits >> 16u);
-				bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
-				bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
-				bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
-				bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
-				
-				return float(bits) * 2.3283064365386963e-10; // 0x100000000
-			}
-			
-			float2 hammersleySequence(uint i, uint count)
-			{
-				float2 output;
-				output.x = i / (float)count;
-				output.y = reverseBits(i);
-				
-				return output;
-			}
+			bits = (bits << 16u) | (bits >> 16u);
+			bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+			bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+			bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+			bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
 			
-			// Returns cos(theta) in x and phi in y
-			float2 importanceSampleGGX(float2 e, float roughness4)
-			{
-				// See GGXImportanceSample.nb for derivation (essentially, take base GGX, normalize it,
-				// generate PDF, split PDF into marginal probability for theta and conditional probability
-				// for phi. Plug those into the CDF, invert it.)				
-				float cosTheta = sqrt((1.0f - e.x) / (1.0f + (roughness4 - 1.0f) * e.x));
-				float phi = 2.0f * PI * e.y;
-				
-				return float2(cosTheta, phi);
-			}
+			return float(bits) * 2.3283064365386963e-10; // 0x100000000
+		}
+		
+		float2 hammersleySequence(uint i, uint count)
+		{
+			float2 output;
+			output.x = i / (float)count;
+			output.y = reverseBits(i);
 			
-			float3 sphericalToCartesian(float cosTheta, float sinTheta, float phi)
-			{
-				float3 output;
-				output.x = sinTheta * cos(phi);
-				output.y = sinTheta * sin(phi);
-				output.z = cosTheta;
-				
-				return output;
-			}
+			return output;
+		}
+		
+		// Returns cos(theta) in x and phi in y
+		float2 importanceSampleGGX(float2 e, float roughness4)
+		{
+			// See GGXImportanceSample.nb for derivation (essentially, take base GGX, normalize it,
+			// generate PDF, split PDF into marginal probability for theta and conditional probability
+			// for phi. Plug those into the CDF, invert it.)				
+			float cosTheta = sqrt((1.0f - e.x) / (1.0f + (roughness4 - 1.0f) * e.x));
+			float phi = 2.0f * PI * e.y;
 			
-			float pdfGGX(float cosTheta, float sinTheta, float roughness4)
-			{
-				float d = (cosTheta*roughness4 - cosTheta) * cosTheta + 1;
-				return roughness4 * cosTheta * sinTheta / (d*d*PI);
-			}
+			return float2(cosTheta, phi);
+		}
+		
+		float3 sphericalToCartesian(float cosTheta, float sinTheta, float phi)
+		{
+			float3 output;
+			output.x = sinTheta * cos(phi);
+			output.y = sinTheta * sin(phi);
+			output.z = cosTheta;
 			
-			cbuffer Input
-			{
-				int gCubeFace;
-				int gMipLevel;
-				int gNumMips;
-				float gPrecomputedMipFactor;
-			}	
+			return output;
+		}
+		
+		// Evaluates the PDF expression used for GGX importance sampling from the sampled angle and roughness^4.
+		float pdfGGX(float cosTheta, float sinTheta, float roughness4)
+		{
+			float denomRoot = (cosTheta*roughness4 - cosTheta) * cosTheta + 1;
+
+			float numerator = roughness4 * cosTheta * sinTheta;
+			float denominator = denomRoot*denomRoot*PI;
+
+			return numerator / denominator;
+		}
 		
-			SamplerState gInputSamp;
-			TextureCube gInputTex;
+		[internal]
+		cbuffer Input
+		{
+			int gCubeFace;
+			int gMipLevel;
+			int gNumMips;
+			float gPrecomputedMipFactor;
+		}	
+	
+		SamplerState gInputSamp;
+		TextureCube gInputTex;
 
-			float4 main(VStoFS input) : SV_Target0
+		// Filters the input cubemap for the roughness that mapMipLevelToRoughness() assigns to gMipLevel, by
+		// accumulating NUM_SAMPLES GGX importance samples around the direction of this pixel on face gCubeFace.
+		float4 fsmain(VStoFS input) : SV_Target0
+		{
+			float2 scaledUV = input.uv0 * 2.0f - 1.0f;
+									
+			float3 N = getDirFromCubeFace(gCubeFace, scaledUV);
+			N = normalize(N);
+			
+			// Determine which mip level to sample from depending on PDF and cube -> sphere mapping distortion
+			// NOTE(review): N is unit length at this point, so this expression always evaluates to 1 - confirm
+			// whether the distortion was meant to be computed from the direction before normalization
+			float distortion = rcp(pow(N.x * N.x + N.y * N.y + N.z * N.z, 3.0f/2.0f));
+			
+			float roughness = mapMipLevelToRoughness(gMipLevel, gNumMips);
+			float roughness2 = roughness * roughness;
+			float roughness4 = roughness2 * roughness2;
+			
+			float4 sum = 0;
+			for(uint i = 0; i < NUM_SAMPLES; i++)
 			{
-				float2 scaledUV = input.uv0 * 2.0f - 1.0f;
-										
-				float3 N = getDirFromCubeFace(gCubeFace, scaledUV);
-				N = normalize(N);
+				float2 random = hammersleySequence(i, NUM_SAMPLES);
+				float2 sphericalH = importanceSampleGGX(random, roughness4);
+				
+				float cosTheta = sphericalH.x;
+				float phi = sphericalH.y;
+				
+				float sinTheta = sqrt(1.0f - cosTheta * cosTheta);
 				
-				// Determine which mip level to sample from depending on PDF and cube -> sphere mapping distortion
-				float distortion = rcp(pow(N.x * N.x + N.y * N.y + N.z * N.z, 3.0f/2.0f));
+				float3 H = sphericalToCartesian(cosTheta, sinTheta, phi);
+				float PDF = pdfGGX(cosTheta, sinTheta, roughness4);
 				
-				float roughness = mapMipLevelToRoughness(gMipLevel, gNumMips);
-				float roughness2 = roughness * roughness;
-				float roughness4 = roughness2 * roughness2;
+				// Transform H to world space
+				// The helper axis has to be chosen from N rather than from the tangent-space H: cross(up, N) is a
+				// zero vector (and its normalize() undefined) whenever N is parallel to the chosen axis
+				float3 up = abs(N.z) < 0.999 ? float3(0, 0, 1) : float3(1, 0, 0);
+				float3 tangentX = normalize(cross(up, N));
+				float3 tangentY = cross(N, tangentX);
 				
-				float4 sum = 0;
-				for(uint i = 0; i < NUM_SAMPLES; i++)
-				{
-					float2 random = hammersleySequence(i, NUM_SAMPLES);
-					float2 sphericalH = importanceSampleGGX(random, roughness4);
-					
-					float cosTheta = sphericalH.x;
-					float phi = sphericalH.y;
-					
-					float sinTheta = sqrt(1.0f - cosTheta * cosTheta);
-					
-					float3 H = sphericalToCartesian(cosTheta, sinTheta, phi);
-					float PDF = pdfGGX(cosTheta, sinTheta, roughness4);
-					
-					// Transform H to world space
-					float3 up = abs(H.z) < 0.999 ? float3(0, 0, 1) : float3(1, 0, 0);
-					float3 tangentX = normalize(cross(up, N));
-					float3 tangentY = cross(N, tangentX);
-					
-					H = tangentX * H.x + tangentY * H.y + N * H.z; 
-					
-					// Calculating mip level from distortion and pdf and described by http://http.developer.nvidia.com/GPUGems3/gpugems3_ch20.html
-					int mipLevel = max(gPrecomputedMipFactor - 0.5f * log2(PDF * distortion), 0);
-					
-					// Note: Adding +1 bias as it looks better
-					mipLevel++;
-					
-					// We need a light direction to properly evaluate the NoL term of the evaluation integral
-					//  Li(u) * brdf(u, v) * (u.n) / pdf(u, v)
-					// which we don't have, so we assume a viewing direction is equal to normal and calculate lighting dir from it and half-vector
-					float3 L = 2 * dot(N, H) * H - N;
-					float NoL = saturate(dot(N, L));
-					
-					// sum += radiance * GGX(h, roughness) * NoL / PDF. In GGX/PDF most factors cancel out and we're left with 1/cos (sine factor of the PDF only needed for the integral (I think), so we don't include it)
-					if(NoL > 0)
-						sum += gInputTex.SampleLevel(gInputSamp, H, mipLevel) * NoL / cosTheta;
-				}
+				H = tangentX * H.x + tangentY * H.y + N * H.z; 
 				
-				return sum / NUM_SAMPLES;
-			}	
-		};
+				// Calculating mip level from distortion and pdf as described by http://http.developer.nvidia.com/GPUGems3/gpugems3_ch20.html
+				int mipLevel = max(gPrecomputedMipFactor - 0.5f * log2(PDF * distortion), 0);
+				
+				// Note: Adding +1 bias as it looks better
+				mipLevel++;
+				
+				// We need a light direction to properly evaluate the NoL term of the evaluation integral
+				//  Li(u) * brdf(u, v) * (u.n) / pdf(u, v)
+				// which we don't have, so we assume a viewing direction is equal to normal and calculate lighting dir from it and half-vector
+				float3 L = 2 * dot(N, H) * H - N;
+				float NoL = saturate(dot(N, L));
+				
+				// sum += radiance * GGX(h, roughness) * NoL / PDF. In GGX/PDF most factors cancel out and we're left with 1/cos (sine factor of the PDF only needed for the integral (I think), so we don't include it)
+				if(NoL > 0)
+					sum += gInputTex.SampleLevel(gInputSamp, H, mipLevel) * NoL / cosTheta;
+			}
+			
+			return sum / NUM_SAMPLES;
+		}	
 	};
 };

+ 41 - 43
Data/Raw/Engine/Shaders/Skybox.bsl

@@ -1,54 +1,52 @@
 #include "$ENGINE$\PerCameraData.bslinc"
 
-Parameters =
+technique Skybox
 {
-	SamplerCUBE 	gSkySamp : alias("gSkyTex");
-	TextureCUBE  	gSkyTex;
-};
+	mixin PerCameraData;
 
-Technique : inherits("PerCameraData") =
-{
-	Pass =
+	raster
+	{
+		cull = cw;
+	};
+
+	depth
 	{
-		Cull = CW;
-		CompareFunc = LTE;
-		DepthWrite = false;
+		compare = lte;
+		write = false;
+	};
 		
-		Vertex =
+	code
+	{
+		void vsmain(
+			in float3 inPos : POSITION,
+			out float4 oPosition : SV_Position,
+			out float3 oDir : TEXCOORD0)
 		{
-			void main(
-				in float3 inPos : POSITION,
-				out float4 oPosition : SV_Position,
-				out float3 oDir : TEXCOORD0)
-			{
-				float4 pos = mul(gMatViewProj, float4(inPos.xyz + gViewOrigin, 1));
-			
-				// Set Z = W so that final depth is 1.0f and it renders behind everything else
-				oPosition = pos.xyww;
-				oDir = inPos;
-			}
-		};
+			float4 pos = mul(gMatViewProj, float4(inPos.xyz + gViewOrigin, 1));
 		
-		Fragment =
+			// Set Z = W so that final depth is 1.0f and it renders behind everything else
+			oPosition = pos.xyww;
+			oDir = inPos;
+		}
+
+		TextureCube gSkyTex;
+		SamplerState gSkySamp;
+	
+		[internal]
+		cbuffer Params
 		{
-			TextureCube gSkyTex : register(t0);
-			SamplerState gSkySamp : register(s0);
-		
-			cbuffer Params : register(b0)
-			{
-				float4 gClearColor;
+			float4 gClearColor;
+		}
+	
+		float4 fsmain(
+			in float4 inPos : SV_Position, 
+			in float3 dir : TEXCOORD0) : SV_Target
+		{
+			#ifdef SOLID_COLOR
+				return gClearColor;
+			#else
+				return gSkyTex.SampleLevel(gSkySamp, dir, 0);
+			#endif
 			}
-		
-			float4 main(
-				in float4 inPos : SV_Position, 
-				in float3 dir : TEXCOORD0) : SV_Target
-			{
-				#ifdef SOLID_COLOR
-					return gClearColor;
-				#else
-					return gSkyTex.SampleLevel(gSkySamp, dir, 0);
-				#endif
-				}
-		};	
-	};
+	};	
 };

+ 8 - 6
Data/Raw/Engine/Shaders/SpriteImageAlpha.bsl

@@ -1,14 +1,16 @@
 #include "$ENGINE$\SpriteImage.bslinc"
 
-Technique : inherits("SpriteImage") =
+technique SpriteImageAlpha
 {
-	Pass =
+	mixin SpriteImage;
+
+	blend
 	{
-		Target = 
+		target	
 		{
-			Blend = true;
-			Color = { SRCA, SRCIA, ADD };
-			WriteMask = RGB;
+			enabled = true;
+			color = { srcA, srcIA, add };
+			writemask = RGB;
 		};
 	};
 };

+ 4 - 2
Data/Raw/Engine/Shaders/SpriteImageNoAlpha.bsl

@@ -1,4 +1,6 @@
 #include "$ENGINE$\SpriteImage.bslinc"
 
-Technique : inherits("SpriteImage") =
-{ };
+technique SpriteImageNoAlpha
+{ 
+	mixin SpriteImage;
+};

+ 51 - 60
Data/Raw/Engine/Shaders/SpriteLine.bsl

@@ -1,76 +1,67 @@
-Parameters =
+technique SpriteLine
 {
-	mat4x4 	worldTransform;
-	float	invViewportWidth;
-	float	invViewportHeight;
-
-	color	tint;
-};
-
-Technique =
-{
-	Pass =
+	blend
 	{
-		Target = 
+		target	
 		{
-			Blend = true;
-			Color = { SRCA, SRCIA, ADD };
-			WriteMask = RGB;
+			enabled = true;
+			color = { srcA, srcIA, add };
+			writemask = RGB;
 		};
-		
-		DepthRead = false;
-		DepthWrite = false;
-		
-		Multisample = false; // This controls line rendering algorithm
-		AALine = true;
-		
-		Common = 
+	};	
+	
+	depth
+	{
+		read = false;
+		write = false;
+	};
+	
+	raster
+	{
+		multisample = false; // This controls line rendering algorithm
+		lineaa = false;
+	};
+	
+	code
+	{
+		struct VStoFS
 		{
-			struct VStoFS
-			{
-				float4 position : SV_POSITION;
-			};
+			float4 position : SV_POSITION;
+		};
+
+		cbuffer VertParams
+		{
+			float invViewportWidth;
+			float invViewportHeight;
+			float4x4 worldTransform;
 		};
 		
-		Vertex =
+		struct VertexInput
 		{
-			cbuffer VertParams
-			{
-				float invViewportWidth;
-				float invViewportHeight;
-				float4x4 worldTransform;
-			};
-			
-			struct VertexInput
-			{
-				float2 position : POSITION;
-			};			
+			float2 position : POSITION;
+		};			
+		
+		VStoFS vsmain(VertexInput input)
+		{
+			float4 tfrmdPos = mul(worldTransform, float4(input.position, 0, 1));
 			
-			VStoFS main(VertexInput input)
-			{
-				float4 tfrmdPos = mul(worldTransform, float4(input.position, 0, 1));
-				
-				float tfrmdX = -1.0f + (tfrmdPos.x * invViewportWidth);
-				float tfrmdY = 1.0f - (tfrmdPos.y * invViewportHeight);
+			float tfrmdX = -1.0f + (tfrmdPos.x * invViewportWidth);
+			float tfrmdY = 1.0f - (tfrmdPos.y * invViewportHeight);
+
+			VStoFS output;
+			output.position = float4(tfrmdX, tfrmdY, 0, 1);
 
-				VStoFS output;
-				output.position = float4(tfrmdX, tfrmdY, 0, 1);
+			return output;
+		}
 
-				return output;
-			}
+		cbuffer VertParams // NOTE(review): reuses the block name of the VertParams buffer declared earlier in this code block, although it only holds the tint returned by fsmain - confirm the duplicate name is intended now that both stages share one block
+		{
+			float4 tint;
 		};
 		
-		Fragment =
+		float4 fsmain(VStoFS input) : SV_Target
 		{
-			cbuffer VertParams
-			{
-				float4 tint;
-			};
-			
-			float4 main(VStoFS input) : SV_Target
-			{
-				return tint;
-			}
-		};
+			return tint;
+		}
 	};
 };

+ 43 - 63
Data/Raw/Engine/Shaders/SpriteText.bsl

@@ -1,74 +1,54 @@
-Parameters =
+technique SpriteText
 {
-	mat4x4 	gWorldTransform;
-	float	gInvViewportWidth;
-	float	gInvViewportHeight;
-	float	gViewportYFlip;
-	color	gTint;
-	
-	Sampler2D	gMainTexSamp : alias("gMainTexture");
-	Texture2D	gMainTexture;
-};
-
-Blocks = 
-{
-	Block GUIParams : auto("GUIParams");
-};
-
-Technique =
-{
-	Pass =
+	blend
 	{
-		Target = 
+		target	
 		{
-			Blend = true;
-			Color = { SRCA, SRCIA, ADD };
-			WriteMask = RGB;
-		};	
+			enabled = true;
+			color = { srcA, srcIA, add };
+			writemask = RGB;
+		};
+	};	
+	
+	depth
+	{
+		read = false;
+		write = false;
+	};
 	
-		DepthRead = false;
-		DepthWrite = false;
-		
-		Common =
+	code
+	{
+		cbuffer GUIParams
 		{
-			cbuffer GUIParams
-			{
-				float4x4 gWorldTransform;
-				float gInvViewportWidth;
-				float gInvViewportHeight;
-				float gViewportYFlip;
-				float4 gTint;
-			}	
-		};
-		
-		Vertex =
+			float4x4 gWorldTransform;
+			float gInvViewportWidth;
+			float gInvViewportHeight;
+			float gViewportYFlip;
+			float4 gTint;
+		}	
+
+		void vsmain(
+			in float3 inPos : POSITION,
+			in float2 uv : TEXCOORD0,
+			out float4 oPosition : SV_Position,
+			out float2 oUv : TEXCOORD0)
 		{
-			void main(
-				in float3 inPos : POSITION,
-				in float2 uv : TEXCOORD0,
-				out float4 oPosition : SV_Position,
-				out float2 oUv : TEXCOORD0)
-			{
-				float4 tfrmdPos = mul(gWorldTransform, float4(inPos.xy, 0, 1));
+			float4 tfrmdPos = mul(gWorldTransform, float4(inPos.xy, 0, 1));
 
-				float tfrmdX = -1.0f + (tfrmdPos.x * gInvViewportWidth);
-				float tfrmdY = (1.0f - (tfrmdPos.y * gInvViewportHeight)) * gViewportYFlip;
+			float tfrmdX = -1.0f + (tfrmdPos.x * gInvViewportWidth);
+			float tfrmdY = (1.0f - (tfrmdPos.y * gInvViewportHeight)) * gViewportYFlip;
 
-				oPosition = float4(tfrmdX, tfrmdY, 0, 1);
-				oUv = uv;
-			}
-		};
-		
-		Fragment =
-		{
-			SamplerState gMainTexSamp : register(s0);
-			Texture2D gMainTexture : register(t0);
+			oPosition = float4(tfrmdX, tfrmdY, 0, 1);
+			oUv = uv;
+		}
 
-			float4 main(in float4 inPos : SV_Position, float2 uv : TEXCOORD0) : SV_Target
-			{
-				float4 color = float4(gTint.rgb, gMainTexture.Sample(gMainTexSamp, uv).r * gTint.a);
-				return color;
-			}
-		};
+		SamplerState gMainTexSamp : register(s0);
+		Texture2D gMainTexture : register(t0);
+
+		float4 fsmain(in float4 inPos : SV_Position, float2 uv : TEXCOORD0) : SV_Target
+		{
+			float4 color = float4(gTint.rgb, gMainTexture.Sample(gMainTexSamp, uv).r * gTint.a);
+			return color;
+		}
 	};
 };

+ 0 - 188
Data/Raw/Engine/Shaders/TestFX.bsl

@@ -1,188 +0,0 @@
-Separable = false;
-
-Parameters = 
-{
-	float test1 = 1.0f;
-	float2 test2 = { 1.0f, 1.0f };
-	float3 test3 = { 1.0f, 1.0f, 1.0f };
-	float4 test4 = { 1.0f, 1.0f, 1.0f, 1.0f };
-	mat2x2 test5 = 
-		{ 1.0f, 0.0f,
-		  0.0f,  1.0f };
-	mat2x3 test6 =
-		{ 1.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f };
-	mat2x4 test7 = 
-		{ 1.0f, 0.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f, 0.0f };
-	mat3x2 test8 = 
-		{ 1.0f, 0.0f,
-		  0.0f, 1.0f,
-		  0.0f, 0.0f };
-	mat3x3 test9 =
-		{ 1.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f,
-		  0.0f, 0.0f, 1.0f };
-	mat3x4 test10 =
-		{ 1.0f, 0.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f, 0.0f,
-		  0.0f, 0.0f, 1.0f, 0.0f };
-	mat4x2 test11 =
-		{ 1.0f, 0.0f,
-		  0.0f, 1.0f,
-		  0.0f, 0.0f,
-		  0.0f, 0.0f };
-	mat4x3 test12 =
-		{ 1.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f,
-		  0.0f, 0.0f, 1.0f,
-		  0.0f, 0.0f, 0.0f };
-	mat4x4 test13 =
-		{ 1.0f, 0.0f, 0.0f, 0.0f,
-		  0.0f, 1.0f, 0.0f, 0.0f,
-		  0.0f, 0.0f, 1.0f, 0.0f,
-		  0.0f, 0.0f, 0.0f, 1.0f };
-	Texture2D tex0;
-	Sampler2D samp0 : alias("tex0") =
-	{
-		AddressMode =
-		{
-			U = WRAP;
-			V = WRAP;
-			W = WRAP;
-		};
-		
-		MinFilter = LINEAR;
-		MagFilter = LINEAR;
-		MipFilter = LINEAR;
-		MaxAniso = 0;
-		MipmapBias = 0.0f;
-		MipMin = 0.0f;
-		MipMax = 0.0f;
-		BorderColor = { 1.0f, 1.0f, 1.0f, 1.0f };
-		CompareFunc = PASS;
-	};
-	Texture1D tex1;
-	Sampler1D samp1;
-	Texture3D tex2;
-	Sampler3D samp2;
-	TextureCUBE tex3;
-	SamplerCUBE samp3;
-	Texture2DMS tex4;
-	Sampler2DMS samp4;
-	ByteBuffer buffer1;
-	StructBuffer buffer2;
-	RWByteBuffer buffer3;
-	RWStructBuffer buffer4;
-	AppendBuffer buffer5;
-	ConsumeBuffer buffer6;
-	RWTypedBuffer buffer7;			  
-};
-
-Blocks = 
-{
-	Block global : auto("GLOBAL") : shared(false) : usage(STATIC);
-	Block perFrame : auto("PERFRAME") : shared(false) : usage(DYNAMIC);
-};
-
-Technique = 
-{
-	Renderer = "Any";
-	Tags = { "Animated", "OtherTag" };
-
-	Pass =
-	{
-		Fill = SOLID;
-		Cull = CCW;
-		DepthBias = 0.0f;
-		ScaledDepthBias = 0.0f;
-		DepthClip = true;
-		Scissor = false;
-		Multisample = false;
-		AALine = false;
-		
-		DepthRead = true;
-		DepthWrite = true;
-		CompareFunc = GTE;
-		Stencil = false;
-		StencilReadMask = 0xff;
-		StencilWriteMask = 0xff;
-		StencilOpFront = 
-		{
-			Fail = KEEP;
-			ZFail = KEEP;
-			Pass = KEEP;
-			CompareFunc = FAIL;
-		};
-		StencilOpBack =
-		{
-			Fail = KEEP;
-			ZFail = KEEP;
-			Pass = KEEP;
-			CompareFunc = FAIL;
-		};
-		
-		AlphaToCoverage = false;
-		IndependantBlend = false;
-		Target =
-		{
-			Index = 0;
-			Blend = false;
-			Color =
-			{
-				Source = ONE;
-				Dest = ONE;
-				Op =  ADD;
-			};
-			Alpha =
-			{
-				Source = ONE;
-				Dest = ONE;
-				Op =  ADD;
-			};
-			WriteMask = RGBA;
-		};
-		
-		Vertex = 
-		{
-			cbuffer VertParams
-			{
-				float invViewportWidth;
-				float invViewportHeight;
-				float4x4 worldTransform;
-			};
-			
-			void main(
-			in float3 inPos : POSITION,
-			in float2 uv : TEXCOORD0,
-			out float4 oPosition : SV_Position,
-			out float2 oUv : TEXCOORD0)
-			{
-				float4 tfrmdPos = mul(worldTransform, float4(inPos.xy, 0, 1));
-
-				float tfrmdX = -1.0f + (tfrmdPos.x * invViewportWidth);
-				float tfrmdY = 1.0f - (tfrmdPos.y * invViewportHeight);
-
-				oPosition = float4(tfrmdX, tfrmdY, 0, 1);
-				oUv = uv;
-			}	
-		};
-		
-		Fragment = 
-		{
-			SamplerState mainTexSamp : register(s0);
-			Texture2D mainTexture : register(t0);
-			
-			cbuffer FragParams
-			{
-				float4 tint;
-			};
-			
-			float4 main(in float4 inPos : SV_Position, float2 uv : TEXCOORD0) : SV_Target
-			{
-				float4 color = mainTexture.Sample(mainTexSamp, uv);
-				return color * tint;
-			}
-		};	
-	};	
-};

+ 243 - 244
Data/Raw/Engine/Shaders/TiledDeferredImageBasedLighting.bsl

@@ -5,287 +5,286 @@
 #include "$ENGINE$\LightingCommon.bslinc"
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
-Technique 
-  : inherits("GBufferInput")
-  : inherits("PerCameraData")
-  : inherits("LightingCommon")
-  : inherits("ReflectionCubemapCommon")
-  : inherits("ImageBasedLighting") =
+technique TiledDeferredImageBasedLighting
 {
-	Pass =
-	{
-		Compute = 
-		{			
-			cbuffer Params : register(b0)
-			{
-				uint2 gFramebufferSize;
-			}
+	mixin GBufferInput;
+	mixin PerCameraData;
+	mixin LightingCommon;
+	mixin ReflectionCubemapCommon;
+	mixin ImageBasedLighting;
+
+	code
+	{			
+		[internal]
+		cbuffer Params : register(b0)
+		{
+			uint2 gFramebufferSize;
+		}
+	
+		#if MSAA_COUNT > 1
+		Buffer<float4> gInColor;
+		RWBuffer<float4> gOutput;
+		
+		// Maps a pixel coordinate and a sample index to an element index in the flat per-sample buffers.
+		uint getLinearAddress(uint2 coord, uint sampleIndex)
+		{
+			// Pixels are addressed row by row, gFramebufferSize.x pixels per row
+			uint pixelIdx = coord.y * gFramebufferSize.x + coord.x;
+
+			// Every pixel occupies MSAA_COUNT consecutive elements, one per sample
+			return pixelIdx * MSAA_COUNT + sampleIndex;
+		}
+		
+		// Stores a color for the given pixel sample into gOutput.
+		void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
+		{
+			gOutput[getLinearAddress(coord, sampleIndex)] = color;
+		}
+		
+		// Fetches the color of the given pixel sample from gInColor.
+		float4 readInColorSample(uint2 coord, uint sampleIndex)
+		{
+			return gInColor[getLinearAddress(coord, sampleIndex)];
+		}
+
+		#else
+		Texture2D<float4> gInColor;
+		RWTexture2D<float4>	gOutput;
+		#endif
+					
+		groupshared uint sTileMinZ;
+		groupshared uint sTileMaxZ;
+
+		void getTileZBounds(uint threadIndex, SurfaceData surfaceData[MSAA_COUNT], out float minTileZ, out float maxTileZ)
+		{
+			// Note: To improve performance perhaps:
+			//  - Use halfZ (split depth range into two regions for better culling)
+			//  - Use parallel reduction instead of atomics
 		
+			uint sampleMinZ = 0x7F7FFFFF;
+			uint sampleMaxZ = 0;
+
 			#if MSAA_COUNT > 1
-			Buffer<float4> gInColor;
-			RWBuffer<float4> gOutput;
-			
-			uint getLinearAddress(uint2 coord, uint sampleIndex)
-			{
-				return (coord.y * gFramebufferSize.x + coord.x) * MSAA_COUNT + sampleIndex;
-			}
-			
-			void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
+			[unroll]
+			for(uint i = 0; i < MSAA_COUNT; ++i)
 			{
-				uint idx = getLinearAddress(coord, sampleIndex);
-				gOutput[idx] = color;
+				sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
+				sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
 			}
-			
-			float4 readInColorSample(uint2 coord, uint sampleIndex)
-			{
-				uint idx = getLinearAddress(coord, sampleIndex);
-				return gInColor[idx];
-			}
-
 			#else
-			Texture2D<float4> gInColor;
-			RWTexture2D<float4>	gOutput;
+			sampleMinZ = asuint(-surfaceData[0].depth);
+			sampleMaxZ = asuint(-surfaceData[0].depth);
 			#endif
-						
-			groupshared uint sTileMinZ;
-			groupshared uint sTileMaxZ;
 
-			void getTileZBounds(uint threadIndex, SurfaceData surfaceData[MSAA_COUNT], out float minTileZ, out float maxTileZ)
+			// Set initial values
+			if(threadIndex == 0)
 			{
-				// Note: To improve performance perhaps:
-				//  - Use halfZ (split depth range into two regions for better culling)
-				//  - Use parallel reduction instead of atomics
-			
-				uint sampleMinZ = 0x7F7FFFFF;
-				uint sampleMaxZ = 0;
-
-				#if MSAA_COUNT > 1
-				[unroll]
-				for(uint i = 0; i < MSAA_COUNT; ++i)
-				{
-					sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
-					sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
-				}
-				#else
-				sampleMinZ = asuint(-surfaceData[0].depth);
-				sampleMaxZ = asuint(-surfaceData[0].depth);
-				#endif
-
-				// Set initial values
-				if(threadIndex == 0)
-				{
-					sTileMinZ = 0x7F7FFFFF;
-					sTileMaxZ = 0;
-				}
-				
-				GroupMemoryBarrierWithGroupSync();
-				
-				// Determine minimum and maximum depth values for a tile			
-				InterlockedMin(sTileMinZ, sampleMinZ);
-				InterlockedMax(sTileMaxZ, sampleMaxZ);
-				
-				GroupMemoryBarrierWithGroupSync();
-				
-			    minTileZ = asfloat(sTileMinZ);
-				maxTileZ = asfloat(sTileMaxZ);
+				sTileMinZ = 0x7F7FFFFF;
+				sTileMaxZ = 0;
 			}
 			
-			void calcTileAABB(uint2 tileId, float viewZMin, float viewZMax, out float3 center, out float3 extent)
+			GroupMemoryBarrierWithGroupSync();
+			
+			// Determine minimum and maximum depth values for a tile			
+			InterlockedMin(sTileMinZ, sampleMinZ);
+			InterlockedMax(sTileMaxZ, sampleMaxZ);
+			
+			GroupMemoryBarrierWithGroupSync();
+			
+			minTileZ = asfloat(sTileMinZ);
+			maxTileZ = asfloat(sTileMaxZ);
+		}
+		
+		void calcTileAABB(uint2 tileId, float viewZMin, float viewZMax, out float3 center, out float3 extent)
+		{
+			uint2 pixelPos = tileId * TILE_SIZE;
+		
+			// Convert thread XY coordinates to NDC coordinates
+			float2 uvTopLeft = (pixelPos + 0.5f) / gFramebufferSize;
+			float2 uvBottomRight = (pixelPos + uint2(TILE_SIZE, TILE_SIZE) - 0.5f) / gFramebufferSize;
+		
+			float3 ndcMin;
+			float3 ndcMax;
+		
+			ndcMin.xy = uvTopLeft * 2.0f - float2(1.0f, 1.0f);
+			ndcMax.xy = uvBottomRight * 2.0f - float2(1.0f, 1.0f);
+		
+			// Flip Y depending on render API, depending if Y in NDC is facing up or down
+			// (We negate the value because we want NDC with Y flipped, so origin is top left)
+			float flipY = -sign(gMatProj[1][1]);
+			ndcMin.y *= flipY;
+			ndcMax.y *= flipY;
+		
+			// Camera is looking along negative z, therefore min in view space is max in NDC
+			ndcMin.z = convertToNDCZ(viewZMax);
+			ndcMax.z = convertToNDCZ(viewZMin);
+		
+			float4 corner[5];
+			// Far
+			corner[0] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMax.z, 1.0f));
+			corner[1] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMax.z, 1.0f));
+			corner[2] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMax.z, 1.0f));
+			corner[3] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMax.z, 1.0f));
+			
+			// Near (only one point, as the far away face is guaranteed to be larger in XY extents)
+			corner[4] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMin.z, 1.0f));
+		
+			[unroll]
+			for(uint i = 0; i < 5; ++i)
+				corner[i].xy /= corner[i].w;
+		
+			float3 viewMin = float3(corner[0].xy, viewZMin);
+			float3 viewMax = float3(corner[0].xy, viewZMax);
+			
+			[unroll]
+			for(uint i = 1; i < 4; ++i)
 			{
-				uint2 pixelPos = tileId * TILE_SIZE;
+				viewMin.xy = min(viewMin.xy, corner[i].xy);
+				viewMax.xy = max(viewMax.xy, corner[i].xy);
+			}
 			
-				// Convert threat XY coordinates to NDC coordinates
-				float2 uvTopLeft = (pixelPos + 0.5f) / gFramebufferSize;
-				float2 uvBottomRight = (pixelPos + uint2(TILE_SIZE, TILE_SIZE) - 0.5f) / gFramebufferSize;
+			extent = (viewMax - viewMin) * 0.5f;
+			center = viewMin + extent;
+		}
+		
+		// Tests whether a sphere overlaps an axis-aligned box described by its center and half-extents.
+		bool intersectSphereBox(float3 sCenter, float sRadius, float3 bCenter, float3 bExtents)
+		{
+			// Per-axis distance from the sphere center to the box, zero on axes where the center is within the box's range
+			float3 centerOffset = abs(bCenter - sCenter);
+			float3 outsideDist = max(0, centerOffset - bExtents);
+
+			// Compare squared lengths
+			float distSq = dot(outsideDist, outsideDist);
+			float radiusSq = sRadius * sRadius;
+
+			return distSq < radiusSq;
+		}
+		
+		float4 getLighting(uint2 pixelPos, uint sampleIdx, float2 clipSpacePos, SurfaceData surfaceData, uint probeOffset, uint numProbes)
+		{
+			// x, y are now in clip space, z, w are in view space
+			// We multiply them by a special inverse view-projection matrix, that had the projection entries that affect
+			// z, w eliminated (since they are already in view space)
+			// Note: Multiply by depth should be avoided if using orthographic projection
+			float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
+			float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
+			float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
+			
+			float3 V = normalize(gViewOrigin - worldPosition);
+			float3 N = surfaceData.worldNormal.xyz;
+			float3 R = 2 * dot(V, N) * N - V;
+			float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
+			
+			float4 existingColor;
+			#if MSAA_COUNT > 1
+			existingColor = readInColorSample(pixelPos, sampleIdx);
+			#else
+			existingColor = gInColor.Load(int3(pixelPos.xy, 0));
+			#endif				
 			
-				float3 ndcMin;
-				float3 ndcMax;
+			float3 indirectDiffuse = getSkyIndirectDiffuse(N) * surfaceData.albedo.rgb;
+			float3 imageBasedSpecular = getImageBasedSpecular(worldPosition, V, specR, surfaceData, probeOffset, numProbes);
+
+			float4 totalLighting = existingColor;
+			totalLighting.rgb += indirectDiffuse;
+			totalLighting.rgb += imageBasedSpecular;
 			
-				ndcMin.xy = uvTopLeft * 2.0f - float2(1.0f, 1.0f);
-				ndcMax.xy = uvBottomRight * 2.0f - float2(1.0f, 1.0f);
+			return totalLighting;				
+		}
+					
+		groupshared uint gUnsortedProbeIndices[MAX_PROBES];
+		groupshared uint sNumProbes;
+		
+		[numthreads(TILE_SIZE, TILE_SIZE, 1)]
+		void csmain(
+			uint3 groupId : SV_GroupID,
+			uint3 groupThreadId : SV_GroupThreadID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
+		{
+			uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
+			uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
 			
-				// Flip Y depending on render API, depending if Y in NDC is facing up or down
-				// (We negate the value because we want NDC with Y flipped, so origin is top left)
-				float flipY = -sign(gMatProj[1][1]);
-				ndcMin.y *= flipY;
-				ndcMax.y *= flipY;
+			// Get data for all samples
+			SurfaceData surfaceData[MSAA_COUNT];
 			
-				// Camera is looking along negative z, therefore min in view space is max in NDC
-				ndcMin.z = convertToNDCZ(viewZMax);
-				ndcMax.z = convertToNDCZ(viewZMin);
+			#if MSAA_COUNT > 1
+			[unroll]
+			for(uint i = 0; i < MSAA_COUNT; ++i)
+				surfaceData[i] = getGBufferData(pixelPos, i);
+			#else
+			surfaceData[0] = getGBufferData(pixelPos);
+			#endif
+
+			// Set initial values
+			if(threadIndex == 0)
+				sNumProbes = 0;				
 			
-				float4 corner[5];
-				// Far
-				corner[0] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMax.z, 1.0f));
-				corner[1] = mul(gMatInvProj, float4(ndcMax.x, ndcMin.y, ndcMax.z, 1.0f));
-				corner[2] = mul(gMatInvProj, float4(ndcMax.x, ndcMax.y, ndcMax.z, 1.0f));
-				corner[3] = mul(gMatInvProj, float4(ndcMin.x, ndcMax.y, ndcMax.z, 1.0f));
-				
-				// Near (only one point, as the far away face is guaranteed to be larger in XY extents)
-				corner[4] = mul(gMatInvProj, float4(ndcMin.x, ndcMin.y, ndcMin.z, 1.0f));
+			// Determine per-pixel minimum and maximum depth values
+			float minTileZ, maxTileZ;
+			getTileZBounds(threadIndex, surfaceData, minTileZ, maxTileZ);
 			
-				[unroll]
-				for(uint i = 0; i < 5; ++i)
-					corner[i].xy /= corner[i].w;
+			// Create AABB for the current tile
+			float3 center, extent;
+			calcTileAABB(groupId.xy, minTileZ, maxTileZ, center, extent);
+							
+			// Find probes overlapping the tile
+			for (uint i = 0; i < gNumProbes && i < MAX_LIGHTS; i += TILE_SIZE)
+			{
+				float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
+				float probeRadius = gReflectionProbes[i].radius;
 			
-				float3 viewMin = float3(corner[0].xy, viewZMin);
-				float3 viewMax = float3(corner[0].xy, viewZMax);
-				
-				[unroll]
-				for(uint i = 1; i < 4; ++i)
+				if(intersectSphereBox(probePosition, probeRadius, center, extent))
 				{
-					viewMin.xy = min(viewMin.xy, corner[i].xy);
-					viewMax.xy = max(viewMax.xy, corner[i].xy);
+					uint idx;
+					InterlockedAdd(sNumProbes, 1U, idx);
+					gUnsortedProbeIndices[idx] = i;
 				}
-				
-				extent = (viewMax - viewMin) * 0.5f;
-				center = viewMin + extent;
 			}
-			
-			bool intersectSphereBox(float3 sCenter, float sRadius, float3 bCenter, float3 bExtents)
-			{
-				float3 closestOnBox = max(0, abs(bCenter - sCenter) - bExtents);
-				return dot(closestOnBox, closestOnBox) < sRadius * sRadius;
-			}
-			
-			float4 getLighting(uint2 pixelPos, uint sampleIdx, float2 clipSpacePos, SurfaceData surfaceData, uint probeOffset, uint numProbes)
+
+			GroupMemoryBarrierWithGroupSync();
+
+			// Sort based on original indices. Using parallel enumeration sort (n^2) - could be faster
+			const uint numThreads = TILE_SIZE * TILE_SIZE;
+			for (uint i = threadIndex; i < sNumProbes; i += numThreads)
 			{
-				// x, y are now in clip space, z, w are in view space
-				// We multiply them by a special inverse view-projection matrix, that had the projection entries that effect
-				// z, w eliminated (since they are already in view space)
-				// Note: Multiply by depth should be avoided if using ortographic projection
-				float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
-				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
-				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
-				
-				float3 V = normalize(gViewOrigin - worldPosition);
-				float3 N = surfaceData.worldNormal.xyz;
-				float3 R = 2 * dot(V, N) * N - V;
-				float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
-				
-				float4 existingColor;
-				#if MSAA_COUNT > 1
-				existingColor = readInColorSample(pixelPos, sampleIdx);
-				#else
-				existingColor = gInColor.Load(int3(pixelPos.xy, 0));
-				#endif				
-				
-				float3 indirectDiffuse = getSkyIndirectDiffuse(N) * surfaceData.albedo.rgb;
-				float3 imageBasedSpecular = getImageBasedSpecular(worldPosition, V, specR, surfaceData, probeOffset, numProbes);
+				int idx = gUnsortedProbeIndices[i];
+				uint smallerCount = 0;
 
-				float4 totalLighting = existingColor;
-				totalLighting.rgb += indirectDiffuse;
-				totalLighting.rgb += imageBasedSpecular;
-				
-				return totalLighting;				
+				for (uint j = 0; j < sNumProbes; j++) 
+				{
+					int otherIdx = gUnsortedProbeIndices[j];
+					if (otherIdx < idx)
+						smallerCount++;
+				}
+
+				gReflectionProbeIndices[smallerCount] = gUnsortedProbeIndices[i];
 			}
-						
-			groupshared uint gUnsortedProbeIndices[MAX_PROBES];
-			groupshared uint sNumProbes;
 			
-			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
-			void main(
-				uint3 groupId : SV_GroupID,
-				uint3 groupThreadId : SV_GroupThreadID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
+			GroupMemoryBarrierWithGroupSync();
+			
+			// Generate world position
+			float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
+			float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+		
+			uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
+
+			// Ignore pixels out of valid range
+			if (all(dispatchThreadId.xy < viewportMax))
 			{
-				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
-				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
-				
-				// Get data for all samples
-				SurfaceData surfaceData[MSAA_COUNT];
-				
 				#if MSAA_COUNT > 1
-				[unroll]
-				for(uint i = 0; i < MSAA_COUNT; ++i)
-					surfaceData[i] = getGBufferData(pixelPos, i);
-				#else
-				surfaceData[0] = getGBufferData(pixelPos);
-				#endif
+				float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, gNumProbes);
+				writeBufferSample(pixelPos, 0, lighting);
 
-				// Set initial values
-				if(threadIndex == 0)
-					sNumProbes = 0;				
-				
-				// Determine per-pixel minimum and maximum depth values
-				float minTileZ, maxTileZ;
-				getTileZBounds(threadIndex, surfaceData, minTileZ, maxTileZ);
-				
-				// Create AABB for the current tile
-				float3 center, extent;
-				calcTileAABB(groupId.xy, minTileZ, maxTileZ, center, extent);
-								
-                // Find probes overlapping the tile
-				for (uint i = 0; i < gNumProbes && i < MAX_LIGHTS; i += TILE_SIZE)
+				bool doPerSampleShading = needsPerSampleShading(surfaceData);
+				if(doPerSampleShading)
 				{
-					float4 probePosition = mul(gMatView, float4(gReflectionProbes[i].position, 1.0f));
-					float probeRadius = gReflectionProbes[i].radius;
-				
-					if(intersectSphereBox(probePosition, probeRadius, center, extent))
+					[unroll]
+					for(uint i = 1; i < MSAA_COUNT; ++i)
 					{
-						uint idx;
-						InterlockedAdd(sNumProbes, 1U, idx);
-						gUnsortedProbeIndices[idx] = i;
+						lighting = getLighting(pixelPos, i, clipSpacePos.xy, surfaceData[i], 0, gNumProbes);
+						writeBufferSample(pixelPos, i, lighting);
 					}
 				}
-
-                GroupMemoryBarrierWithGroupSync();
-
-				// Sort based on original indices. Using parallel enumeration sort (n^2) - could be faster
-				const uint numThreads = TILE_SIZE * TILE_SIZE;
-				for (uint i = threadIndex; i < sNumProbes; i += numThreads)
+				else // Splat same information to all samples
 				{
-					int idx = gUnsortedProbeIndices[i];
-					uint smallerCount = 0;
-
-					for (uint j = 0; j < sNumProbes; j++) 
-					{
-						int otherIdx = gUnsortedProbeIndices[j];
-						if (otherIdx < idx)
-							smallerCount++;
-					}
-
-					gReflectionProbeIndices[smallerCount] = gUnsortedProbeIndices[i];
+					[unroll]
+					for(uint i = 1; i < MSAA_COUNT; ++i)
+						writeBufferSample(pixelPos, i, lighting);
 				}
 				
-				GroupMemoryBarrierWithGroupSync();
-				
-				// Generate world position
-				float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
-				float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
-			
-				uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
-
-				// Ignore pixels out of valid range
-				if (all(dispatchThreadId.xy < viewportMax))
-				{
-					#if MSAA_COUNT > 1
-					float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, gNumProbes);
-					writeBufferSample(pixelPos, 0, lighting);
-
-					bool doPerSampleShading = needsPerSampleShading(surfaceData);
-					if(doPerSampleShading)
-					{
-						[unroll]
-						for(uint i = 1; i < MSAA_COUNT; ++i)
-						{
-							lighting = getLighting(pixelPos, i, clipSpacePos.xy, surfaceData[i], 0, gNumProbes);
-							writeBufferSample(pixelPos, i, lighting);
-						}
-					}
-					else // Splat same information to all samples
-					{
-						[unroll]
-						for(uint i = 1; i < MSAA_COUNT; ++i)
-							writeBufferSample(pixelPos, i, lighting);
-					}
-					
-					#else
-					float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, gNumProbes);
-					gOutput[pixelPos] = lighting;
-					#endif
-				}
+				#else
+				float4 lighting = getLighting(pixelPos, 0, clipSpacePos.xy, surfaceData[0], 0, gNumProbes);
+				gOutput[pixelPos] = lighting;
+				#endif
 			}
-		};
+		}
 	};
 };

+ 238 - 239
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -5,280 +5,279 @@
 #include "$ENGINE$\ReflectionCubemapCommon.bslinc"
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 
-Technique 
-  : inherits("GBufferInput")
-  : inherits("PerCameraData")
-  : inherits("LightingCommon")
-  : inherits("ReflectionCubemapCommon")
-  : inherits("ImageBasedLighting") =
+technique TiledDeferredLighting
 {
-	Pass =
-	{
-		Compute = 
-		{			
-			cbuffer Params : register(b0)
-			{
-				// Offsets at which specific light types begin in gLights buffer
-				// Assumed directional lights start at 0
-				// x - offset to point lights, y - offset to spot lights, z - total number of lights
-				uint3 gLightOffsets;
-				uint2 gFramebufferSize;
-			}
+	mixin GBufferInput;
+	mixin PerCameraData;
+	mixin LightingCommon;
+	mixin ReflectionCubemapCommon;
+	mixin ImageBasedLighting;
+
+	code
+	{	
+		[internal]
+		cbuffer Params : register(b0)
+		{
+			// Offsets at which specific light types begin in gLights buffer
+			// Assumed directional lights start at 0
+			// x - offset to point lights, y - offset to spot lights, z - total number of lights
+			uint3 gLightOffsets;
+			uint2 gFramebufferSize;
+		}
+	
+		#if MSAA_COUNT > 1
+		RWBuffer<float4> gOutput : register(u0);
 		
-			#if MSAA_COUNT > 1
-			RWBuffer<float4> gOutput : register(u0);
+		uint getLinearAddress(uint2 coord, uint sampleIndex)
+		{
+			return (coord.y * gFramebufferSize.x + coord.x) * MSAA_COUNT + sampleIndex;
+		}
+		
+		void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
+		{
+			uint idx = getLinearAddress(coord, sampleIndex);
+			gOutput[idx] = color;
+		}
+
+		#else
+		RWTexture2D<float4>	gOutput : register(u0);
+		#endif
+					
+		groupshared uint sTileMinZ;
+		groupshared uint sTileMaxZ;
+
+		groupshared uint sNumLightsPerType[2];
+		groupshared uint sTotalNumLights;
+
+		float4 getLighting(float2 clipSpacePos, SurfaceData surfaceData)
+		{
+			// x, y are now in clip space, z, w are in view space
+			// We multiply them by a special inverse view-projection matrix, that had the projection entries that affect
+			// z, w eliminated (since they are already in view space)
+			// Note: Multiply by depth should be avoided if using orthographic projection
+			float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
+			float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
+			float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
 			
-			uint getLinearAddress(uint2 coord, uint sampleIndex)
-			{
-				return (coord.y * gFramebufferSize.x + coord.x) * MSAA_COUNT + sampleIndex;
-			}
+			uint4 lightOffsets;
+			lightOffsets.x = gLightOffsets[0];
+			lightOffsets.y = 0;
+			lightOffsets.z = sNumLightsPerType[0];
+			lightOffsets.w = sTotalNumLights;
+			
+			float3 V = normalize(gViewOrigin - worldPosition);
+			float3 N = surfaceData.worldNormal.xyz;
+			float3 R = 2 * dot(V, N) * N - V;
+			float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
 			
-			void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
+			return getDirectLighting(worldPosition, V, specR, surfaceData, lightOffsets);				
+		}
+		
+		[numthreads(TILE_SIZE, TILE_SIZE, 1)]
+		void csmain(
+			uint3 groupId : SV_GroupID,
+			uint3 groupThreadId : SV_GroupThreadID,
+			uint3 dispatchThreadId : SV_DispatchThreadID)
+		{
+			uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
+			uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
+			
+			// Note: To improve performance perhaps:
+			//  - Use halfZ (split depth range into two regions for better culling)
+			//  - Use parallel reduction instead of atomics
+			//  - Use AABB instead of frustum (no false positives)
+			//   - Increase tile size to 32x32 to amortize the cost of AABB calc (2x if using halfZ)
+			
+			// Get data for all samples, and determine per-pixel minimum and maximum depth values
+			SurfaceData surfaceData[MSAA_COUNT];
+			uint sampleMinZ = 0x7F7FFFFF;
+			uint sampleMaxZ = 0;
+
+			#if MSAA_COUNT > 1
+			[unroll]
+			for(uint i = 0; i < MSAA_COUNT; ++i)
 			{
-				uint idx = getLinearAddress(coord, sampleIndex);
-				gOutput[idx] = color;
+				surfaceData[i] = getGBufferData(pixelPos, i);
+				
+				sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
+				sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
 			}
-
 			#else
-			RWTexture2D<float4>	gOutput : register(u0);
+			surfaceData[0] = getGBufferData(pixelPos);
+			sampleMinZ = asuint(-surfaceData[0].depth);
+			sampleMaxZ = asuint(-surfaceData[0].depth);
 			#endif
-						
-			groupshared uint sTileMinZ;
-			groupshared uint sTileMaxZ;
-
-            groupshared uint sNumLightsPerType[2];
-			groupshared uint sTotalNumLights;
 
-			float4 getLighting(float2 clipSpacePos, SurfaceData surfaceData)
+			// Set initial values
+			if(threadIndex == 0)
 			{
-				// x, y are now in clip space, z, w are in view space
-				// We multiply them by a special inverse view-projection matrix, that had the projection entries that effect
-				// z, w eliminated (since they are already in view space)
-				// Note: Multiply by depth should be avoided if using ortographic projection
-				float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
-				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
-				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
-				
-				uint4 lightOffsets;
-				lightOffsets.x = gLightOffsets[0];
-				lightOffsets.y = 0;
-				lightOffsets.z = sNumLightsPerType[0];
-				lightOffsets.w = sTotalNumLights;
-				
-				float3 V = normalize(gViewOrigin - worldPosition);
-				float3 N = surfaceData.worldNormal.xyz;
-				float3 R = 2 * dot(V, N) * N - V;
-				float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
-				
-				return getDirectLighting(worldPosition, V, specR, surfaceData, lightOffsets);				
+				sTileMinZ = 0x7F7FFFFF;
+				sTileMaxZ = 0;
+				sNumLightsPerType[0] = 0;
+				sNumLightsPerType[1] = 0;
+				sTotalNumLights = 0;
 			}
 			
-			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
-			void main(
-				uint3 groupId : SV_GroupID,
-				uint3 groupThreadId : SV_GroupThreadID,
-				uint3 dispatchThreadId : SV_DispatchThreadID)
-			{
-				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
-				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
-				
-				// Note: To improve performance perhaps:
-				//  - Use halfZ (split depth range into two regions for better culling)
-				//  - Use parallel reduction instead of atomics
-				//  - Use AABB instead of frustum (no false positives)
-				//   - Increase tile size to 32x32 to amortize the cost of AABB calc (2x if using halfZ)
-				
-				// Get data for all samples, and determine per-pixel minimum and maximum depth values
-				SurfaceData surfaceData[MSAA_COUNT];
-				uint sampleMinZ = 0x7F7FFFFF;
-				uint sampleMaxZ = 0;
+			GroupMemoryBarrierWithGroupSync();
+			
+			// Determine minimum and maximum depth values for a tile			
+			InterlockedMin(sTileMinZ, sampleMinZ);
+			InterlockedMax(sTileMaxZ, sampleMaxZ);
+			
+			GroupMemoryBarrierWithGroupSync();
+			
+			float minTileZ = asfloat(sTileMinZ);
+			float maxTileZ = asfloat(sTileMaxZ);
+			
+			// Create a frustum for the current tile
+			// First determine a scale of the tile compared to the viewport
+			float2 tileScale = gViewportRectangle.zw * rcp(float2(TILE_SIZE, TILE_SIZE));
 
-				#if MSAA_COUNT > 1
-				[unroll]
-				for(uint i = 0; i < MSAA_COUNT; ++i)
-				{
-					surfaceData[i] = getGBufferData(pixelPos, i);
-					
-					sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
-					sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
-				}
-				#else
-				surfaceData[0] = getGBufferData(pixelPos);
-				sampleMinZ = asuint(-surfaceData[0].depth);
-				sampleMaxZ = asuint(-surfaceData[0].depth);
-				#endif
+			// Now we need to use that scale to scale down the frustum.
+			// Assume a projection matrix:
+			// A, 0, C, 0
+			// 0, B, D, 0
+			// 0, 0, Q, QN
+			// 0, 0, -1, 0
+			//
+			// Where A = 2*n / (r - l)
+			// and C = (r + l) / (r - l)
+			// 
+			// Q & QN are used for Z value which we don't need to scale. B & D are equivalent for the
+			// Y value, we'll only consider the X values (A & C) from now on.
+			//
+			// Both A and C are inversely proportional to the size of the frustum (r - l). A larger scale means that
+			// tiles are that much smaller than the viewport. This means as our scale increases, (r - l) decreases,
+			// which means A & C as a whole increase. Therefore:
+			// A' = A * tileScale.x
+			// C' = C * tileScale.x
+			
+			// Aside from scaling, we also need to offset the frustum to the center of the tile.
+			// For this we calculate the bias value which we add to the C & D factors (which control
+			// the offset in the projection matrix).
+			float2 tileBias = tileScale - 1 - groupId.xy * 2;
 
-				// Set initial values
-				if(threadIndex == 0)
+			// This will yield a bias ranging from [-(tileScale - 1), tileScale - 1]. Every second bias is skipped as
+			// it corresponds to a point in-between two tiles, overlapping existing frustums.
+			
+			float flipSign = 1.0f;
+			
+			// Adjust for OpenGL's upside down texture system
+			#if OPENGL
+				flipSign = -1;
+			#endif
+			
+			float At = gMatProj[0][0] * tileScale.x;
+			float Ctt = gMatProj[0][2] * tileScale.x - tileBias.x;
+			
+			float Bt = gMatProj[1][1] * tileScale.y * flipSign;
+			float Dtt = (gMatProj[1][2] * tileScale.y + flipSign * tileBias.y) * flipSign;
+			
+			// Extract left/right/top/bottom frustum planes from scaled projection matrix
+			// Note: Do this on the CPU? Since they're shared among all entries in a tile. Plus they don't change across frames.
+			float4 frustumPlanes[6];
+			frustumPlanes[0] = float4(At, 0.0f, gMatProj[3][2] + Ctt, 0.0f);
+			frustumPlanes[1] = float4(-At, 0.0f, gMatProj[3][2] - Ctt, 0.0f);
+			frustumPlanes[2] = float4(0.0f, -Bt, gMatProj[3][2] - Dtt, 0.0f);
+			frustumPlanes[3] = float4(0.0f, Bt, gMatProj[3][2] + Dtt, 0.0f);
+			
+			// Normalize
+			[unroll]
+			for (uint i = 0; i < 4; ++i) 
+				frustumPlanes[i] *= rcp(length(frustumPlanes[i].xyz));
+			
+			// Generate near/far frustum planes
+			// Note: d gets negated in plane equation, this is why it's in the opposite direction than it intuitively should be
+			frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ); 
+			frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
+			
+			// Find radial & spot lights overlapping the tile
+			for(uint type = 0; type < 2; type++)
+			{
+				uint lightOffset = threadIndex + gLightOffsets[type];
+				uint lightsEnd = gLightOffsets[type + 1];
+				for (uint i = lightOffset; i < lightsEnd && i < MAX_LIGHTS; i += TILE_SIZE)
 				{
-					sTileMinZ = 0x7F7FFFFF;
-					sTileMaxZ = 0;
-					sNumLightsPerType[0] = 0;
-					sNumLightsPerType[1] = 0;
-					sTotalNumLights = 0;
-				}
-				
-				GroupMemoryBarrierWithGroupSync();
-				
-				// Determine minimum and maximum depth values for a tile			
-				InterlockedMin(sTileMinZ, sampleMinZ);
-				InterlockedMax(sTileMaxZ, sampleMaxZ);
-				
-				GroupMemoryBarrierWithGroupSync();
-				
-			    float minTileZ = asfloat(sTileMinZ);
-				float maxTileZ = asfloat(sTileMaxZ);
-				
-				// Create a frustum for the current tile
-				// First determine a scale of the tile compared to the viewport
-				float2 tileScale = gViewportRectangle.zw * rcp(float2(TILE_SIZE, TILE_SIZE));
-
-				// Now we need to use that scale to scale down the frustum.
-				// Assume a projection matrix:
-				// A, 0, C, 0
-				// 0, B, D, 0
-				// 0, 0, Q, QN
-				// 0, 0, -1, 0
-				//
-				// Where A is = 2*n / (r - l)
-				// and C = (r + l) / (r - l)
-				// 
-				// Q & QN are used for Z value which we don't need to scale. B & D are equivalent for the
-				// Y value, we'll only consider the X values (A & C) from now on.
-				//
-				// Both and A and C are inversely proportional to the size of the frustum (r - l). Larger scale mean that
-				// tiles are that much smaller than the viewport. This means as our scale increases, (r - l) decreases,
-				// which means A & C as a whole increase. Therefore:
-				// A' = A * tileScale.x
-				// C' = C * tileScale.x
+					float4 lightPosition = mul(gMatView, float4(gLights[i].position, 1.0f));
+					float lightRadius = gLights[i].attRadius;
+					
+					// Note: The cull method can have false positives. In case of large light bounds and small tiles, it
+					// can end up being quite a lot. Consider adding an extra heuristic to check a separating plane.
+					bool lightInTile = true;
 				
-				// Aside from scaling, we also need to offset the frustum to the center of the tile.
-				// For this we calculate the bias value which we add to the C & D factors (which control
-				// the offset in the projection matrix).
-				float2 tileBias = tileScale - 1 - groupId.xy * 2;
+					// First check side planes as this will cull majority of the lights
+					[unroll]
+					for (uint j = 0; j < 4; ++j)
+					{
+						float dist = dot(frustumPlanes[j], lightPosition);
+						lightInTile = lightInTile && (dist >= -lightRadius);
+					}
 
-				// This will yield a bias ranging from [-(tileScale - 1), tileScale - 1]. Every second bias is skipped as
-				// corresponds to a point in-between two tiles, overlapping existing frustums.
-				
-				float flipSign = 1.0f;
-				
-				// Adjust for OpenGL's upside down texture system
-				#if OPENGL
-					flipSign = -1;
-				#endif
-				
-				float At = gMatProj[0][0] * tileScale.x;
-				float Ctt = gMatProj[0][2] * tileScale.x - tileBias.x;
-				
-				float Bt = gMatProj[1][1] * tileScale.y * flipSign;
-				float Dtt = (gMatProj[1][2] * tileScale.y + flipSign * tileBias.y) * flipSign;
-				
-				// Extract left/right/top/bottom frustum planes from scaled projection matrix
-				// Note: Do this on the CPU? Since they're shared among all entries in a tile. Plus they don't change across frames.
-				float4 frustumPlanes[6];
-				frustumPlanes[0] = float4(At, 0.0f, gMatProj[3][2] + Ctt, 0.0f);
-				frustumPlanes[1] = float4(-At, 0.0f, gMatProj[3][2] - Ctt, 0.0f);
-				frustumPlanes[2] = float4(0.0f, -Bt, gMatProj[3][2] - Dtt, 0.0f);
-				frustumPlanes[3] = float4(0.0f, Bt, gMatProj[3][2] + Dtt, 0.0f);
-				
-				// Normalize
-                [unroll]
-                for (uint i = 0; i < 4; ++i) 
-					frustumPlanes[i] *= rcp(length(frustumPlanes[i].xyz));
-				
-				// Generate near/far frustum planes
-				// Note: d gets negated in plane equation, this is why its in opposite direction than it intuitively should be
-				frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ); 
-				frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
-				
-                // Find radial & spot lights overlapping the tile
-				for(uint type = 0; type < 2; type++)
-				{
-					uint lightOffset = threadIndex + gLightOffsets[type];
-					uint lightsEnd = gLightOffsets[type + 1];
-					for (uint i = lightOffset; i < lightsEnd && i < MAX_LIGHTS; i += TILE_SIZE)
+					// Make sure to do an actual branch, since it's quite likely an entire warp will have the same value
+					[branch]
+					if (lightInTile)
 					{
-						float4 lightPosition = mul(gMatView, float4(gLights[i].position, 1.0f));
-						float lightRadius = gLights[i].attRadius;
-						
-						// Note: The cull method can have false positives. In case of large light bounds and small tiles, it
-						// can end up being quite a lot. Consider adding an extra heuristic to check a separating plane.
-						bool lightInTile = true;
-					
-						// First check side planes as this will cull majority of the lights
+						bool inDepthRange = true;
+				
+						// Check near/far planes
 						[unroll]
-						for (uint j = 0; j < 4; ++j)
+						for (uint j = 4; j < 6; ++j)
 						{
 							float dist = dot(frustumPlanes[j], lightPosition);
-							lightInTile = lightInTile && (dist >= -lightRadius);
+							inDepthRange = inDepthRange && (dist >= -lightRadius);
 						}
-
-						// Make sure to do an actual branch, since it's quite likely an entire warp will have the same value
+						
+						// Light overlaps the tile, add it to the light list
 						[branch]
-						if (lightInTile)
+						if (inDepthRange)
 						{
-							bool inDepthRange = true;
-					
-							// Check near/far planes
-							[unroll]
-							for (uint j = 4; j < 6; ++j)
-							{
-								float dist = dot(frustumPlanes[j], lightPosition);
-								inDepthRange = inDepthRange && (dist >= -lightRadius);
-							}
+							InterlockedAdd(sNumLightsPerType[type], 1U);
 							
-							// In tile, add to branch
-							[branch]
-							if (inDepthRange)
-							{
-								InterlockedAdd(sNumLightsPerType[type], 1U);
-								
-								uint idx;
-								InterlockedAdd(sTotalNumLights, 1U, idx);
-								gLightIndices[idx] = i;
-							}
+							uint idx;
+							InterlockedAdd(sTotalNumLights, 1U, idx);
+							gLightIndices[idx] = i;
 						}
 					}
 				}
+			}
 
-                GroupMemoryBarrierWithGroupSync();
+			GroupMemoryBarrierWithGroupSync();
 
-				// Generate world position
-				float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
-				float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+			// Generate world position
+			float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
+			float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+		
+			uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;			
 			
-				uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;			
-				
-				// Ignore pixels out of valid range
-				if (all(dispatchThreadId.xy < viewportMax))
-				{
-					#if MSAA_COUNT > 1
-					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
-					writeBufferSample(pixelPos, 0, lighting);
+			// Ignore pixels out of valid range
+			if (all(dispatchThreadId.xy < viewportMax))
+			{
+				#if MSAA_COUNT > 1
+				float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+				writeBufferSample(pixelPos, 0, lighting);
 
-					bool doPerSampleShading = needsPerSampleShading(surfaceData);
-					if(doPerSampleShading)
-					{
-						[unroll]
-						for(uint i = 1; i < MSAA_COUNT; ++i)
-						{
-							lighting = getLighting(clipSpacePos.xy, surfaceData[i]);
-							writeBufferSample(pixelPos, i, lighting);
-						}
-					}
-					else // Splat same information to all samples
+				bool doPerSampleShading = needsPerSampleShading(surfaceData);
+				if(doPerSampleShading)
+				{
+					[unroll]
+					for(uint i = 1; i < MSAA_COUNT; ++i)
 					{
-						[unroll]
-						for(uint i = 1; i < MSAA_COUNT; ++i)
-							writeBufferSample(pixelPos, i, lighting);
+						lighting = getLighting(clipSpacePos.xy, surfaceData[i]);
+						writeBufferSample(pixelPos, i, lighting);
 					}
-					
-					#else
-					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
-					gOutput[pixelPos] = lighting;
-					#endif
 				}
+				else // Splat same information to all samples
+				{
+					[unroll]
+					for(uint i = 1; i < MSAA_COUNT; ++i)
+						writeBufferSample(pixelPos, i, lighting);
+				}
+				
+				#else
+				float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+				gOutput[pixelPos] = lighting;
+				#endif
 			}
-		};
+		}
 	};
 };

+ 71 - 85
Data/Raw/Engine/Shaders/Transparent.bsl

@@ -6,105 +6,91 @@
 #include "$ENGINE$\ImageBasedLighting.bslinc"
 #include "$ENGINE$\Surface.bslinc"
 
-Parameters =
+options
 {
-	Sampler2D 	gAlbedoSamp : alias("gAlbedoTex");
-	Sampler2D 	gNormalSamp : alias("gNormalTex");
-	Sampler2D	gRoughnessSamp : alias("gRoughnessTex");
-	Sampler2D	gMetalnessSamp : alias("gMetalnessTex");
-	
-	Texture2D 	gAlbedoTex;
-	Texture2D	gNormalTex = "normal";
-	Texture2D	gRoughnessTex = "white";
-	Texture2D	gMetalnessTex = "black";
-	
-	float		gOpacity   = 1.0f;
+	transparent = true;
 };
 
-Blocks =
+mixin Surface
 {
-	Block MaterialParams;
-};
-
-Transparent = true;
+	mixin LightingCommon;
+	mixin LightGridCommon;
+	mixin ReflectionCubemapCommon;
+	mixin ImageBasedLighting;
 
-Technique 
- : inherits("LightingCommon")
- : inherits("LightGridCommon")
- : inherits("ReflectionCubemapCommon")
- : inherits("ImageBasedLighting")
- : base("Surface") =
-{
-	Pass =
+	blend
 	{
-		DepthWrite = false;
-	
-		Target = 
+		target	
 		{
-			Blend = true;
-			Color = { SRCA, SRCIA, ADD };
-		};	
+			enabled = true;
+			color = { srcA, srcIA, add };
+		};
+	};	
 	
-		Fragment =
+	depth
+	{
+		write = false;
+	};
+
+	code
+	{
+		SamplerState gAlbedoSamp : register(s0);
+		SamplerState gNormalSamp : register(s1);
+		SamplerState gRoughnessSamp : register(s2);
+		SamplerState gMetalnessSamp : register(s3);
+		
+		Texture2D gAlbedoTex : register(t0);
+		Texture2D gNormalTex : register(t1);
+		Texture2D gRoughnessTex : register(t2);
+		Texture2D gMetalnessTex : register(t3);
+		
+		Buffer<uint4> gGridLightOffsetsAndSize;
+		Buffer<uint2> gGridProbeOffsetsAndSize;
+
+		cbuffer MaterialParams : register(b5)
 		{
-			SamplerState gAlbedoSamp : register(s0);
-			SamplerState gNormalSamp : register(s1);
-			SamplerState gRoughnessSamp : register(s2);
-			SamplerState gMetalnessSamp : register(s3);
+			float gOpacity;
+		}
+		
+		float4 fsmain(in VStoFS input) : SV_Target0
+		{
+			float3 normal = normalize(gNormalTex.Sample(gNormalSamp, input.uv0).xyz * 2.0f - float3(1, 1, 1));
+			float3 worldNormal = calcWorldNormal(input, normal);
+		
+			SurfaceData surfaceData;
+			surfaceData.albedo = gAlbedoTex.Sample(gAlbedoSamp, input.uv0);
+			surfaceData.worldNormal.xyz = worldNormal;
+			surfaceData.worldNormal.w = 1.0f;
+			surfaceData.roughness = gRoughnessTex.Sample(gRoughnessSamp, input.uv0).x;
+			surfaceData.metalness = gMetalnessTex.Sample(gMetalnessSamp, input.uv0).x;
 			
-			Texture2D gAlbedoTex : register(t0);
-			Texture2D gNormalTex : register(t1);
-			Texture2D gRoughnessTex : register(t2);
-			Texture2D gMetalnessTex : register(t3);
+			uint2 pixelPos = (uint2)input.position.xy;
+			uint cellIdx = calcCellIdx(pixelPos, input.position.z);
+			uint3 lightOffsetAndSize = gGridLightOffsetsAndSize[cellIdx].rgb;
 			
-			Buffer<uint4> gGridLightOffsetsAndSize;
-			Buffer<uint2> gGridProbeOffsetsAndSize;
-
-			cbuffer MaterialParams : register(b5)
-			{
-				float gOpacity;
-			}
+			uint4 lightOffsets;
+			lightOffsets.x = gLightOffsets[0];
+			lightOffsets.y = lightOffsetAndSize.x;
+			lightOffsets.z = lightOffsets.y + lightOffsetAndSize.y;
+			lightOffsets.w = lightOffsets.z + lightOffsetAndSize.z;
 			
-			float4 main(in VStoFS input) : SV_Target0
-			{
-				float3 normal = normalize(gNormalTex.Sample(gNormalSamp, input.uv0).xyz * 2.0f - float3(1, 1, 1));
-				float3 worldNormal = calcWorldNormal(input, normal);
+			uint2 reflProbeOffsetAndSize = gGridProbeOffsetsAndSize[cellIdx];
 			
-				SurfaceData surfaceData;
-				surfaceData.albedo = gAlbedoTex.Sample(gAlbedoSamp, input.uv0);
-				surfaceData.worldNormal.xyz = worldNormal;
-				surfaceData.worldNormal.w = 1.0f;
-				surfaceData.roughness = gRoughnessTex.Sample(gRoughnessSamp, input.uv0).x;
-				surfaceData.metalness = gMetalnessTex.Sample(gMetalnessSamp, input.uv0).x;
-				
-				uint2 pixelPos = (uint2)input.position.xy;
-				uint cellIdx = calcCellIdx(pixelPos, input.position.z);
-				uint3 lightOffsetAndSize = gGridLightOffsetsAndSize[cellIdx].rgb;
-				
-				uint4 lightOffsets;
-				lightOffsets.x = gLightOffsets[0];
-				lightOffsets.y = lightOffsetAndSize.x;
-				lightOffsets.z = lightOffsets.y + lightOffsetAndSize.y;
-				lightOffsets.w = lightOffsets.z + lightOffsetAndSize.z;
-				
-				uint2 reflProbeOffsetAndSize = gGridProbeOffsetsAndSize[cellIdx];
-				
-				float3 V = normalize(gViewOrigin - input.worldPosition);
-				float3 N = surfaceData.worldNormal.xyz;
-				float3 R = 2 * dot(V, N) * N - V;
-				float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
-				
-				float4 directLighting = getDirectLighting(input.worldPosition, V, specR, surfaceData, lightOffsets);
-				float3 indirectDiffuse = getSkyIndirectDiffuse(surfaceData.worldNormal) * surfaceData.albedo;
-				float3 imageBasedSpecular = getImageBasedSpecular(input.worldPosition, V, specR, surfaceData, 
-					reflProbeOffsetAndSize.x, reflProbeOffsetAndSize.y);
+			float3 V = normalize(gViewOrigin - input.worldPosition);
+			float3 N = surfaceData.worldNormal.xyz;
+			float3 R = 2 * dot(V, N) * N - V;
+			float3 specR = getSpecularDominantDir(N, R, surfaceData.roughness);
+			
+			float4 directLighting = getDirectLighting(input.worldPosition, V, specR, surfaceData, lightOffsets);
+			float3 indirectDiffuse = getSkyIndirectDiffuse(surfaceData.worldNormal) * surfaceData.albedo;
+			float3 imageBasedSpecular = getImageBasedSpecular(input.worldPosition, V, specR, surfaceData, 
+				reflProbeOffsetAndSize.x, reflProbeOffsetAndSize.y);
 
-				float3 totalLighting = directLighting.rgb;
-				totalLighting.rgb += indirectDiffuse;
-				totalLighting.rgb += imageBasedSpecular;
+			float3 totalLighting = directLighting.rgb;
+			totalLighting.rgb += indirectDiffuse;
+			totalLighting.rgb += imageBasedSpecular;
 
-				return float4(totalLighting, gOpacity);
-			}	
-		};
+			return float4(totalLighting, gOpacity);
+		}	
 	};
 };

+ 1 - 5
Source/BansheeSL/BsLexerFX.l

@@ -264,14 +264,10 @@ code			{ BEGIN(CODEBLOCK_HEADER); return TOKEN_CODE; }
 	/* Track when the code block begins, insert all code block characters into our own buffer, record a sequential index */
 	/* of all code blocks in the text, and track bracket open/closed state so we know when we're done with the code block. */
 	/* And finally output a sequential code block index to the parser (it shouldn't be aware of anything else in the block). */
-<CODEBLOCK_HEADER>=		{ BEGIN(CODEBLOCK_EQUALS); return yytext[0]; }
+<CODEBLOCK_HEADER>\{	{ BEGIN(CODEBLOCK); beginCodeBlock(yyextra); yyextra->numOpenBrackets = 1; return yytext[0]; }
 <CODEBLOCK_HEADER>{WS}	{ /* Skip blank */ }
 <CODEBLOCK_HEADER>.		{ return yytext[0]; }
 
-<CODEBLOCK_EQUALS>\{	{ BEGIN(CODEBLOCK); beginCodeBlock(yyextra); yyextra->numOpenBrackets = 1; return yytext[0]; }
-<CODEBLOCK_EQUALS>{WS}	{ /* Skip blank */ }
-<CODEBLOCK_EQUALS>.		{ return yytext[0]; }
-
 <CODEBLOCK>\{			{ yyextra->numOpenBrackets++; appendCodeBlock(yyextra, yytext, 1); }
 <CODEBLOCK>\}			{ 
 	yyextra->numOpenBrackets--; 

+ 1 - 5
Source/BansheeSL/BsParserFX.y

@@ -488,11 +488,7 @@ target_header
 
 target_body
 	: /* empty */
-	| target_statement target_body		{ nodeOptionsAdd(parse_state->memContext, parse_state->topNode->options, &$1); }
-	;
-
-target_statement
-	: target_option
+	| target_option target_body		{ nodeOptionsAdd(parse_state->memContext, parse_state->topNode->options, &$1); }
 	;
 
 target_option

+ 1 - 1
Source/BansheeSL/Source/BsASTFX.c

@@ -13,7 +13,7 @@ OptionInfo OPTION_LOOKUP[] =
 	{ OT_Sort, ODT_Int },
 	{ OT_Transparent, ODT_Bool },
 	{ OT_Technique, ODT_Complex }, 
-	{ OT_Mixin, ODT_Complex },
+	{ OT_Mixin, ODT_String },
 	{ OT_Raster, ODT_Complex },
 	{ OT_Depth, ODT_Complex },
 	{ OT_Stencil, ODT_Complex },

+ 9 - 4
Source/BansheeSL/Source/BsSLFXCompiler.cpp

@@ -233,7 +233,7 @@ namespace bs
 		case Xsc::Reflection::BufferType::RWByteAddressBuffer: return GPOT_RWBYTE_BUFFER;
 		case Xsc::Reflection::BufferType::AppendStructuredBuffer: return GPOT_RWAPPEND_BUFFER;
 		case Xsc::Reflection::BufferType::ConsumeStructuredBuffer: return GPOT_RWCONSUME_BUFFER;
-		default: GPOT_UNKNOWN;
+		default: return GPOT_UNKNOWN;
 		}
 	}
 
@@ -1547,7 +1547,7 @@ namespace bs
 				}
 				else
 				{
-					output.errorMessage = "Base technique \"" + includes + "\" cannot be found.";
+					output.errorMessage = "Mixin \"" + includes + "\" cannot be found.";
 					return false;
 				}
 			}
@@ -1600,7 +1600,7 @@ namespace bs
 				PassData& vkslPassData = vkslTechnique.passes[j];
 
 				// Clean non-standard HLSL 
-				static const std::regex regex("\\[.*layout.*\\(.*\\).*\\]");
+				static const std::regex regex("\\[.*layout.*\\(.*\\).*\\]|\\[.*internal.*\\]|\\[.*color.*\\]");
 				hlslPassData.code = regex_replace(hlslPassData.code, regex, "");
 
 				// Find valid entry points and parameters
@@ -1681,11 +1681,11 @@ namespace bs
 					passDesc.depthStencilState = DepthStencilState::create(passData.depthStencilDesc);
 
 				GPU_PROGRAM_DESC desc;
-				desc.entryPoint = "main";
 				desc.language = metaData.language;
 
 				if (!passData.vertexCode.empty())
 				{
+					desc.entryPoint = "vsmain";
 					desc.source = passData.vertexCode;
 					desc.type = GPT_VERTEX_PROGRAM;
 
@@ -1694,6 +1694,7 @@ namespace bs
 
 				if (!passData.fragmentCode.empty())
 				{
+					desc.entryPoint = "fsmain";
 					desc.source = passData.fragmentCode;
 					desc.type = GPT_FRAGMENT_PROGRAM;
 
@@ -1702,6 +1703,7 @@ namespace bs
 
 				if (!passData.geometryCode.empty())
 				{
+					desc.entryPoint = "gsmain";
 					desc.source = passData.geometryCode;
 					desc.type = GPT_GEOMETRY_PROGRAM;
 
@@ -1710,6 +1712,7 @@ namespace bs
 
 				if (!passData.hullCode.empty())
 				{
+					desc.entryPoint = "hsmain";
 					desc.source = passData.hullCode;
 					desc.type = GPT_HULL_PROGRAM;
 
@@ -1718,6 +1721,7 @@ namespace bs
 
 				if (!passData.domainCode.empty())
 				{
+					desc.entryPoint = "dsmain";
 					desc.source = passData.domainCode;
 					desc.type = GPT_DOMAIN_PROGRAM;
 
@@ -1726,6 +1730,7 @@ namespace bs
 
 				if (!passData.computeCode.empty())
 				{
+					desc.entryPoint = "csmain";
 					desc.source = passData.computeCode;
 					desc.type = GPT_COMPUTE_PROGRAM;