Przeglądaj źródła

Added code for irradiance map generation

BearishSun 8 lat temu
rodzic
commit
4acc7015f1

+ 16 - 0
Data/Raw/Engine/DataList.json

@@ -119,6 +119,10 @@
         {
             "Path": "ReflectionCubemapSampling.bslinc",
             "UUID": "b65d98df-45db-4b03-a97c-bca6b07f1d22"
+        },
+        {
+            "Path": "SHCommon.bslinc",
+            "UUID": "b2a3ecfd-77d7-4a4c-900b-91c3804f4a58"
         }
     ],
     "Shaders": [
@@ -225,6 +229,18 @@
         {
             "Path": "ReflectionCubeImportanceSample.bsl",
             "UUID": "ed16fdd9-7982-4f5c-ae63-8303c786e9ad"
+        },
+        {
+            "Path": "IrradianceComputeSH.bsl",
+            "UUID": "5b431ac7-c97a-407e-9ca3-6845d6cd0d6c"
+        },
+        {
+            "Path": "IrradianceProjectSH.bsl",
+            "UUID": "64d9535e-b5e7-42f5-8dc4-c0b15a5c48b9"
+        },
+        {
+            "Path": "IrradianceReduceSH.bsl",
+            "UUID": "aa40f2be-00e4-4322-a4bf-e435528f1e6e"
         }
     ],
     "Skin": [

+ 122 - 0
Data/Raw/Engine/Includes/SHCommon.bslinc

@@ -0,0 +1,122 @@
+Technique : base("SHCommon") =
+{
+	Language = "HLSL11";
+
+	Pass =
+	{
+		Common = 
+		{
+			struct SHVector5 // 25 SH coefficients (bands 0-4), packed in band order into six float4s and one float
+			{
+				float4 v0; // x: band 0, yzw: band 1
+				float4 v1; // band 2, first four coefficients
+				float4 v2; // x: last band 2 coefficient, yzw: first three band 3 coefficients
+				float4 v3; // band 3, remaining four coefficients
+				float4 v4; // band 4, first four coefficients
+				float4 v5; // band 4, next four coefficients
+				float v6; // band 4, last coefficient
+			};
+			
+			struct SHVector5RGB
+			{
+				SHVector5 R;
+				SHVector5 G;
+				SHVector5 B;
+			};
+			
+			void SHZero(inout SHVector5 v) // Resets every coefficient of v to zero
+			{
+				v.v0 = 0;
+				v.v1 = 0;
+				v.v2 = 0;
+				v.v3 = 0;
+				v.v4 = 0;
+				v.v5 = 0;
+				v.v6 = 0;
+			}
+
+			void SHMultiplyAdd(inout SHVector5 lhs, SHVector5 rhs, float c) // lhs += rhs * c, for every coefficient
+			{
+				lhs.v0 += rhs.v0 * c;
+				lhs.v1 += rhs.v1 * c;
+				lhs.v2 += rhs.v2 * c;
+				lhs.v3 += rhs.v3 * c;
+				lhs.v4 += rhs.v4 * c;
+				lhs.v5 += rhs.v5 * c;
+				lhs.v6 += rhs.v6 * c;
+			}
+			
+			void SHAdd(inout SHVector5 lhs, SHVector5 rhs) // lhs += rhs, for every coefficient
+			{
+				lhs.v0 += rhs.v0;
+				lhs.v1 += rhs.v1;
+				lhs.v2 += rhs.v2;
+				lhs.v3 += rhs.v3;
+				lhs.v4 += rhs.v4;
+				lhs.v5 += rhs.v5;
+				lhs.v6 += rhs.v6;
+			}
+			
+			void SHMultiply(inout SHVector5 lhs, SHVector5 rhs) // lhs *= rhs, coefficient by coefficient (no summation)
+			{
+				lhs.v0 *= rhs.v0;
+				lhs.v1 *= rhs.v1;
+				lhs.v2 *= rhs.v2;
+				lhs.v3 *= rhs.v3;
+				lhs.v4 *= rhs.v4;
+				lhs.v5 *= rhs.v5;
+				lhs.v6 *= rhs.v6;
+			}	
+			
+			/**
+			 * Evaluates the 25 real spherical harmonic basis functions (bands 0 through 4) in the provided direction.
+			 *
+			 * @param	dir		Direction to evaluate the basis in. The polynomial forms used below are only valid for
+			 *					unit length vectors (x^2 + y^2 + z^2 = 1).
+			 * @return			Basis function values packed in band order: v0.x holds band 0, v0.yzw band 1, v1 and v2.x
+			 *					band 2, v2.yzw and v3 band 3, and v4, v5 and v6 band 4.
+			 */
+			SHVector5 SHBasis5(float3 dir)
+			{
+				float x = dir.x;
+				float y = dir.y;
+				float z = dir.z;
+
+				float x2 = x*x;
+				float y2 = y*y;
+				float z2 = z*z;
+
+				float z3 = z2 * z;
+
+				float x4 = x2 * x2;
+				float y4 = y2 * y2;
+				float z4 = z2 * z2;
+
+				SHVector5 o;
+				
+				// Band 0
+				o.v0[0] = 0.282095f;
+
+				// Band 1
+				o.v0[1] = -0.488603f * y;
+				o.v0[2] =  0.488603f * z;
+				o.v0[3] = -0.488603f * x;
+
+				// Band 2
+				o.v1[0] =  1.092548f * x * y;
+				o.v1[1] = -1.092548f * y * z;
+				o.v1[2] =  0.315392f * (3.0f * z2 - 1.0f);
+				o.v1[3] = -1.092548f * x * z;
+				o.v2[0] =  0.546274f * (x2 - y2);
+
+				// Band 3
+				o.v2[1] = -0.590043f * y * (3.0f * x2 - y2);
+				o.v2[2] =  2.890611f * y * x * z;
+				// Must use the same normalization constant as its mirrored counterpart in v3[1] (1/4 * sqrt(21 / (2 * PI)))
+				o.v2[3] = -0.457045f * y * (-1.0f + 5.0f * z2);
+				o.v3[0] =  0.373176f *(5.0f * z3 - 3.0f * z);
+				o.v3[1] = -0.457045f * x * (-1.0f + 5.0f * z2);
+				o.v3[2] =  1.445306f *(x2 - y2) * z;
+				o.v3[3] = -0.590043f * x * (x2 - 3.0f * y2);
+
+				// Band 4
+				o.v4[0] =  2.503340f * x * y * (x2 - y2);
+				o.v4[1] = -1.770130f * y * z * (3.0f * x2 - y2);
+				o.v4[2] =  0.946175f * y * x * (-1.0f + 7.0f * z2);
+				o.v4[3] = -0.669046f * y * z * (-3.0f + 7.0f * z2);
+				// 3/16 * sqrt(1 / PI) * (35z^4 - 30z^2 + 3), with the polynomial scaled by three
+				o.v5[0] =  (105.0f * z4 - 90.0f * z2 + 9.0f) / 28.359261f;
+				o.v5[1] = -0.669046f * x * z * (-3.0f + 7.0f * z2);
+				o.v5[2] =  0.473087f * (x2 - y2) * (-1.0f + 7.0f * z2);
+				o.v5[3] = -1.770130f * x * z * (x2 - 3.0f * y2);
+				o.v6 =  0.625836f * (x4 - 6.0f * y2 * x2 + y4);
+				
+				return o;
+			}
+		};
+	};
+};

+ 137 - 0
Data/Raw/Engine/Shaders/IrradianceComputeSH.bsl

@@ -0,0 +1,137 @@
+#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#include "$ENGINE$\SHCommon.bslinc"
+
+Technique
+ : inherits("ReflectionCubemapCommon")
+ : inherits("SHCommon") =
+{
+	Language = "HLSL11";
+	
+	Pass =
+	{
+		Compute = 
+		{
+			/** SH coefficients accumulated over a set of texels, together with the sum of the weights of those texels. */
+			struct SHCoeffsAndWeight
+			{
+				SHVector5RGB coeffs;
+				float weight;
+			};
+		
+			SamplerState gInputSamp;
+			TextureCube gInputTex;
+		
+			// Receives one entry per thread group, stored at index (groupId.y * gDispatchSize.x + groupId.x)
+			RWStructuredBuffer<SHCoeffsAndWeight> gOutput;
+			
+			cbuffer Params
+			{
+				uint gCubeFace;
+				uint gFaceSize;
+				uint2 gDispatchSize;
+			}			
+			
+			// Per-thread partial results, summed using a parallel reduction at the end of main().
+			// NOTE(review): Every entry is 76 floats (304 bytes), so only 107 entries fit in the 32KB of group shared
+			// memory available to cs_5_0. Confirm that TILE_WIDTH * TILE_HEIGHT provided by the renderer stays below
+			// that limit.
+			groupshared SHCoeffsAndWeight sCoeffs[TILE_WIDTH * TILE_HEIGHT];
+
+			/** 
+			 * Integrates area of a cube face projected onto the surface of the sphere, from [0, 0] to [u, v]. 
+			 * u & v expected in [-1, -1] to [1, 1] range.
+			 *
+			 * See http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/ for derivation.
+			 */
+			float integrateProjectedCubeArea(float u, float v)
+			{
+				return atan2(u * v, sqrt(u * u + v * v + 1.0f));
+			}
+			
+			/** 
+			 * Calculates solid angle of a texel projected onto a sphere. 
+			 *
+			 * @param	u, v			Coordinates of the texel center, in [-1, 1] range.
+			 * @param	invFaceSize		Inverse of the face size in texels. This equals half of the texel extent in the
+			 *							[-1, 1] coordinate range.
+			 */
+			float texelSolidAngle(float u, float v, float invFaceSize)
+			{
+				float x0 = u - invFaceSize;
+				float x1 = u + invFaceSize;
+				float y0 = v - invFaceSize;
+				float y1 = v + invFaceSize;
+
+				return   integrateProjectedCubeArea(x1, y1)
+					   - integrateProjectedCubeArea(x0, y1)
+					   - integrateProjectedCubeArea(x1, y0)
+					   + integrateProjectedCubeArea(x0, y0);
+			}
+			
+			/**
+			 * Projects radiance of a single cubemap face onto the SH basis. Every thread handles a block of
+			 * PIXELS_PER_THREAD x PIXELS_PER_THREAD texels, after which the results of all threads in the group are summed
+			 * and written as a single entry in gOutput.
+			 */
+			[numthreads(TILE_WIDTH, TILE_HEIGHT, 1)]
+			void main(
+				uint groupIdx : SV_GroupIndex,
+				uint3 groupId : SV_GroupID,
+				uint3 dispatchThreadId : SV_DispatchThreadID)
+			{
+				SHCoeffsAndWeight data;
+				data.weight = 0;
+				
+				SHZero(data.coeffs.R);
+				SHZero(data.coeffs.G);
+				SHZero(data.coeffs.B);
+				
+				float invFaceSize = 1.0f / gFaceSize;
+			
+				// Clamp the processed block to the face, so pixels out of valid range are ignored
+				uint2 faceEnd = uint2(gFaceSize, gFaceSize);
+				uint2 pixelCoords = dispatchThreadId.xy * PIXELS_PER_THREAD;
+				uint2 pixelCoordsEnd = min(pixelCoords + uint2(PIXELS_PER_THREAD, PIXELS_PER_THREAD), faceEnd);
+				
+				for(uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
+				{
+					// Map from [0, size-1] to [-1.0 + invSize, 1.0 - invSize]
+					// (+0.5 in order to sample center of texel)
+					float v = 2.0f * (y + 0.5f) * invFaceSize - 1.0f;
+				
+					for(uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
+					{
+						float u = 2.0f * (x + 0.5f) * invFaceSize - 1.0f;
+						
+						float3 dir = getDirFromCubeFace(gCubeFace, float2(u, v));
+						
+						// Need to calculate solid angle (weight) of the texel, as cube face corners have
+						// much smaller solid angle, meaning many of them occupy the same area when projected
+						// on a sphere. Without weighing that area would look too bright.
+						float weight = texelSolidAngle(u, v, invFaceSize);
+						
+						SHVector5 shBasis = SHBasis5(dir);
+						float3 radiance = gInputTex.SampleLevel(gInputSamp, dir, 0).rgb;
+						
+						SHMultiplyAdd(data.coeffs.R, shBasis, radiance.r * weight);
+						SHMultiplyAdd(data.coeffs.G, shBasis, radiance.g * weight);
+						SHMultiplyAdd(data.coeffs.B, shBasis, radiance.b * weight);
+						
+						data.weight += weight;
+					}
+				}
+				
+				sCoeffs[groupIdx] = data;
+				
+				GroupMemoryBarrierWithGroupSync();
+				
+				// Parallel reduction. Each step adds the upper half of the remaining entries to the lower half, which
+				// requires the number of threads in a group to be a power of two.
+				uint numThreads = TILE_WIDTH * TILE_HEIGHT;
+				[unroll]
+				for(uint tc = numThreads / 2; tc > 0; tc >>= 1)
+				{
+					if(groupIdx < tc)
+					{
+						SHAdd(sCoeffs[groupIdx].coeffs.R, sCoeffs[groupIdx + tc].coeffs.R);
+						SHAdd(sCoeffs[groupIdx].coeffs.G, sCoeffs[groupIdx + tc].coeffs.G);
+						SHAdd(sCoeffs[groupIdx].coeffs.B, sCoeffs[groupIdx + tc].coeffs.B);
+
+						sCoeffs[groupIdx].weight += sCoeffs[groupIdx + tc].weight;
+					}
+				
+					GroupMemoryBarrierWithGroupSync();
+				}
+				
+				// First thread holds the sum for the entire group
+				if(groupIdx == 0)
+				{
+					uint outputIdx = groupId.y * gDispatchSize.x + groupId.x;
+					gOutput[outputIdx] = sCoeffs[0];
+				}
+			}
+		};
+	};
+};

+ 80 - 0
Data/Raw/Engine/Shaders/IrradianceProjectSH.bsl

@@ -0,0 +1,80 @@
+#include "$ENGINE$\PPBase.bslinc"
+#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#include "$ENGINE$\SHCommon.bslinc"
+
+Technique 
+ : inherits("PPBase")
+ : inherits("ReflectionCubemapCommon")
+ : inherits("SHCommon") =
+{
+	Language = "HLSL11";
+	
+	Pass =
+	{
+		Fragment =
+		{
+			cbuffer Params
+			{
+				int gCubeFace;
+			}	
+		
+			StructuredBuffer<SHVector5RGB> gSHCoeffs;
+
+			float evaluateLambert(SHVector5 coeffs)
+			{
+				// Scales SH coefficients by the per-band cosine lobe (Lambert diffuse) factors and sums them up. Caller
+				// is expected to have multiplied the coefficients with the SH basis evaluated in the lookup direction.
+				// See: http://cseweb.ucsd.edu/~ravir/papers/invlamb/josa.pdf for derivation of the cosine lobe factors
+				float output = 0.0f;
+				
+				// Band 0 (factor 1.0), single coefficient in v0.r
+				output += coeffs.v0[0];
+				
+				// Band 1 (factor 2/3), three coefficients in v0.gba
+				float f = (2.0f/3.0f);
+				float4 f4 = float4(f, f, f, f);
+				
+				output += dot(coeffs.v0.gba, f4.rgb);
+				
+				// Band 2 (factor 1/4), five coefficients in v1 and v2.r
+				f = (1.0f/4.0f);
+				f4 = float4(f, f, f, f);
+				
+				output += dot(coeffs.v1, f4);
+				output += coeffs.v2.r * f;
+				
+				// Band 3 (factor 0), coefficients in v2.gba and v3 contribute nothing and are skipped
+				
+				// Band 4 (factor -1/24), nine coefficients in v4, v5 and v6
+				f = (-1.0f/24.0f);
+				f4 = float4(f, f, f, f);
+				
+				output += dot(coeffs.v4, f4);
+				output += dot(coeffs.v5, f4);
+				output += coeffs.v6 * f;
+				
+				return output;
+			}
+			
+			/** 
+			 * Evaluates irradiance for the direction matching the current pixel of the cube face being rendered, using
+			 * the SH coefficients stored in the first entry of gSHCoeffs.
+			 */
+			float4 main(VStoFS input) : SV_Target0
+			{
+				// Remap UV so the face center lands on the origin (presumably from [0, 1] to [-1, 1] - TODO confirm
+				// the range of uv0 provided by PPBase)
+				float3 lookupDir = getDirFromCubeFace(gCubeFace, input.uv0 * 2.0f - 1.0f);
+				SHVector5 basis = SHBasis5(lookupDir);
+				
+				// Weigh each color channel's coefficients by the basis evaluated in the lookup direction
+				SHVector5RGB weighted = gSHCoeffs[0];
+				SHMultiply(weighted.R, basis);
+				SHMultiply(weighted.G, basis);
+				SHMultiply(weighted.B, basis);
+				
+				float red = evaluateLambert(weighted.R);
+				float green = evaluateLambert(weighted.G);
+				float blue = evaluateLambert(weighted.B);
+				
+				return float4(red, green, blue, 1.0f);
+			}	
+		};
+	};
+};

+ 65 - 0
Data/Raw/Engine/Shaders/IrradianceReduceSH.bsl

@@ -0,0 +1,65 @@
+#include "$ENGINE$\ReflectionCubemapCommon.bslinc"
+#include "$ENGINE$\SHCommon.bslinc"
+
+Technique
+ : inherits("ReflectionCubemapCommon")
+ : inherits("SHCommon") =
+{
+	Language = "HLSL11";
+	
+	Pass =
+	{
+		Compute = 
+		{
+			#define PI 3.1415926
+		
+			/** SH coefficients accumulated over a set of texels, together with the sum of the weights of those texels. */
+			struct SHCoeffsAndWeight
+			{
+				SHVector5RGB coeffs;
+				float weight;
+			};
+
+			StructuredBuffer<SHCoeffsAndWeight> gInput;
+			RWStructuredBuffer<SHVector5RGB> gOutput;
+			
+			cbuffer Params
+			{
+				uint gNumEntries;
+			}			
+			
+			/** 
+			 * Sums the first gNumEntries entries of gInput, normalizes the sum by the accumulated weight and writes the
+			 * result as the first entry of gOutput.
+			 */
+			[numthreads(1, 1, 1)]
+			void main(
+				uint groupIdx : SV_GroupIndex,
+				uint3 groupId : SV_GroupID,
+				uint3 dispatchThreadId : SV_DispatchThreadID)
+			{
+				SHVector5RGB coeffs;
+				float weight = 0;
+				
+				SHZero(coeffs.R);
+				SHZero(coeffs.G);
+				SHZero(coeffs.B);
+				
+				// Note: There shouldn't be many entries, so we add them all in one thread. Otherwise we should do parallel reduction.
+				for(uint i = 0; i < gNumEntries; i++)
+				{
+					SHCoeffsAndWeight current = gInput[i];
+				
+					SHAdd(coeffs.R, current.coeffs.R);
+					SHAdd(coeffs.G, current.coeffs.G);
+					SHAdd(coeffs.B, current.coeffs.B);
+
+					weight += current.weight;
+				}
+				
+				// Normalize so that the accumulated weights add up to the area of a unit sphere (4 * PI). Output zero
+				// if nothing was accumulated, in order to avoid a division by zero.
+				float normFactor = weight > 0.0f ? (4 * PI) / weight : 0.0f;
+				
+				// SHMultiply() expects a SH vector as its right operand, so the scalar is applied by accumulating the
+				// scaled coefficients into a zeroed vector instead
+				SHVector5RGB normalized;
+				SHZero(normalized.R);
+				SHZero(normalized.G);
+				SHZero(normalized.B);
+				
+				SHMultiplyAdd(normalized.R, coeffs.R, normFactor);
+				SHMultiplyAdd(normalized.G, coeffs.G, normFactor);
+				SHMultiplyAdd(normalized.B, coeffs.B, normFactor);
+					
+				gOutput[0] = normalized;
+			}
+		};
+	};
+};

+ 129 - 0
Source/BansheeEngine/Include/BsReflectionProbes.h

@@ -62,6 +62,126 @@ namespace bs { namespace ct
 		GpuParamTexture mInputTexture;
 	};
 
+	/** 
+	 * Vector representing spherical harmonic coefficients for 5 bands. Holds 25 values in total, as band with index
+	 * l contributes (2l + 1) coefficients (1 + 3 + 5 + 7 + 9).
+	 */
+	struct SHVector5
+	{
+		SHVector5()
+			:coeffs() // Value-initializes all coefficients to zero
+		{ }
+
+		float coeffs[25];
+	};
+
+	/** Vector representing spherical coefficients for 5 bands, separate for red, green and blue components. */
+	struct SHVector5RGB
+	{
+		SHVector5 R, G, B;
+	};
+
+	/** Intermediate structure used for spherical coefficient calculation. Contains RGB coefficients and weight. */
+	struct SHCoeffsAndWeight
+	{
+		SHVector5RGB coeffs;
+		float weight;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(IrradianceComputeSHParamDef)
+		BS_PARAM_BLOCK_ENTRY(int, gCubeFace)
+		BS_PARAM_BLOCK_ENTRY(int, gFaceSize)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gDispatchSize)
+	BS_PARAM_BLOCK_END
+
+	extern IrradianceComputeSHParamDef gIrradianceComputeSHParamDef;
+
+	/** Computes spherical harmonic coefficients from a single face of a radiance cubemap, using a compute shader. */
+	class IrradianceComputeSHMat : public RendererMaterial<IrradianceComputeSHMat>
+	{
+		RMAT_DEF("IrradianceComputeSH.bsl")
+
+	public:
+		IrradianceComputeSHMat();
+
+		/** 
+		 * Computes spherical harmonic coefficients from a radiance texture and outputs a buffer containing a list of
+		 * coefficient sets (one set of coefficients for each thread group). Coefficients must be reduced and normalized
+		 * by IrradianceReduceSHMat before use. Output buffer should be created by calling createOutputBuffer().
+		 *
+		 * @param[in]	source		Radiance cubemap to read from. Its faces are expected to be square.
+		 * @param[in]	face		Index of the cubemap face to process.
+		 * @param[in]	output		Buffer that receives the coefficient sets.
+		 */
+		void execute(const SPtr<Texture>& source, UINT32 face, const SPtr<GpuBuffer>& output);
+
+		/** 
+		 * Creates a buffer of adequate size to be used as output for this material. 
+		 *
+		 * @param[in]	source			Texture that will be provided to execute(), used for determining the buffer size.
+		 * @param[out]	numCoeffSets	Number of coefficient sets the buffer has room for (one per thread group).
+		 */
+		static SPtr<GpuBuffer> createOutputBuffer(const SPtr<Texture>& source, UINT32& numCoeffSets);
+
+	private:
+		static const UINT32 NUM_SAMPLES;
+
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		GpuParamTexture mInputTexture;
+		GpuParamBuffer mOutputBuffer;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(IrradianceReduceSHParamDef)
+		BS_PARAM_BLOCK_ENTRY(int, gNumEntries)
+	BS_PARAM_BLOCK_END
+
+	extern IrradianceReduceSHParamDef gIrradianceReduceSHParamDef;
+
+	/** 
+	 * Sums spherical harmonic coefficients calculated by each thread group of IrradianceComputeSHMat and outputs a single
+	 * set of normalized coefficients. 
+	 */
+	class IrradianceReduceSHMat : public RendererMaterial<IrradianceReduceSHMat>
+	{
+		RMAT_DEF("IrradianceReduceSH.bsl")
+
+	public:
+		IrradianceReduceSHMat();
+
+		/** 
+		 * Sums spherical harmonic coefficients calculated by each thread group of IrradianceComputeSHMat and outputs a
+		 * single set of normalized coefficients. Output buffer should be created by calling createOutputBuffer().
+		 *
+		 * @param[in]	source			Buffer containing the coefficient sets output by IrradianceComputeSHMat.
+		 * @param[in]	numCoeffSets	Number of coefficient sets in @p source to sum up.
+		 * @param[in]	output			Buffer that receives the normalized set of coefficients.
+		 */
+		void execute(const SPtr<GpuBuffer>& source, UINT32 numCoeffSets, const SPtr<GpuBuffer>& output);
+
+		/** Creates a buffer of adequate size to be used as output for this material. */
+		static SPtr<GpuBuffer> createOutputBuffer();
+
+	private:
+		static const UINT32 NUM_SAMPLES;
+
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		GpuParamBuffer mInputBuffer;
+		GpuParamBuffer mOutputBuffer;
+	};
+
+	BS_PARAM_BLOCK_BEGIN(IrradianceProjectSHParamDef)
+		BS_PARAM_BLOCK_ENTRY(int, gCubeFace)
+	BS_PARAM_BLOCK_END
+
+	extern IrradianceProjectSHParamDef gIrradianceProjectSHParamDef;
+
+	/** 
+	 * Evaluates spherical harmonic coefficients calculated by IrradianceReduceSHMat and renders the result onto faces
+	 * of a cubemap.
+	 */
+	class IrradianceProjectSHMat : public RendererMaterial<IrradianceProjectSHMat>
+	{
+		RMAT_DEF("IrradianceProjectSH.bsl")
+
+	public:
+		IrradianceProjectSHMat();
+
+		/** 
+		 * Evaluates spherical harmonic coefficients calculated by IrradianceReduceSHMat and renders the result onto a
+		 * single face of a cubemap.
+		 *
+		 * @param[in]	shCoeffs	Buffer containing the normalized set of coefficients.
+		 * @param[in]	face		Index of the cubemap face being rendered.
+		 * @param[in]	target		Render target to draw into.
+		 */
+		void execute(const SPtr<GpuBuffer>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target);
+
+	private:
+		static const UINT32 NUM_SAMPLES;
+
+		SPtr<GpuParamBlockBuffer> mParamBuffer;
+		GpuParamBuffer mInputBuffer;
+	};
 
 	/** Helper class that handles generation and processing of textures used for reflection probes. */
 	class BS_EXPORT ReflectionProbes
@@ -78,6 +198,15 @@ namespace bs { namespace ct
 		 */
 		static void filterCubemapForSpecular(const SPtr<Texture>& cubemap, const SPtr<Texture>& scratch);
 
+		/**
+		 * Performs filtering on the cubemap, populating the output cubemap with values that can be used for evaluating
+		 * irradiance for use in diffuse lighting.
+		 * 
+		 * @param[in]		cubemap		Cubemap to filter. Its mip level 0 will be used as source.
+		 * @param[in]		output		Output cubemap to store the irradiance data in.
+		 */
+		static void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output);
+
 		/**
 		 * Scales a cubemap and outputs it in the destination texture, using hardware acceleration. If both textures are the
 		 * same size, performs a copy instead.

+ 173 - 0
Source/BansheeEngine/Source/BsReflectionProbes.cpp

@@ -4,6 +4,7 @@
 #include "BsTexture.h"
 #include "BsGpuParamsSet.h"
 #include "BsRendererUtility.h"
+#include "BsGpuBuffer.h"
 
 namespace bs { namespace ct
 {
@@ -76,6 +77,153 @@ namespace bs { namespace ct
 		gRendererUtility().drawScreenQuad();
 	}
 
+	IrradianceComputeSHParamDef gIrradianceComputeSHParamDef;
+
+	const static UINT32 TILE_WIDTH = 16;
+	const static UINT32 TILE_HEIGHT = 8;
+	const static UINT32 PIXELS_PER_THREAD = 4;
+
+	IrradianceComputeSHMat::IrradianceComputeSHMat()
+	{
+		mParamBuffer = gIrradianceComputeSHParamDef.createBuffer();
+
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer);
+
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+		params->getTextureParam(GPT_COMPUTE_PROGRAM, "gInputTex", mInputTexture);
+		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputBuffer);
+	}
+
+	void IrradianceComputeSHMat::_initDefines(ShaderDefines& defines)
+	{
+		// TILE_WIDTH * TILE_HEIGHT must be pow2 because of parallel reduction algorithm
+		defines.set("TILE_WIDTH", TILE_WIDTH);
+		defines.set("TILE_HEIGHT", TILE_HEIGHT);
+
+		// For very small textures this should be reduced so number of launched threads can properly utilize GPU cores
+		defines.set("PIXELS_PER_THREAD", PIXELS_PER_THREAD);
+	}
+
+	void IrradianceComputeSHMat::execute(const SPtr<Texture>& source, UINT32 face, const SPtr<GpuBuffer>& output)
+	{
+		// Faces of the source texture are expected to be square
+		auto& texProps = source->getProperties();
+		const UINT32 faceSize = texProps.getWidth();
+		assert(faceSize == texProps.getHeight());
+
+		// Enough thread groups to cover the entire face, with each thread processing a block of pixels
+		Vector2I numGroups;
+		numGroups.x = Math::divideAndRoundUp(faceSize, TILE_WIDTH * PIXELS_PER_THREAD);
+		numGroups.y = Math::divideAndRoundUp(faceSize, TILE_HEIGHT * PIXELS_PER_THREAD);
+
+		gIrradianceComputeSHParamDef.gCubeFace.set(mParamBuffer, face);
+		gIrradianceComputeSHParamDef.gFaceSize.set(mParamBuffer, faceSize);
+		gIrradianceComputeSHParamDef.gDispatchSize.set(mParamBuffer, numGroups);
+
+		mInputTexture.set(source);
+		mOutputBuffer.set(output);
+
+		gRendererUtility().setComputePass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+
+		RenderAPI::instance().dispatchCompute(numGroups.x, numGroups.y);
+	}
+
+	SPtr<GpuBuffer> IrradianceComputeSHMat::createOutputBuffer(const SPtr<Texture>& source, UINT32& numCoeffSets)
+	{
+		// Faces of the source texture are expected to be square
+		auto& texProps = source->getProperties();
+		const UINT32 faceSize = texProps.getWidth();
+		assert(faceSize == texProps.getHeight());
+
+		// Same calculation as in execute(): every dispatched thread group outputs a single set of coefficients
+		Vector2I numGroups;
+		numGroups.x = Math::divideAndRoundUp(faceSize, TILE_WIDTH * PIXELS_PER_THREAD);
+		numGroups.y = Math::divideAndRoundUp(faceSize, TILE_HEIGHT * PIXELS_PER_THREAD);
+
+		numCoeffSets = numGroups.x * numGroups.y;
+
+		GPU_BUFFER_DESC desc;
+		desc.type = GBT_STRUCTURED;
+		desc.format = BF_UNKNOWN;
+		desc.elementSize = sizeof(SHCoeffsAndWeight);
+		desc.elementCount = numCoeffSets;
+
+		return GpuBuffer::create(desc);
+	}
+
+	IrradianceReduceSHParamDef gIrradianceReduceSHParamDef;
+
+	IrradianceReduceSHMat::IrradianceReduceSHMat()
+	{
+		mParamBuffer = gIrradianceReduceSHParamDef.createBuffer();
+
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer);
+
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gInput", mInputBuffer);
+		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputBuffer);
+	}
+
+	void IrradianceReduceSHMat::_initDefines(ShaderDefines& defines)
+	{
+		// Do nothing
+	}
+
+	void IrradianceReduceSHMat::execute(const SPtr<GpuBuffer>& source, UINT32 numCoeffSets, 
+		const SPtr<GpuBuffer>& output)
+	{
+		gIrradianceReduceSHParamDef.gNumEntries.set(mParamBuffer, numCoeffSets);
+
+		mInputBuffer.set(source);
+		mOutputBuffer.set(output);
+
+		RenderAPI& rapi = RenderAPI::instance();
+
+		gRendererUtility().setComputePass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+		rapi.dispatchCompute(1);
+	}
+
+	SPtr<GpuBuffer> IrradianceReduceSHMat::createOutputBuffer()
+	{
+		GPU_BUFFER_DESC bufferDesc;
+		bufferDesc.type = GBT_STRUCTURED;
+		bufferDesc.elementCount = 1;
+		bufferDesc.elementSize = sizeof(SHVector5RGB);
+		bufferDesc.format = BF_UNKNOWN;
+
+		return GpuBuffer::create(bufferDesc);
+	}
+
+	IrradianceProjectSHParamDef gIrradianceProjectSHParamDef;
+
+	IrradianceProjectSHMat::IrradianceProjectSHMat()
+	{
+		mParamBuffer = gIrradianceProjectSHParamDef.createBuffer();
+
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer);
+
+		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+		params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mInputBuffer);
+	}
+
+	void IrradianceProjectSHMat::_initDefines(ShaderDefines& defines)
+	{
+		// Do nothing
+	}
+
+	void IrradianceProjectSHMat::execute(const SPtr<GpuBuffer>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target)
+	{
+		gIrradianceProjectSHParamDef.gCubeFace.set(mParamBuffer, face);
+
+		mInputBuffer.set(shCoeffs);
+
+		RenderAPI& rapi = RenderAPI::instance();
+		rapi.setRenderTarget(target);
+
+		gRendererUtility().setPass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+		gRendererUtility().drawScreenQuad();
+	}
+
 	const UINT32 ReflectionProbes::REFLECTION_CUBEMAP_SIZE = 256;
 
 	void ReflectionProbes::filterCubemapForSpecular(const SPtr<Texture>& cubemap, const SPtr<Texture>& scratch)
@@ -139,6 +287,31 @@ namespace bs { namespace ct
 		}
 	}
 
+	void ReflectionProbes::filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output)
+	{
+		static IrradianceComputeSHMat shCompute;
+		static IrradianceReduceSHMat shReduce;
+		static IrradianceProjectSHMat shProject;
+		/*
+		 * Scratch buffers are created once and reused for every face: the first receives a coefficient set per
+		 * compute thread group, the second receives the single reduced and normalized set.
+		 *
+		 * NOTE(review): Each iteration below computes and reduces coefficients from a single source face and projects
+		 * them onto the same face of the output, so an output face only ever sees radiance from the matching source
+		 * face. Irradiance normally requires radiance integrated over all six faces before projecting - confirm
+		 * whether per-face processing is intended.
+		 */
+		UINT32 numCoeffSets;
+		SPtr<GpuBuffer> coeffSetBuffer = IrradianceComputeSHMat::createOutputBuffer(cubemap, numCoeffSets);
+		SPtr<GpuBuffer> coeffBuffer = IrradianceReduceSHMat::createOutputBuffer();
+		for (UINT32 face = 0; face < 6; face++)
+		{
+			RENDER_TEXTURE_DESC cubeFaceRTDesc; // Targets mip level 0 of a single face of the output cubemap
+			cubeFaceRTDesc.colorSurfaces[0].texture = output;
+			cubeFaceRTDesc.colorSurfaces[0].face = face;
+			cubeFaceRTDesc.colorSurfaces[0].numFaces = 1;
+			cubeFaceRTDesc.colorSurfaces[0].mipLevel = 0;
+
+			SPtr<RenderTarget> target = RenderTexture::create(cubeFaceRTDesc);
+
+			shCompute.execute(cubemap, face, coeffSetBuffer);
+			shReduce.execute(coeffSetBuffer, numCoeffSets, coeffBuffer);
+			shProject.execute(coeffBuffer, face, target);
+		}
+	}
+
 	void ReflectionProbes::scaleCubemap(const SPtr<Texture>& src, UINT32 srcMip, const SPtr<Texture>& dst, UINT32 dstMip)
 	{
 		static ReflectionCubeDownsampleMat downsampleMat;

+ 6 - 0
Source/BansheeUtility/Include/BsMath.h

@@ -107,6 +107,12 @@ namespace bs
 		/** Returns the integer nearest to the provided value. */
 		static int roundToInt(float val) { return (int)std::floor(val + 0.5f); }
 
+		/** 
+		 * Divides an integer by another integer and returns the result, rounded up. Only works if the dividend is
+		 * zero or positive, and the divisor is positive. 
+		 *
+		 * The result is computed from the quotient and remainder, instead of adding (d - 1) to the dividend first, so
+		 * it does not overflow when @p n is close to the maximum integer value.
+		 */
+		static int divideAndRoundUp(int n, int d) { return n / d + ((n % d) != 0 ? 1 : 0); }
+
 		/** Returns the nearest integer equal or lower of the provided value. */
 		static float floor(float val) { return (float)std::floor(val); }