Преглед на файлове

WIP on light probe extrapolation

BearishSun преди 8 години
родител
ревизия
7003446b78

+ 4 - 0
Data/Raw/Engine/DataList.json

@@ -353,6 +353,10 @@
         {
             "Path": "IrradianceEvaluate.bsl",
             "UUID": "6f6ed59d-b94b-428e-91bc-82c94ed48892"
+        },
+        {
+            "Path": "Clear.bsl",
+            "UUID": "5a325167-5bab-4925-8b50-95434448930f"
         }
     ],
     "Skin": [

+ 40 - 0
Data/Raw/Engine/Shaders/Clear.bsl

@@ -0,0 +1,40 @@
+technique Clear
+{
+	depth
+	{
+		read = false;
+		write = false;
+	};
+
+	code
+	{
+		struct VStoFS
+		{
+			float4 position : SV_Position;
+		};
+		// Per-vertex input; only screenPos is read by vsmain, uv0 is not used by this shader.
+		struct VertexInput
+		{
+			float2 screenPos : POSITION;
+			float2 uv0 : TEXCOORD0;
+		};
+		// Value the fragment program writes to every channel of the render target.
+		cbuffer Params
+		{
+			uint gClearValue;
+		};
+		// Vertex program: forwards screenPos as the output position, with z = 0 and w = 1.
+		VStoFS vsmain(VertexInput input)
+		{
+			VStoFS output;
+			output.position = float4(input.screenPos, 0, 1);
+
+			return output;
+		}			
+		// Fragment program: outputs gClearValue replicated into all four channels of SV_Target0.
+		uint4 fsmain(VStoFS input) : SV_Target0
+		{
+			return uint4(gClearValue, gClearValue, gClearValue, gClearValue);
+		}
+	};
+};

+ 182 - 35
Data/Raw/Engine/Shaders/IrradianceEvaluate.bsl

@@ -27,14 +27,22 @@ technique IrradianceEvaluate
 			Texture2D<uint> gInputTex;
 		#endif
 		
-		struct ProbeVolume
+		struct Tetrahedron
 		{
 			uint4 indices;
 			float3x4 transform;
 		};
 		
+		struct TetrahedronFace
+		{
+			float3 corners[3];
+			float3 normals[3];
+			uint isQuadratic;
+		};		
+		
 		StructuredBuffer<SHVector3RGB> gSHCoeffs;
-		StructuredBuffer<ProbeVolume> gProbeVolumes;
+		StructuredBuffer<Tetrahedron> gTetrahedra;
+		StructuredBuffer<TetrahedronFace> gTetFaces;
 		
 		TextureCube gSkyIrradianceTex;
 		SamplerState gSkyIrradianceSamp;
@@ -42,6 +50,7 @@ technique IrradianceEvaluate
 		cbuffer Params
 		{
 			float gSkyBrightness;
+			uint gNumTetrahedra;
 		}				
 		
 		float3 getSkyIndirectDiffuse(float3 dir)
@@ -73,7 +82,82 @@ technique IrradianceEvaluate
 			output += coeffs.v2 * f;
 						
 			return output;
-		}		
+		}	
+		// Solves A*t^2 + B*t + C = 0 and returns the larger root. Falls back to the linear solution -C/B when A is ~0, and to 0 when B is ~0 as well.
+		float solveQuadratic(float A, float B, float C)
+		{
+			const float EPSILON = 0.00001f;
+		
+			if (abs(A) > EPSILON)
+			{
+				float p = B / (2 * A);
+				float q = C / A;
+				float D = p * p - q;
+				// A negative discriminant (no real root, or rounding noise) would make sqrt() yield NaN, so clamp it to zero.
+				return sqrt(max(D, 0.0f)) - p;
+			}
+			else
+			{
+				if(abs(B) > EPSILON)
+					return -C / B;
+				// Neither a quadratic nor a linear term is left, so there is nothing to solve for.
+				return 0.0f;
+			}
+		}
+		// Returns a real root of the monic cubic t^3 + A*t^2 + B*t + C = 0, via the depressed form x^3 + 3*p*x + 2*q = 0 with t = x - A/3.
+		float solveCubic(float A, float B, float C)
+		{
+			const float EPSILON = 0.00001f;
+			const float THIRD = 1.0f / 3.0f;
+		
+			float sqA = A * A;
+			float p = THIRD * (-THIRD * sqA + B);
+			float q = (0.5f) * ((2.0f / 27.0f) * A * sqA - THIRD * A * B + C);
+
+			float cbp = p * p * p;
+			float D = q * q + cbp;
+			
+			float t;
+			if(D < 0.0f) // Three real roots: trigonometric form
+			{
+				float phi = THIRD * acos(-q / sqrt(-cbp));
+				t = (2 * sqrt(-p)) * cos(phi);
+			}
+			else // Single real root: Cardano's form, written so that the two cube-root terms never cancel
+			{
+				float sqrtD = sqrt(D);
+				float u = pow(sqrtD + abs(q), 1.0f / 3.0f);
+				// u is zero only when p and q are both zero (triple root); p / u would then be 0/0 = NaN, so substitute zero.
+				float pOverU = u > 0.0f ? p / u : 0.0f;
+				if (q > 0.0f)
+					t = -u + pOverU;
+				else
+					t = u - pOverU;
+			}
+			
+			return t - THIRD * A;
+		}
+		// Computes barycentric coordinates of p with respect to triangle (a, b, c). The returned x, y and z are the weights of b, c and a respectively.
+		float3 calcTriBarycentric(float3 p, float3 a, float3 b, float3 c)
+		{
+			float3 v0 = b - a;
+			float3 v1 = c - a;
+			float3 v2 = p - a;
+			// Dot products forming the 2x2 linear system that expresses v2 as x * v0 + y * v1.
+			float d00 = dot(v0, v0);
+			float d01 = dot(v0, v1);
+			float d11 = dot(v1, v1);
+			float d20 = dot(v2, v0);
+			float d21 = dot(v2, v1);
+			// NOTE(review): denom is zero when a, b and c are collinear and the divisions below are unguarded - confirm callers never pass a degenerate triangle.
+			float denom = d00 * d11 - d01 * d01;
+			float3 coords;
+			coords.x = (d11 * d20 - d01 * d21) / denom;
+			coords.y = (d00 * d21 - d01 * d20) / denom;
+			coords.z = 1.0f - coords.x - coords.y;
+			
+			return coords;
+		}
 		
 		float3 fsmain(VStoFS input
 			#if MSAA_COUNT > 1
@@ -90,9 +174,9 @@ technique IrradianceEvaluate
 				surfaceData = getGBufferData(pixelPos);
 			#endif		
 			
-			float3 radiance;
+			float3 irradiance = 0;
 			#if SKY_ONLY
-				radiance = gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, surfaceData.worldNormal, 0).rgb * gSkyBrightness;
+				irradiance = gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, surfaceData.worldNormal, 0).rgb * gSkyBrightness;
 			#else
 				uint volumeIdx;
 				#if MSAA_COUNT > 1
@@ -101,38 +185,101 @@ technique IrradianceEvaluate
 					volumeIdx = gInputTex.Load(uint3(pixelPos, 0)).x;
 				#endif
 				
-				ProbeVolume volume = gProbeVolumes[volumeIdx];
-				
-				float3 P = NDCToWorld(input.screenPos, surfaceData.depth);
-				float3 offset = float3(volume.transform[0][3], volume.transform[1][3], volume.transform[2][3]);
-				float3 factors = mul((float3x3)volume.transform, P - offset);			
-				float4 coords = float4(factors, 1.0f - factors.x - factors.y - factors.z);
-				
-				// Ignore extra points we added to make the volume cover everything
-				coords = volume.indices != -1 ? coords : float4(0, 0, 0, 0);
-				
-				// Renormalize after potential change
-				float sum = coords.x + coords.y + coords.z + coords.w;
-				coords /= sum;
-				
-				SHVector3RGB shCoeffs = gSHCoeffs[volume.indices[0]];
-				
-				SHMultiply(shCoeffs, coords.x);
-				SHMultiplyAdd(shCoeffs, gSHCoeffs[volume.indices[1]], coords.y);
-				SHMultiplyAdd(shCoeffs, gSHCoeffs[volume.indices[2]], coords.z);
-				SHMultiplyAdd(shCoeffs, gSHCoeffs[volume.indices[3]], coords.w);
-				
-				SHVector3 shBasis = SHBasis3(surfaceData.worldNormal);
-				SHMultiply(shCoeffs.R, shBasis);
-				SHMultiply(shCoeffs.G, shBasis);
-				SHMultiply(shCoeffs.B, shBasis);
-				
-				radiance.r = evaluateLambert(shCoeffs.R);
-				radiance.g = evaluateLambert(shCoeffs.G);
-				radiance.b = evaluateLambert(shCoeffs.B);
+				if(volumeIdx == 0xFFFF) // Using 16-bit texture, so need to compare like this
+					irradiance = gSkyIrradianceTex.SampleLevel(gSkyIrradianceSamp, surfaceData.worldNormal, 0).rgb * gSkyBrightness;
+				else
+				{
+					Tetrahedron volume = gTetrahedra[volumeIdx];
+					
+					float3 P = NDCToWorld(input.screenPos, surfaceData.depth);
+					
+					float4 coords;
+					[branch]
+					if(volumeIdx >= gNumTetrahedra)
+					{
+						uint faceIdx = volumeIdx - gNumTetrahedra;
+						TetrahedronFace face = gTetFaces[faceIdx];
+					
+						float3 factors = mul(volume.transform, float4(P, 1.0f));
+						float A = factors.x;
+						float B = factors.y;
+						float C = factors.z;
+
+						float t;
+						if(face.isQuadratic > 0)
+							t = solveQuadratic(A, B, C);
+						else
+							t = solveCubic(A, B, C);
+							
+						float3 triA = face.corners[0] + t * face.normals[0];
+						float3 triB = face.corners[1] + t * face.normals[1];
+						float3 triC = face.corners[2] + t * face.normals[2];
+						
+						float3 bary = calcTriBarycentric(P, triA, triB, triC);
+						
+						coords.x = bary.z;
+						coords.yz = bary.xy;
+						coords.w = 0.0f;
+					}
+					else
+					{
+						float3 factors = mul(volume.transform, float4(P, 1.0f));			
+						coords = float4(factors, 1.0f - factors.x - factors.y - factors.z);
+					}
+					
+					for(uint i = 0; i < 4; ++i)
+					{
+						if(coords[i] == 0.0f)
+							continue;
+					
+						if(volume.indices[i] == 0)
+							irradiance += float3(1.0f, 0, 0) * coords[i];
+					
+						if(volume.indices[i] == 1)
+							irradiance += float3(0.0f, 1.0f, 0) * coords[i];
+							
+						if(volume.indices[i] == 2)
+							irradiance += float3(1.0f, 1.0f, 1.0f) * coords[i];
+							
+						if(volume.indices[i] == 3)
+							irradiance += float3(1.0f, 1.0f, 1.0f) * coords[i];
+					
+						if(volume.indices[i] == 4)
+							irradiance += float3(0.0f, 1.0f, 1.0f) * coords[i];
+							
+						if(volume.indices[i] == 5)
+							irradiance += float3(1.0f, 1.0f, 0.0f) * coords[i];
+							
+						if(volume.indices[i] == 6)
+							irradiance += float3(1.0f, 1.0f, 1.0f) * coords[i];
+							
+						if(volume.indices[i] == 7)
+							irradiance += float3(1.0f, 1.0f, 1.0f) * coords[i];
+					}					
+					
+					//SHVector3RGB shCoeffs;
+					//SHZero(shCoeffs);
+					
+					//for(uint i = 0; i < 4; ++i)
+					//{
+					//	if(coords[i] > 0.0f)
+					//		SHMultiplyAdd(shCoeffs, gSHCoeffs[volume.indices[i]], coords[i]);
+					//}
+					
+					//SHVector3 shBasis = SHBasis3(surfaceData.worldNormal);
+					//SHMultiply(shCoeffs.R, shBasis);
+					//SHMultiply(shCoeffs.G, shBasis);
+					//SHMultiply(shCoeffs.B, shBasis);
+					
+					//irradiance.r = evaluate(shCoeffs.R);
+					//irradiance.g = evaluate(shCoeffs.G);
+					//irradiance.b = evaluate(shCoeffs.B);
+					
+					//irradiance *= float3(10.0f, 0.0f, 0.0f);
+				}
 			#endif // SKY_ONLY
 			
-			return radiance * surfaceData.albedo.rgb;
+			return irradiance * surfaceData.albedo.rgb;
 		}	
 	};
 };

+ 11 - 13
Data/Raw/Engine/Shaders/TetrahedraRender.bsl

@@ -1,5 +1,9 @@
+#include "$ENGINE$\PerCameraData.bslinc"
+
 technique TetrahedraRender
 {
+	mixin PerCameraData;
+
 	raster
 	{
 		cull = cw;
@@ -21,14 +25,8 @@ technique TetrahedraRender
 		struct VStoFS
 		{
 			float4 position : SV_Position;
-			uint index : TEXCOORD0;
-		};
-		
-		cbuffer Params
-		{
-			float4x4 gMatViewProj;
-			float4 gNDCToUV;
-			float2 gNDCToDeviceZ;
+			float4 clipPos : TEXCOORD0;
+			uint index : TEXCOORD1;
 		};
 		
 		VStoFS vsmain(VertexInput input)
@@ -36,6 +34,7 @@ technique TetrahedraRender
 			VStoFS output;
 		
 			output.position = mul(gMatViewProj, float4(input.position, 1.0f));
+			output.clipPos = output.position;
 			output.index = input.index;
 			
 			return output;
@@ -58,16 +57,15 @@ technique TetrahedraRender
 		#endif
 		) : SV_Target0
 		{
-			float2 uv = input.position.xy * gNDCToUV.xy + gNDCToUV.zw;
-			
 			float sceneDepth;
 			#if MSAA
-				sceneDepth = gDepthBufferTex.Load(trunc(uv), sampleIdx);
+				sceneDepth = gDepthBufferTex.Load(trunc(input.position.xy), sampleIdx);
 			#else
-				sceneDepth = gDepthBufferTex.Sample(gDepthBufferSamp, uv);
+				float2 ndcPos = input.clipPos.xy / input.clipPos.w;
+				sceneDepth = gDepthBufferTex.Sample(gDepthBufferSamp, NDCToUV(ndcPos));
 			#endif
 			
-			float currentDepth = input.position.z * gNDCToDeviceZ.x + gNDCToDeviceZ.y;
+			float currentDepth = input.position.z;
 			if(currentDepth < sceneDepth)
 				discard;
 				

+ 4 - 1
Source/BansheeCore/Source/BsLightProbeVolume.cpp

@@ -441,7 +441,10 @@ namespace bs
 
 				SPtr<Texture> cubemap = Texture::create(cubemapDesc);
 
-				gRenderer()->captureSceneCubeMap(cubemap, mProbePositions[mFirstDirtyProbe], true);
+				Vector3 localPos = mProbePositions[mFirstDirtyProbe];
+				Vector3 transformedPos = mRotation.rotate(localPos) + mPosition;
+
+				gRenderer()->captureSceneCubeMap(cubemap, transformedPos, true);
 				gIBLUtility().filterCubemapForIrradiance(cubemap, mCoefficients, probeInfo.bufferIdx);
 
 				probeInfo.flags = LightProbeFlags::Clean;

+ 28 - 1
Source/BansheeEngine/Include/BsRendererUtility.h

@@ -8,6 +8,7 @@
 #include "BsVector2I.h"
 #include "BsRect2I.h"
 #include "BsRendererMaterial.h"
+#include "BsParamBlocks.h"
 
 namespace bs { namespace ct
 {
@@ -51,6 +52,26 @@ namespace bs { namespace ct
 		static ShaderVariation VAR_8MSAA_Depth;
 	};
 
+	BS_PARAM_BLOCK_BEGIN(ClearParamDef) // Layout of the "Params" block consumed by Clear.bsl
+		BS_PARAM_BLOCK_ENTRY(INT32, gClearValue)
+	BS_PARAM_BLOCK_END
+	// NOTE(review): gClearValue is INT32 here, while Clear.bsl declares it as uint and execute() takes UINT32 - confirm this is intentional.
+	extern ClearParamDef gClearParamDef;
+
+	/** Material that clears the currently bound render target to an integer value, using the Clear.bsl shader. */
+	class ClearMat : public RendererMaterial<ClearMat>
+	{
+		RMAT_DEF("Clear.bsl");
+
+	public:
+		ClearMat();
+
+		/** Executes the material on the currently bound render target, clearing it to @p value. */
+		void execute(UINT32 value);
+	private:
+		SPtr<GpuParamBlockBuffer> mParamBuffer; /**< Buffer bound to the shader's "Params" block, holds gClearValue. */
+	};
+
 	/**
 	 * Contains various utility methods that make various common operations in the renderer easier.
 	 * 			
@@ -177,6 +198,12 @@ namespace bs { namespace ct
 			drawScreenQuad(uv, textureSize, numInstances);
 		}
 
+		/** 
+		 * Clears the currently bound render target to the provided integer value. This is similar to 
+		 * RenderAPI::clearRenderTarget(), except it supports integer clears.
+		 */
+		void clear(UINT32 value);
+
 		/** Returns a stencil mesh used for a radial light (a unit sphere). */
 		SPtr<Mesh> getRadialLightStencil() const { return mPointLightStencilMesh; }
 
@@ -200,4 +227,4 @@ namespace bs { namespace ct
 	BS_EXPORT RendererUtility& gRendererUtility();
 
 	/** @} */
-}}
+}}

+ 28 - 1
Source/BansheeEngine/Source/BsRendererUtility.cpp

@@ -335,6 +335,12 @@ namespace bs { namespace ct
 		draw(mFullScreenQuadMesh, mFullScreenQuadMesh->getProperties().getSubMesh(), numInstances);
 	}
 
+	void RendererUtility::clear(UINT32 value)
+	{
+		// Integer clears are delegated to the dedicated clear material.
+		ClearMat::get()->execute(value);
+	}
+
 	RendererUtility& gRendererUtility()
 	{
 		return RendererUtility::instance();
@@ -379,7 +385,6 @@ namespace bs { namespace ct
 		ShaderVariation::Param("COLOR", false)
 	});
 
-
 	BlitMat::BlitMat()
 	{
 		mSource = mMaterial->getParamTexture("gSource");
@@ -438,4 +443,26 @@ namespace bs { namespace ct
 		else
 			return get(VAR_1MSAA_Color);
 	}
+
+	ClearParamDef gClearParamDef;
+	/** Creates the parameter buffer described by ClearParamDef and assigns it to the material's "Params" block. */
+	ClearMat::ClearMat()
+	{
+		mParamBuffer = gClearParamDef.createBuffer();
+		mParamsSet->setParamBlockBuffer("Params", mParamBuffer); // "Params" is the cbuffer name used in Clear.bsl
+	}
+	/** Variation registration hook; ClearMat adds nothing to @p variations. */
+	void ClearMat::_initVariations(ShaderVariations& variations)
+	{
+		// Intentionally empty: nothing is registered
+	}
+	/** Stores @p value in the parameter buffer, binds the clear pass and its parameters, then draws a screen quad. */
+	void ClearMat::execute(UINT32 value)
+	{
+		gClearParamDef.gClearValue.set(mParamBuffer, value);
+		// NOTE(review): IrradianceEvaluateMat::execute calls flushToGPU() on its buffer after set(); none here - confirm setPassParams() uploads it.
+		gRendererUtility().setPass(mMaterial);
+		gRendererUtility().setPassParams(mParamsSet);
+		gRendererUtility().drawScreenQuad();
+	}
 }}

+ 59 - 41
Source/RenderBeast/Include/BsLightProbes.h

@@ -13,6 +13,7 @@
 
 namespace bs { namespace ct
 {
+	struct LightProbesInfo;
 	struct GBufferTextures;
 	struct FrameInfo;
 	class LightProbeVolume;
@@ -21,14 +22,6 @@ namespace bs { namespace ct
 	 *  @{
 	 */
 
-	BS_PARAM_BLOCK_BEGIN(TetrahedraRenderParamDef)
-		BS_PARAM_BLOCK_ENTRY(Matrix4, gMatViewProj)
-		BS_PARAM_BLOCK_ENTRY(Vector4, gNDCToUV)
-		BS_PARAM_BLOCK_ENTRY(Vector2, gNDCToDeviceZ)
-	BS_PARAM_BLOCK_END
-
-	extern TetrahedraRenderParamDef gTetrahedraRenderParamDef;
-
 	/** 
 	 * Shader that renders the tetrahedra used for light probe evaluation. Tetrahedra depth is compared with current scene
 	 * depth, and for each scene pixel the matching tetrahedron index is written to the output target.
@@ -61,7 +54,6 @@ namespace bs { namespace ct
 		/** Returns the material variation matching the provided parameters. */
 		static TetrahedraRenderMat* getVariation(bool msaa);
 	private:
-		SPtr<GpuParamBlockBuffer> mParamBuffer;
 		GpuParamTexture mDepthBufferTex;
 
 		static ShaderVariation VAR_NoMSAA;
@@ -70,6 +62,7 @@ namespace bs { namespace ct
 
 	BS_PARAM_BLOCK_BEGIN(IrradianceEvaluateParamDef)
 		BS_PARAM_BLOCK_ENTRY(float, gSkyBrightness)
+		BS_PARAM_BLOCK_ENTRY(INT32, gNumTetrahedra)
 	BS_PARAM_BLOCK_END
 
 	extern IrradianceEvaluateParamDef gIrradianceEvaluateParamDef;
@@ -88,17 +81,14 @@ namespace bs { namespace ct
 		 * @param[in]	view				View that is currently being rendered.
 		 * @param[in]	gbuffer				Previously rendered GBuffer textures.
 		 * @param[in]	lightProbeIndices	Indices calculated by TetrahedraRenderMat.
-		 * @param[in]	shCoeffs			Buffer containing spherical harmonic coefficients for every light probe.
-		 * @param[in]	volumes				Buffer containing information about tetrahedra inside the light volume, 
-		 *									including indices of the light probes they reference.
+		 * @param[in]	lightProbesInfo		Information about light probes.
 		 * @param[in]	skybox				Skybox, if available. If sky is not available, but sky rendering is enabled, 
 		 *									the system will instead use a default irradiance texture.
 		 * @param[in]	output				Output texture to write the radiance to. The evaluated value will be added to 
 		 *									existing radiance in the texture, using blending.
 		 */
 		void execute(const RendererView& view, const GBufferTextures& gbuffer, const SPtr<Texture>& lightProbeIndices,
-			const SPtr<GpuBuffer>& shCoeffs, const SPtr<GpuBuffer>& volumes, const Skybox* skybox, 
-			const SPtr<RenderTexture>& output);
+			const LightProbesInfo& lightProbesInfo, const Skybox* skybox, const SPtr<RenderTexture>& output);
 
 		/** 
 		 * Returns the material variation matching the provided parameters. 
@@ -114,7 +104,8 @@ namespace bs { namespace ct
 		GpuParamTexture mParamInputTex;
 		GpuParamTexture mParamSkyIrradianceTex;
 		GpuParamBuffer mParamSHCoeffsBuffer;
-		GpuParamBuffer mParamVolumeBuffer;
+		GpuParamBuffer mParamTetrahedraBuffer;
+		GpuParamBuffer mParamTetFacesBuffer;
 		bool mSkyOnly;
 
 		static ShaderVariation VAR_MSAA_Probes;
@@ -123,6 +114,32 @@ namespace bs { namespace ct
 		static ShaderVariation VAR_NoMSAA_Sky;
 	};
 
+	/** Contains information required by light probe shaders. Output by LightProbes. */
+	struct LightProbesInfo
+	{
+		/** Contains a set of spherical harmonic coefficients for every light probe. */
+		SPtr<GpuBuffer> shCoefficients;
+
+		/** 
+		 * Contains information about tetrahedra formed by light probes. The first part of the buffer is populated by
+		 * actual tetrahedra, while the remainder is populated by information about outer faces (triangles).
+		 * @p numTetrahedra marks the index at which the split happens.
+		 */
+		SPtr<GpuBuffer> tetrahedra;
+
+		/** Contains additional information about outer tetrahedron faces, required for extrapolating tetrahedron data. */
+		SPtr<GpuBuffer> faces;
+
+		/** 
+		 * Mesh representing the entire light probe volume. Each vertex has an associated tetrahedron (or face) index which
+		 * can be used to map into the tetrahedra array to retrieve probe information.
+		 */
+		SPtr<Mesh> tetrahedraVolume;
+
+		/** Number of valid tetrahedra in the @p tetrahedra buffer. Entries at or past this index describe outer faces. */
+		UINT32 numTetrahedra;
+	};
+
 	/** Handles any pre-processing for light (irradiance) probe lighting. */
 	class LightProbes
 	{
@@ -145,6 +162,20 @@ namespace bs { namespace ct
 			Tetrahedron volume;
 			Matrix4 transform;
 		};
+
+		/**
+		 * Information about a single outer tetrahedron face and its extrusion, including the data needed to project a
+		 * point inside the extrusion volume onto the face.
+		 */
+		struct TetrahedronFaceData
+		{
+			UINT32 innerVertices[3]; /**< Indices into the position array; presumably the corners of the original face - confirm. */
+			UINT32 outerVertices[3]; /**< Indices into the position array; presumably the extruded copies of the corners - confirm. */
+			Vector3 normals[3]; /**< One direction per corner; presumably the extrusion direction - confirm. */
+			Matrix4 transform;
+			UINT32 tetrahedron; /**< Index of the tetrahedron this face belongs to. */
+			bool quadratic; /**< Presumably selects the quadratic rather than cubic solve in the shader - confirm. */
+		};
 	public:
 		LightProbes();
 
@@ -164,24 +195,11 @@ namespace bs { namespace ct
 		bool hasAnyProbes() const;
 
 		/** 
-		 * Returns a GPU buffer containing SH coefficients for all active light probes. updateProbes() must be called
-		 * at least once before the buffer is populated. If the probes changed since the last call, call updateProbes()
-		 * to refresh the buffer.
-		 */
-		SPtr<GpuBuffer> getSHCoefficientsBuffer() const { return mProbeCoefficientsGPU; }
-
-		/** 
-		 * Returns a GPU buffer containing information about light probe volumes (tetrahedra). updateProbes() must be called
+		 * Returns a set of buffers that can be used for rendering the light probes. updateProbes() must be called
 		 * at least once before the buffer is populated. If the probes changed since the last call, call updateProbes()
-		 * to refresh the buffer.
-		 */
-		SPtr<GpuBuffer> getTetrahedonInfosBuffer() const { return mTetrahedronInfosGPU; }
-
-		/**
-		 * Returns a mesh that contains triangles of all the light volume tetrahedra, including their corresponding
-		 * tetrahedron index. By rendering this mesh you can find which tetrahedron influences which pixel.
+		 * to refresh the buffer. 
 		 */
-		SPtr<Mesh> getTetrahedraVolumeMesh() const { return mVolumeMesh; }
+		LightProbesInfo getInfo() const;
 
 	private:
 		/**
@@ -193,16 +211,20 @@ namespace bs { namespace ct
 		 * @param[in,out]	positions					A set of positions to generate the tetrahedra from. If 
 		 *												@p generateExtrapolationVolume is enabled then this array will be
 		 *												appended with new vertices forming that volume.
-		 * @param[out]		output						A list of generated tetrahedra and relevant data.
+		 * @param[out]		tetrahedra					A list of generated tetrahedra and relevant data.
+		 * @param[out]		faces						A list of faces representing the surface of the tetrahedra volume.
 		 * @param[in]		generateExtrapolationVolume	If true, the tetrahedron volume will be surrounded with points
 		 *												at "infinity" (technically just far away).
 		 */
-		void generateTetrahedronData(Vector<Vector3>& positions, Vector<TetrahedronData>& output, 
-			bool generateExtrapolationVolume = false);
+		void generateTetrahedronData(Vector<Vector3>& positions, Vector<TetrahedronData>& tetrahedra, 
+			Vector<TetrahedronFaceData>& faces, bool generateExtrapolationVolume = false);
 
 		/** Resizes the GPU buffer used for holding tetrahedron data, to the specified size (in number of tetrahedra). */
 		void resizeTetrahedronBuffer(UINT32 count);
 
+		/** Resizes the GPU buffer used for holding tetrahedron face data, to the specified size (in number of faces). */
+		void resizeTetrahedronFaceBuffer(UINT32 count);
+
 		/** 
 		 * Resizes the GPU buffer that stores light probe SH coefficients, to the specified size (in the number of probes). 
 		 */
@@ -213,24 +235,20 @@ namespace bs { namespace ct
 
 		UINT32 mMaxCoefficients;
 		UINT32 mMaxTetrahedra;
+		UINT32 mMaxFaces;
 
 		Vector<TetrahedronData> mTetrahedronInfos;
 
 		SPtr<GpuBuffer> mProbeCoefficientsGPU;
 		SPtr<GpuBuffer> mTetrahedronInfosGPU;
+		SPtr<GpuBuffer> mTetrahedronFaceInfosGPU;
 		SPtr<Mesh> mVolumeMesh;
+		UINT32 mNumValidTetrahedra;
 
 		// Temporary buffers
 		Vector<Vector3> mTempTetrahedronPositions;
 		Vector<UINT32> mTempTetrahedronBufferIndices;
 	};
 
-	/** Information about a single tetrahedron, for use on the GPU. */
-	struct TetrahedronDataGPU
-	{
-		UINT32 indices[4];
-		Matrix3x4 transform;
-	};
-
 	/** @} */
 }}

+ 499 - 179
Source/RenderBeast/Source/BsLightProbes.cpp

@@ -14,8 +14,6 @@
 
 namespace bs { namespace ct 
 {
-	TetrahedraRenderParamDef gTetrahedraRenderParamDef;
-
 	ShaderVariation TetrahedraRenderMat::VAR_NoMSAA = ShaderVariation({
 		ShaderVariation::Param("MSAA", false)
 	});
@@ -43,9 +41,6 @@ namespace bs { namespace ct
 			SPtr<SamplerState> pointSampState = SamplerState::create(pointSampDesc);
 			params->setSamplerState(GPT_FRAGMENT_PROGRAM, "gDepthBufferSamp", pointSampState);
 		}
-
-		mParamBuffer = gTetrahedraRenderParamDef.createBuffer();
-		mParamsSet->setParamBlockBuffer("Params", mParamBuffer, true);
 	}
 
 	void TetrahedraRenderMat::_initVariations(ShaderVariations& variations)
@@ -57,23 +52,8 @@ namespace bs { namespace ct
 	void TetrahedraRenderMat::execute(const RendererView& view, const SPtr<Texture>& sceneDepth, const SPtr<Mesh>& mesh, 
 		const SPtr<RenderTexture>& output)
 	{
-		const RendererViewProperties& viewProps = view.getProperties();
-
-		Vector4 NDCtoUV = view.getNDCToUV();
-		if(mVariation.getBool("MSAA"))
-		{
-			NDCtoUV.x *= viewProps.viewRect.width;
-			NDCtoUV.y *= viewProps.viewRect.height;
-			NDCtoUV.z *= viewProps.viewRect.width;
-			NDCtoUV.w *= viewProps.viewRect.height;
-		}
-
-		gTetrahedraRenderParamDef.gMatViewProj.set(mParamBuffer, viewProps.viewProjTransform);
-		gTetrahedraRenderParamDef.gNDCToUV.set(mParamBuffer, NDCtoUV);
-		gTetrahedraRenderParamDef.gNDCToDeviceZ.set(mParamBuffer, RendererView::getNDCZToDeviceZ());
-		
 		mDepthBufferTex.set(sceneDepth);
-		mParamBuffer->flushToGPU();
+		mParamsSet->setParamBlockBuffer("PerCamera", view.getPerViewBuffer(), true);
 
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(output);
@@ -92,7 +72,7 @@ namespace bs { namespace ct
 		UINT32 numSamples = viewProps.numSamples;
 
 		colorDesc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_R16U, width, height, TU_RENDERTARGET, numSamples);
-		depthDesc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_D16, width, height, TU_DEPTHSTENCIL, numSamples);
+		depthDesc = POOLED_RENDER_TEXTURE_DESC::create2D(PF_D32, width, height, TU_DEPTHSTENCIL, numSamples);
 	}
 
 	TetrahedraRenderMat* TetrahedraRenderMat::getVariation(bool msaa)
@@ -131,14 +111,14 @@ namespace bs { namespace ct
 		mSkyOnly = mVariation.getBool("SKY_ONLY");
 
 		SPtr<GpuParams> params = mParamsSet->getGpuParams();
+		params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSkyIrradianceTex", mParamSkyIrradianceTex);
 
-		if(mSkyOnly)
-			params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSkyIrradianceTex", mParamSkyIrradianceTex);
-		else
+		if(!mSkyOnly)
 		{
 			params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gInputTex", mParamInputTex);
 			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mParamSHCoeffsBuffer);
-			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gProbeVolumes", mParamVolumeBuffer);
+			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gTetrahedra", mParamTetrahedraBuffer);
+			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gTetFaces", mParamTetFacesBuffer);
 		}
 
 		mParamBuffer = gIrradianceEvaluateParamDef.createBuffer();
@@ -154,36 +134,36 @@ namespace bs { namespace ct
 	}
 
 	void IrradianceEvaluateMat::execute(const RendererView& view, const GBufferTextures& gbuffer, 
-		const SPtr<Texture>& lightProbeIndices, const SPtr<GpuBuffer>& shCoeffs, const SPtr<GpuBuffer>& volumes, 
-		const Skybox* skybox, const SPtr<RenderTexture>& output)
+		const SPtr<Texture>& lightProbeIndices, const LightProbesInfo& lightProbesInfo, const Skybox* skybox, 
+		const SPtr<RenderTexture>& output)
 	{
 		const RendererViewProperties& viewProps = view.getProperties();
 
 		mGBufferParams.bind(gbuffer);
 
 		float skyBrightness = 1.0f;
-		if (mSkyOnly)
+		SPtr<Texture> skyIrradiance;
+		if (skybox != nullptr)
 		{
-			SPtr<Texture> skyIrradiance;
-			if (skybox != nullptr)
-			{
-				skyIrradiance = skybox->getIrradiance();
-				skyBrightness = skybox->getBrightness();
-			}
+			skyIrradiance = skybox->getIrradiance();
+			skyBrightness = skybox->getBrightness();
+		}
 
-			if(skyIrradiance == nullptr)
-				skyIrradiance = RendererTextures::defaultIndirect;
+		if(skyIrradiance == nullptr)
+			skyIrradiance = RendererTextures::defaultIndirect;
 
-			mParamSkyIrradianceTex.set(skyIrradiance);
-		}
-		else
+		mParamSkyIrradianceTex.set(skyIrradiance);
+
+		if(!mSkyOnly)
 		{
 			mParamInputTex.set(lightProbeIndices);
-			mParamSHCoeffsBuffer.set(shCoeffs);
-			mParamVolumeBuffer.set(volumes);
+			mParamSHCoeffsBuffer.set(lightProbesInfo.shCoefficients);
+			mParamTetrahedraBuffer.set(lightProbesInfo.tetrahedra);
+			mParamTetFacesBuffer.set(lightProbesInfo.faces);
 		}
 
 		gIrradianceEvaluateParamDef.gSkyBrightness.set(mParamBuffer, skyBrightness);
+		gIrradianceEvaluateParamDef.gNumTetrahedra.set(mParamBuffer, lightProbesInfo.numTetrahedra);
 		mParamBuffer->flushToGPU();
 
 		mParamsSet->setParamBlockBuffer("PerCamera", view.getPerViewBuffer(), true);
@@ -219,8 +199,23 @@ namespace bs { namespace ct
 		}
 	}
 
+	/** Information about a single tetrahedron, for use on the GPU. Mirrors the Tetrahedron struct in IrradianceEvaluate.bsl. */
+	struct TetrahedronDataGPU
+	{
+		UINT32 indices[4];
+		Matrix3x4 transform;
+	};
+
+	/** Information about a single tetrahedron face, for use on the GPU. Mirrors the TetrahedronFace struct in IrradianceEvaluate.bsl. */
+	struct TetrahedronFaceDataGPU
+	{
+		Vector3 corners[3];
+		Vector3 normals[3];
+		UINT32 isQuadratic; /**< Non-zero makes the shader use solveQuadratic(), zero makes it use solveCubic(). */
+	};
+
 	LightProbes::LightProbes()
-		:mTetrahedronVolumeDirty(false), mMaxCoefficients(0), mMaxTetrahedra(0)
+		:mTetrahedronVolumeDirty(false), mMaxCoefficients(0), mMaxTetrahedra(0), mMaxFaces(0), mNumValidTetrahedra(0)
 	{ }
 
 	void LightProbes::notifyAdded(LightProbeVolume* volume)
@@ -322,35 +317,71 @@ namespace bs { namespace ct
 
 		mTetrahedronInfos.clear();
 
-		UINT32 innerVertexCount = (UINT32)mTempTetrahedronPositions.size();
-		generateTetrahedronData(mTempTetrahedronPositions, mTetrahedronInfos, true);
+		Vector<TetrahedronFaceData> outerFaces;
+		generateTetrahedronData(mTempTetrahedronPositions, mTetrahedronInfos, outerFaces, true);
+
+		// Find valid tetrahedrons
+		UINT32 numTetrahedra = (UINT32)mTetrahedronInfos.size();
+
+		bool* validTets = (bool*)bs_stack_alloc(sizeof(bool) * numTetrahedra);
+		mNumValidTetrahedra = 0;
+		for (UINT32 i = 0; i < (UINT32)mTetrahedronInfos.size(); i++)
+		{
+			const TetrahedronData& entry = mTetrahedronInfos[i];
+
+			const Vector3& P1 = mTempTetrahedronPositions[entry.volume.vertices[0]];
+			const Vector3& P2 = mTempTetrahedronPositions[entry.volume.vertices[1]];
+			const Vector3& P3 = mTempTetrahedronPositions[entry.volume.vertices[2]];
+			const Vector3& P4 = mTempTetrahedronPositions[entry.volume.vertices[3]];
+
+			Vector3 E1 = P1 - P4;
+			Vector3 E2 = P2 - P4;
+			Vector3 E3 = P3 - P4;
+
+			// If tetrahedron is co-planar just ignore it, shader will use some other nearby one instead. We can't
+			// handle coplanar tetrahedrons because the matrix is not invertible, and for nearly co-planar ones the
+			// math breaks down because of precision issues.
+			validTets[i] = fabs(Vector3::dot(Vector3::normalize(Vector3::cross(E1, E2)), E3)) > 0.0001f;
+
+			if (validTets[i])
+				mNumValidTetrahedra++;
+		}
+
+		UINT32 numValidFaces = 0;
+		for(auto& entry : outerFaces)
+		{
+			if (validTets[entry.tetrahedron])
+				numValidFaces++;
+		}
 
 		// Generate a mesh out of all the tetrahedron triangles
 		// Note: Currently the entire volume is rendered as a single large mesh, which isn't optimal as we can't
 		// perform frustum culling. A better option would be to split the mesh into multiple smaller volumes, do
 		// frustum culling and possibly even sort by distance from camera.
-		UINT32 numTetrahedra = (UINT32)mTetrahedronInfos.size();
-
-		UINT32 numVertices = numTetrahedra * 4 * 3;
-		UINT32 numIndices = numTetrahedra * 4 * 3;
+		UINT32 numVertices = mNumValidTetrahedra * 4 * 3 + numValidFaces * 9 * 3;
 
 		SPtr<VertexDataDesc> vertexDesc = bs_shared_ptr_new<VertexDataDesc>();
 		vertexDesc->addVertElem(VET_FLOAT3, VES_POSITION);
 		vertexDesc->addVertElem(VET_UINT1, VES_TEXCOORD);
 
-		SPtr<MeshData> meshData = MeshData::create(numVertices, numIndices, vertexDesc);
+		SPtr<MeshData> meshData = MeshData::create(numVertices, numVertices, vertexDesc);
 		auto posIter = meshData->getVec3DataIter(VES_POSITION);
 		auto idIter = meshData->getDWORDDataIter(VES_TEXCOORD);
 		UINT32* indices = meshData->getIndices32();
 
+		// Insert inner tetrahedron triangles
 		UINT32 tetIdx = 0;
-		for(auto& entry : mTetrahedronInfos)
+		UINT32 triIdx = 0;
+		for (UINT32 i = 0; i < (UINT32)mTetrahedronInfos.size(); i++)
 		{
-			const Tetrahedron& volume = entry.volume;
+			if (!validTets[i])
+				continue;
+
+			const Tetrahedron& volume = mTetrahedronInfos[i].volume;
 
 			Vector3 center(BsZero);
-			for(UINT32 i = 0; i < 4; i++)
-				center += mTempTetrahedronPositions[volume.vertices[i]];
+			for(UINT32 j = 0; j < 4; j++)
+				center += mTempTetrahedronPositions[volume.vertices[j]];
 
 			center /= 4.0f;
 
@@ -362,18 +393,18 @@ namespace bs { namespace ct
 				{ 1, 2, 3 }
 			};
 
-			for(UINT32 i = 0; i < 4; i++)
+			for(UINT32 j = 0; j < 4; j++)
 			{
-				Vector3 A = mTempTetrahedronPositions[volume.vertices[Permutations[i][0]]];
-				Vector3 B = mTempTetrahedronPositions[volume.vertices[Permutations[i][1]]];
-				Vector3 C = mTempTetrahedronPositions[volume.vertices[Permutations[i][2]]];
+				Vector3 A = mTempTetrahedronPositions[volume.vertices[Permutations[j][0]]];
+				Vector3 B = mTempTetrahedronPositions[volume.vertices[Permutations[j][1]]];
+				Vector3 C = mTempTetrahedronPositions[volume.vertices[Permutations[j][2]]];
 
-				// Make sure the triangle is clockwise
+				// Make sure the triangle is clockwise, facing away from the center
 				Vector3 e0 = A - C;
 				Vector3 e1 = B - C;
 
 				Vector3 normal = e0.cross(e1);
-				if (normal.dot(A - center) < 0.0f)
+				if (normal.dot(A - center) > 0.0f)
 					std::swap(B, C);
 
 				posIter.addValue(A);
@@ -384,37 +415,151 @@ namespace bs { namespace ct
 				idIter.addValue(tetIdx);
 				idIter.addValue(tetIdx);
 
-				indices[0] = tetIdx * 4 * 3 + i * 3 + 0;
-				indices[1] = tetIdx * 4 * 3 + i * 3 + 1;
-				indices[2] = tetIdx * 4 * 3 + i * 3 + 2;
+				indices[0] = tetIdx * 4 * 3 + j * 3 + 0;
+				indices[1] = tetIdx * 4 * 3 + j * 3 + 1;
+				indices[2] = tetIdx * 4 * 3 + j * 3 + 2;
 
 				indices += 3;
+				triIdx++;
 			}
 
 			tetIdx++;
 		}
 
+		// Generate triangles for extruded outer faces
+		UINT32 faceIdx = 0;
+		for(UINT32 i = 0; i < (UINT32)outerFaces.size(); i++)
+		{
+			if (!validTets[outerFaces[i].tetrahedron])
+				continue;
+
+			const TetrahedronFaceData& entry = outerFaces[i];
+
+			static const UINT32 Permutations[8][3] = 
+			{
+				{0, 1, 2 }, { 3, 4, 5},
+				{0, 1, 3 }, { 1, 3, 4},
+				{1, 2, 4 }, { 2, 4, 5},
+				{2, 0, 5 }, { 0, 5, 3}
+			};
+
+			// Make sure the triangle is clockwise, facing away from the center
+			Vector3 center(BsZero);
+			for (UINT32 k = 0; k < 3; k++)
+			{
+				center += mTempTetrahedronPositions[entry.innerVertices[k]];
+				center += mTempTetrahedronPositions[entry.outerVertices[k]];
+			}
+
+			center /= 6.0f;
+
+			for(UINT32 j = 0; j < 8; ++j)
+			{
+				UINT32 idxA = Permutations[j][0];
+				UINT32 idxB = Permutations[j][1];
+				UINT32 idxC = Permutations[j][2];
+
+				idxA = idxA > 2 ? entry.outerVertices[idxA - 3] : entry.innerVertices[idxA];
+				idxB = idxB > 2 ? entry.outerVertices[idxB - 3] : entry.innerVertices[idxB];
+				idxC = idxC > 2 ? entry.outerVertices[idxC - 3] : entry.innerVertices[idxC];
+				
+				Vector3 A = mTempTetrahedronPositions[idxA];
+				Vector3 B = mTempTetrahedronPositions[idxB];
+				Vector3 C = mTempTetrahedronPositions[idxC];
+
+				Vector3 e0 = A - C;
+				Vector3 e1 = B - C;
+
+				Vector3 normal = e0.cross(e1);
+				if (normal.dot(A - center) > 0.0f)
+					std::swap(A, B);
+
+				posIter.addValue(A);
+				posIter.addValue(B);
+				posIter.addValue(C);
+
+				idIter.addValue(tetIdx + faceIdx);
+				idIter.addValue(tetIdx + faceIdx);
+				idIter.addValue(tetIdx + faceIdx);
+
+				indices[0] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + j * 3 + 0;
+				indices[1] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + j * 3 + 1;
+				indices[2] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + j * 3 + 2;
+
+				indices += 3;
+				triIdx++;
+			}
+
+			faceIdx++;
+		}
+
+		// Generate "caps" on the end of the extruded volume
+		UINT32 capIdx = 0;
+		for(UINT32 i = 0; i < (UINT32)outerFaces.size(); i++)
+		{
+			if (!validTets[outerFaces[i].tetrahedron])
+				continue;
+
+			const TetrahedronFaceData& entry = outerFaces[i];
+
+			Vector3 A = mTempTetrahedronPositions[entry.outerVertices[0]];
+			Vector3 B = mTempTetrahedronPositions[entry.outerVertices[1]];
+			Vector3 C = mTempTetrahedronPositions[entry.outerVertices[2]];
+
+			// Make sure the triangle is clockwise, facing toward the center
+			const Tetrahedron& tet = mTetrahedronInfos[entry.tetrahedron].volume;
+
+			Vector3 center(BsZero);
+			for(UINT32 j = 0; j < 4; j++)
+				center += mTempTetrahedronPositions[tet.vertices[j]];
+
+			center /= 4.0f;
+
+			Vector3 e0 = A - C;
+			Vector3 e1 = B - C;
+
+			Vector3 normal = e0.cross(e1);
+			if (normal.dot(A - center) < 0.0f)
+				std::swap(B, C);
+
+			posIter.addValue(A);
+			posIter.addValue(B);
+			posIter.addValue(C);
+
+			idIter.addValue(-1);
+			idIter.addValue(-1);
+			idIter.addValue(-1);
+
+			indices[0] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + capIdx * 3 + 0;
+			indices[1] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + capIdx * 3 + 1;
+			indices[2] = tetIdx * 4 * 3 + faceIdx * 8 * 3 + capIdx * 3 + 2;
+
+			indices += 3;
+			capIdx++;
+		}
+
 		mVolumeMesh = Mesh::create(meshData);
 
 		// Map vertices to actual SH coefficient indices, and write GPU buffer with tetrahedron information
-		if (numTetrahedra > mMaxTetrahedra)
+		if ((mNumValidTetrahedra + numValidFaces) > mMaxTetrahedra)
 		{
-			UINT32 newSize = Math::divideAndRoundUp(numTetrahedra, 64U) * 64U;
+			UINT32 newSize = Math::divideAndRoundUp(mNumValidTetrahedra + numValidFaces, 64U) * 64U;
 			resizeTetrahedronBuffer(newSize);
 		}
 
 		TetrahedronDataGPU* dst = (TetrahedronDataGPU*)mTetrahedronInfosGPU->lock(0, mTetrahedronInfosGPU->getSize(), 
 			GBL_WRITE_ONLY_DISCARD);
-		for (auto& entry : mTetrahedronInfos)
+
+		// Write inner tetrahedron data
+		for (UINT32 i = 0; i < (UINT32)mTetrahedronInfos.size(); i++)
 		{
-			for(UINT32 i = 0; i < 4; ++i)
-			{
-				// Check for outer vertices, which have no SH data associated with them
-				if (entry.volume.vertices[i] >= (INT32)innerVertexCount)
-					entry.volume.vertices[i] = -1;
-				else
-					entry.volume.vertices[i] = mTempTetrahedronBufferIndices[entry.volume.vertices[i]];
-			}
+			if (!validTets[i])
+				continue;
+
+			TetrahedronData& entry = mTetrahedronInfos[i];
+
+			for(UINT32 j = 0; j < 4; ++j)
+				entry.volume.vertices[j] = mTempTetrahedronBufferIndices[entry.volume.vertices[j]];
 
 			memcpy(dst->indices, entry.volume.vertices, sizeof(UINT32) * 4);
 			memcpy(&dst->transform, &entry.transform, sizeof(float) * 12);
@@ -422,8 +567,59 @@ namespace bs { namespace ct
 			dst++;
 		}
 
+		// Write extruded face data
+		for (UINT32 i = 0; i < (UINT32)outerFaces.size(); i++)
+		{
+			if (!validTets[outerFaces[i].tetrahedron])
+				continue;
+
+			const TetrahedronFaceData& entry = outerFaces[i];
+
+			UINT32 indices[4];
+			indices[0] = mTempTetrahedronBufferIndices[entry.innerVertices[0]];
+			indices[1] = mTempTetrahedronBufferIndices[entry.innerVertices[1]];
+			indices[2] = mTempTetrahedronBufferIndices[entry.innerVertices[2]];
+			indices[3] = -1;
+
+			memcpy(dst->indices, indices, sizeof(UINT32) * 4);
+			memcpy(&dst->transform, &entry.transform, sizeof(float) * 12);
+
+			dst++;
+		}
+
 		mTetrahedronInfosGPU->unlock();
 
+		// Write data specific to faces
+		if (numValidFaces > mMaxFaces)
+		{
+			UINT32 newSize = Math::divideAndRoundUp(numValidFaces, 64U) * 64U;
+			resizeTetrahedronFaceBuffer(newSize);
+		}
+
+		TetrahedronFaceDataGPU* faceDst = (TetrahedronFaceDataGPU*)mTetrahedronFaceInfosGPU->lock(0, 
+			mTetrahedronFaceInfosGPU->getSize(), GBL_WRITE_ONLY_DISCARD);
+
+		for (UINT32 i = 0; i < (UINT32)outerFaces.size(); i++)
+		{
+			if (!validTets[outerFaces[i].tetrahedron])
+				continue;
+
+			const TetrahedronFaceData& entry = outerFaces[i];
+
+			for (UINT32 j = 0; j < 3; j++)
+			{
+				faceDst->corners[j] = mTempTetrahedronPositions[entry.innerVertices[j]];
+				faceDst->normals[j] = entry.normals[j];
+			}
+
+			faceDst->isQuadratic = entry.quadratic ? 1 : 0;
+			faceDst++;
+		}
+
+		mTetrahedronFaceInfosGPU->unlock();
+
+		bs_stack_free(validTets);
+
 		mTempTetrahedronPositions.clear();
 		mTempTetrahedronBufferIndices.clear();
 		mTetrahedronVolumeDirty = false;
@@ -441,6 +637,18 @@ namespace bs { namespace ct
 		return false;
 	}
 
+	LightProbesInfo LightProbes::getInfo() const // Bundles the current GPU buffers, volume mesh and tetrahedron count into one struct, returned by value.
+	{
+		LightProbesInfo info;
+		info.shCoefficients = mProbeCoefficientsGPU; // Probe SH coefficient buffer (presumably one entry per probe - confirm)
+		info.tetrahedra = mTetrahedronInfosGPU; // Filled by the update code with inner tetrahedra first, then one entry per extruded outer face
+		info.faces = mTetrahedronFaceInfosGPU; // Per-face corners, vertex normals and the isQuadratic flag for the extruded outer faces
+		info.tetrahedraVolume = mVolumeMesh; // Mesh built from the tetrahedron, extruded-face and cap triangles
+		info.numTetrahedra = mNumValidTetrahedra; // NOTE(review): presumably counts only valid inner tetrahedra, not the face entries appended to the same buffer - confirm
+
+		return info;
+	}
+
 	void LightProbes::resizeTetrahedronBuffer(UINT32 count)
 	{
 		GPU_BUFFER_DESC desc;
@@ -454,6 +662,19 @@ namespace bs { namespace ct
 		mMaxTetrahedra = count;
 	}
 
+	void LightProbes::resizeTetrahedronFaceBuffer(UINT32 count) // Replaces the face-info GPU buffer with a new one holding 'count' elements.
+	{
+		GPU_BUFFER_DESC desc;
+		desc.type = GBT_STRUCTURED;
+		desc.elementSize = sizeof(TetrahedronFaceDataGPU); // One structured element per extruded outer face
+		desc.elementCount = count; // Capacity in elements, not bytes
+		desc.usage = GBU_STATIC;
+		desc.format = BF_UNKNOWN;
+
+		mTetrahedronFaceInfosGPU = GpuBuffer::create(desc); // A brand-new buffer: nothing is copied from the old one here, so the caller must rewrite the contents
+		mMaxFaces = count; // Remember the capacity so callers can tell when another resize is needed
+	}
+
 	void LightProbes::resizeCoefficientBuffer(UINT32 count)
 	{
 		GPU_BUFFER_DESC desc;
@@ -480,8 +701,8 @@ namespace bs { namespace ct
 		}
 	};
 
-	void LightProbes::generateTetrahedronData(Vector<Vector3>& positions, Vector<TetrahedronData>& output, 
-		bool generateExtrapolationVolume)
+	void LightProbes::generateTetrahedronData(Vector<Vector3>& positions, Vector<TetrahedronData>& tetrahedra,
+		Vector<TetrahedronFaceData>& faces,	bool generateExtrapolationVolume)
 	{
 		bs_frame_mark();
 		{
@@ -489,11 +710,8 @@ namespace bs { namespace ct
 
 			if (generateExtrapolationVolume)
 			{
-				// We don't want ot handle the case where the user looks up a position and it falls outside of the 
-				// tetrahedron volume, as the math for projecting the point onto the volume might be too slow for the 
-				// shader (which we need in order not to have a sharp cutoff in lighting where the volume ends). Therefore
-				// we extend the tetrahedron volume to "infinity" (technically to some far away distance, but we treat it as
-				// infinity when calculating barycentric coordinates) by adding new points along the outer face normals.
+				// Add geometry so we can handle the case when the interpolation position falls outside of the tetrahedra
+				// volume. We use this geometry to project the position to the nearest face.
 				UINT32 numOuterFaces = (UINT32)volume.outerFaces.size();
 
 				// Calculate face normals for outer faces
@@ -534,6 +752,12 @@ namespace bs { namespace ct
 				}
 
 				//// Generate face normals
+				struct FaceVertex
+				{
+					Vector3 normal = Vector3::ZERO;
+					UINT32 outerIdx = -1;
+				};
+
 				FrameVector<Vector3> faceNormals(volume.outerFaces.size());
 				for (UINT32 i = 0; i < (UINT32)volume.outerFaces.size(); ++i)
 				{
@@ -544,16 +768,23 @@ namespace bs { namespace ct
 					Vector3 e0 = v1 - v0;
 					Vector3 e1 = v2 - v0;
 
-					faceNormals[i] = Vector3::normalize(e1.cross(e0));
+					// Make sure the normal is facing away from the center
+					const Tetrahedron& tet = volume.tetrahedra[volume.outerFaces[i].tetrahedron];
+
+					Vector3 center(BsZero);
+					for(UINT32 j = 0; j < 4; j++)
+						center += positions[tet.vertices[j]];
+
+					center /= 4.0f;
+
+					Vector3 normal = Vector3::normalize(e0.cross(e1));
+					if (normal.dot(v0 - center) < 0.0f)
+						normal = -normal;
+
+					faceNormals[i] = normal;
 				}
 
 				//// Generate vertex normals
-				struct FaceVertex
-				{
-					Vector3 normal = Vector3::ZERO;
-					UINT32 outerIdx = -1;
-				};
-
 				FrameUnorderedMap<INT32, FaceVertex> faceVertices;
 				for (auto& entry : edgeMap)
 				{
@@ -594,7 +825,7 @@ namespace bs { namespace ct
 					entry.second.normal.normalize();
 
 				// For each face vertex, generate an outer vertex along its normal
-				static const float ExtrapolationDistance = 1000.0f;
+				static const float ExtrapolationDistance = 5.0f;
 				for(auto& entry : faceVertices)
 				{
 					entry.second.outerIdx = (UINT32)positions.size();
@@ -603,110 +834,205 @@ namespace bs { namespace ct
 					positions.push_back(outerPos);
 				}
 
-				// For each face, generate outer tetrahedrons
-				Vector<Vector3> outerVolumeVerts;
+				// Generate face data
 				for (UINT32 i = 0; i < numOuterFaces; ++i)
 				{
 					const TetrahedronFace& face = volume.outerFaces[i];
-					UINT32 originalIndices[6];
+
+					TetrahedronFaceData faceData;
+					faceData.tetrahedron = face.tetrahedron;
+
 					for (UINT32 j = 0; j < 3; j++)
 					{
 						const FaceVertex& faceVertex = faceVertices[face.vertices[j]];
 
-						outerVolumeVerts.push_back(positions[face.vertices[j]]);
-						outerVolumeVerts.push_back(positions[faceVertex.outerIdx]);
-
-						originalIndices[j * 2 + 0] = face.vertices[j];
-						originalIndices[j * 2 + 1] = faceVertex.outerIdx;
+						faceData.innerVertices[j] = face.vertices[j];
+						faceData.outerVertices[j] = faceVertex.outerIdx;
+						faceData.normals[j] = faceVertex.normal;
 					}
 
-					TetrahedronVolume outerVolume = Triangulation::tetrahedralize(outerVolumeVerts);
-					UINT32 tetStartIdx = (UINT32)volume.tetrahedra.size();
-
-					for (auto& entry : outerVolume.tetrahedra)
+					// Add a link on the source tetrahedron to the face data
+					Tetrahedron& innerTet = volume.tetrahedra[face.tetrahedron];
+					for(UINT32 j = 0; j < 4; j++)
 					{
-						// Remap neighbors to global array
-						for (UINT32 j = 0; j < 4; j++)
+						if (innerTet.neighbors[j] == -1)
 						{
-							if (entry.neighbors[j] != -1)
-							{
-								// Valid neighbor, map to global array
-								entry.neighbors[j] += tetStartIdx;
-							}
+							// Note: Not searching for opposite neighbor here. If tet. has multiple free faces then we
+							// can't just pick the first one
+							innerTet.neighbors[j] = (UINT32)volume.tetrahedra.size() + (UINT32)faces.size();
+							break;
 						}
 					}
 
-					// Connect the new volume to the original face
-					for (auto& entry : outerVolume.outerFaces)
+					// We need a way to project a point outside the tetrahedron volume onto an outer face, then calculate
+					// triangle's barycentric coordinates. We use the per-vertex normals to extrude the triangle face into
+					// infinity.
+
+					// Our point can be represented as:
+					// p == a (p0 + t*v0) + b (p1 + t*v1) + c (p2 + t*v2)
+					//
+					// where a, b and c are barycentric coordinates,
+					// p0, p1, p2 are the corners of the face
+					// v0, v1, v2 are the vertex normals, per corner
+					// t is the distance from the triangle to the point
+					//
+					// Essentially we're calculating the corners of a bigger triangle that's "t" units away from the
+					// face, and its corners lie along the per-vertex normals. Point "p" will lie on that triangle, for which
+					// we can then calculate barycentric coordinates normally.
+					//
+					// First we substitute: c = 1 - a - b
+					// p == a (p0 + t v0) + b (p1 + t v1) + (1 - a - b) (p2 + t v2)
+					// p == a (p0 + t v0) + b (p1 + t v1) + (p2 + t v2) - a (p2 + t v2) - b (p2 + t v2)
+					// p == a (p0 - p2 + t v0 - t v2) + b (p1 - p2 + t v1 - t v2) + (p2 + t v2)
+					//
+					// And move everything to one side:
+					// p - p2 - t v2 == a (p0 - p2 + t ( v0 - v2)) + b (p1 - p2 + t ( v1 - v2))
+					// a (p0 - p2 + t ( v0 - v2)) + b (p1 - p2 + t ( v1 - v2)) - (p - p2 - t v2) == 0
+					//
+					// We rewrite it using:
+					// Ap = p0 - p2
+					// Av = v0 - v2
+					// Bp = p1 - p2
+					// Bv = v1 - v2
+					// Cp = p - p2
+					// Cv = -v2
+					//
+					// Which yields:
+					// a (Ap + t Av) + b (Bp + t Bv) - (Cp + t Cv) == 0
+					//
+					// Which can be written in matrix form:
+					//
+					// M = {Ap + t Av, Bp + t Bv, Cp + t Cv}
+					//       a      0
+					// M * [ b ] = [0]
+					//      -1      0
+					//
+					// From that we can tell that matrix M cannot be inverted, because if we multiply the zero vector with the
+					// inverted matrix the result would be zero, and not [a, b, -1]. Since the matrix cannot be inverted
+					// det(M) == 0.
+					//
+					// We can use that fact to calculate "t". After we have "t" we can calculate barycentric coordinates
+					// normally.
+					//
+					// Solving equation det(M) == 0 yields a cubic in form:
+					// p t^3 + q t^2 + r t + s = 0
+					//
+					// We'll convert this to monic form, by dividing by p:
+					// t^3 + q/p t^2 + r/p t + s/p = 0
+					//
+					// Or if p ends up being zero, we end up with a quadratic instead:
+					// q t^2 + r t + s = 0
+					// 
+					// We want to create a matrix that when multiplied with the position, yields us the three coefficients,
+					// which we can then use to solve for "t". For this we create a 3x4 matrix (three rows, four columns),
+					// where each row represents a solution for one of the coefficients. We factor contributions to each
+					// coefficient by whether they depend on position x, y, z, or not at all (the row's columns, in that order).
+
+					const Vector3& p0 = positions[faceData.innerVertices[0]];
+					const Vector3& p1 = positions[faceData.innerVertices[1]];
+					const Vector3& p2 = positions[faceData.innerVertices[2]];
+
+					const Vector3& v0 = faceVertices[faceData.innerVertices[0]].normal;
+					const Vector3& v1 = faceVertices[faceData.innerVertices[1]].normal;
+					const Vector3& v2 = faceVertices[faceData.innerVertices[2]].normal;
+
+					float p =
+							v2.x * v1.y * v0.z -
+							v1.x * v2.y * v0.z -
+							v2.x * v0.y * v1.z +
+							v0.x * v2.y * v1.z +
+							v1.x * v0.y * v2.z -
+							v0.x * v1.y * v2.z;
+						
+					float qx = -v1.y * v0.z + v2.y * v0.z + v0.y * v1.z - v2.y * v1.z - v0.y * v2.z + v1.y * v2.z;
+					float qy = v1.x * v0.z - v2.x * v0.z - v0.x * v1.z + v2.x * v1.z + v0.x * v2.z - v1.x * v2.z;
+					float qz = -v1.x * v0.y + v2.x * v0.y + v0.x * v1.y - v2.x * v1.y - v0.x * v2.y + v1.x * v2.y;
+					float qw = v2.y * v1.z * p0.x - v1.y * v2.z * p0.x - v2.y * v0.z * p1.x + v0.y * v2.z * p1.x + 
+						v1.y * v0.z * p2.x - v0.y * v1.z * p2.x - v2.x * v1.z * p0.y + v1.x * v2.z * p0.y + 
+						v2.x * v0.z * p1.y - v0.x * v2.z * p1.y - v1.x * v0.z * p2.y + v0.x * v1.z * p2.y + 
+						v2.x * v1.y * p0.z - v1.x * v2.y * p0.z - v2.x * v0.y * p1.z + v0.x * v2.y * p1.z + 
+						v1.x * v0.y * p2.z - v0.x * v1.y * p2.z;
+
+					float rx = v1.z * p0.y - v2.z * p0.y - v0.z * p1.y + v2.z * p1.y + v0.z * p2.y - v1.z * p2.y -
+						v1.y * p0.z + v2.y * p0.z + v0.y * p1.z - v2.y * p1.z - v0.y * p2.z + v1.y * p2.z;
+					float ry = -v1.z * p0.x + v2.z * p0.x + v0.z * p1.x - v2.z * p1.x - v0.z * p2.x + v1.z * p2.x +
+						v1.x * p0.z - v2.x * p0.z - v0.x * p1.z + v2.x * p1.z + v0.x * p2.z - v1.x * p2.z;
+					float rz = v1.y * p0.x - v2.y * p0.x - v0.y * p1.x + v2.y * p1.x + v0.y * p2.x - v1.y * p2.x -
+						v1.x * p0.y + v2.x * p0.y + v0.x * p1.y - v2.x * p1.y - v0.x * p2.y + v1.x * p2.y;
+					float rw = v2.z * p1.x * p0.y - v1.z * p2.x * p0.y - v2.z * p0.x * p1.y + v0.z * p2.x * p1.y +
+						v1.z * p0.x * p2.y - v0.z * p1.x * p2.y - v2.y * p1.x * p0.z + v1.y * p2.x * p0.z +
+						v2.x * p1.y * p0.z - v1.x * p2.y * p0.z + v2.y * p0.x * p1.z - v0.y * p2.x * p1.z -
+						v2.x * p0.y * p1.z + v0.x * p2.y * p1.z - v1.y * p0.x * p2.z + v0.y * p1.x * p2.z +
+						v1.x * p0.y * p2.z - v0.x * p1.y * p2.z;
+
+					float sx = -p1.y * p0.z + p2.y * p0.z + p0.y * p1.z - p2.y * p1.z - p0.y * p2.z + p1.y * p2.z;
+					float sy = p1.x * p0.z - p2.x * p0.z - p0.x * p1.z + p2.x * p1.z + p0.x * p2.z - p1.x * p2.z;
+					float sz = -p1.x * p0.y + p2.x * p0.y + p0.x * p1.y - p2.x * p1.y - p0.x * p2.y + p1.x * p2.y;
+					float sw = p2.x * p1.y * p0.z - p1.x * p2.y * p0.z - p2.x * p0.y * p1.z + 
+						p0.x * p2.y * p1.z + p1.x * p0.y * p2.z - p0.x * p1.y * p2.z;
+
+					faceData.transform[0][0] = qx;
+					faceData.transform[0][1] = qy;
+					faceData.transform[0][2] = qz;
+					faceData.transform[0][3] = qw;
+
+					faceData.transform[1][0] = rx;
+					faceData.transform[1][1] = ry;
+					faceData.transform[1][2] = rz;
+					faceData.transform[1][3] = rw;
+
+					faceData.transform[2][0] = sx;
+					faceData.transform[2][1] = sy;
+					faceData.transform[2][2] = sz;
+					faceData.transform[2][3] = sw;
+
+					// Unused
+					faceData.transform[3][0] = 0.0f;
+					faceData.transform[3][1] = 0.0f;
+					faceData.transform[3][2] = 0.0f;
+					faceData.transform[3][3] = 0.0f;
+
+					if (fabs(p) > 0.00001f)
 					{
-						// Look for the face sharing all vertices with the original face
-						bool isValid = true;
-						for (UINT32 j = 0; j < 3; j++)
-						{
-							if (entry.vertices[j] % 2 == 1)
-							{
-								isValid = false;
-								break;
-							}
-						}
-
-						if (!isValid)
-							continue;
-
-						Tetrahedron& outerTet = outerVolume.tetrahedra[entry.tetrahedron];
-
-						UINT32 oppositeVert = -1;
-						for (UINT32 j = 0; j < 4; j++)
-						{
-							if(outerTet.vertices[j] != entry.vertices[0] &&
-								outerTet.vertices[j] != entry.vertices[1] &&
-								outerTet.vertices[j] != entry.vertices[2])
-							{
-								oppositeVert = j;
-								break;
-							}
-						}
-
-						assert(outerTet.neighbors[oppositeVert] == -1);
-						outerTet.neighbors[oppositeVert] = face.tetrahedron;
-
-						Tetrahedron& innerTet = volume.tetrahedra[face.tetrahedron];
-						for(UINT32 j = 0; j < 4; j++)
-						{
-							if (innerTet.neighbors[j] == -1)
-							{
-								// Note: Not searching for opposite neighbor here. If tet. has multiple free faces then we
-								// can't just pick the first one
-								innerTet.neighbors[j] = tetStartIdx + entry.tetrahedron;
-								break;
-							}
-						}
+						faceData.transform = faceData.transform * (1.0f / p);
+						faceData.quadratic = false;
+					}
+					else // Quadratic
+					{
+						faceData.quadratic = true;
 					}
 
-					for (auto& entry : outerVolume.tetrahedra)
+					faces.push_back(faceData);
+				}
+			}
+			else
+			{
+				for (UINT32 i = 0; i < (UINT32)volume.outerFaces.size(); ++i)
+				{
+					const TetrahedronFace& face = volume.outerFaces[i];
+					TetrahedronFaceData faceData;
+
+					for (UINT32 j = 0; j < 3; j++)
 					{
-						// Remap vertices back to global array
-						for (UINT32 j = 0; j < 4; j++)
-							entry.vertices[j] = originalIndices[entry.vertices[j]];
+						faceData.innerVertices[j] = face.vertices[j];
+						faceData.outerVertices[j] = -1;
+						faceData.normals[j] = Vector3::ZERO;
 					}
 
-					outerVolumeVerts.clear();
+					faceData.tetrahedron = face.tetrahedron;
+					faceData.transform = Matrix4::IDENTITY;
+					faceData.quadratic = false;
 
-					// Add to global tetrahedra array
-					for (auto& entry : outerVolume.tetrahedra)
-						volume.tetrahedra.push_back(entry);
+					faces.push_back(faceData);
 				}
-
-				// Note: Not forming neighbor connections between outer tetrahedrons. Since we generate them separately
-				// we can't guarantee they even exist.
 			}
 
 			// Generate matrices
 			UINT32 numOutputTets = (UINT32)volume.tetrahedra.size();
-			output.reserve(numOutputTets);
+			tetrahedra.reserve(numOutputTets);
 
-			// Insert tetrahedrons, generate matrices
-			for(UINT32 i = 0; i < (UINT32)volume.tetrahedra.size(); ++i)
+			//// For inner tetrahedrons
+			for(UINT32 i = 0; i < (UINT32)numOutputTets; ++i)
 			{
 				TetrahedronData entry;
 				entry.volume = volume.tetrahedra[i];
@@ -745,21 +1071,15 @@ namespace bs { namespace ct
 				Vector3 E2 = P2 - P4;
 				Vector3 E3 = P3 - P4;
 
-				Matrix3 mat;
-				mat.setColumn(0, E1);
-				mat.setColumn(1, E2);
-				mat.setColumn(2, E3);
-
-				// If tetrahedron is co-planar just ignore it, shader will use some other nearby one instead. We can't
-				// handle coplanar tetrahedrons because the matrix is not invertible, and for nearly co-planar ones the
-				// math breaks down because of precision issues.
-				if (fabs(Vector3::dot(Vector3::normalize(Vector3::cross(E1, E2)), E3)) < 0.0001f)
-					continue;
+				Matrix4 mat;
+				mat.setColumn(0, Vector4(E1, 0.0f));
+				mat.setColumn(1, Vector4(E2, 0.0f));
+				mat.setColumn(2, Vector4(E3, 0.0f));
+				mat.setColumn(3, Vector4(P4, 1.0f));
 
-				entry.transform = Matrix4(mat.inverse());
-				entry.transform.setColumn(3, Vector4(P4, 1.0f));
+				entry.transform = mat.inverse();
 
-				output.push_back(entry);
+				tetrahedra.push_back(entry);
 			}
 		}
 		bs_frame_clear();

+ 5 - 7
Source/RenderBeast/Source/BsRenderCompositor.cpp

@@ -661,10 +661,7 @@ namespace bs { namespace ct
 		const RendererViewProperties& viewProps = inputs.view.getProperties();
 
 		const LightProbes& lightProbes = inputs.scene.lightProbes;
-
-		SPtr<GpuBuffer> shCoeffs = lightProbes.getSHCoefficientsBuffer();
-		SPtr<GpuBuffer> volumeInfos = lightProbes.getTetrahedonInfosBuffer();
-		SPtr<Mesh> volumeMesh = lightProbes.getTetrahedraVolumeMesh();
+		LightProbesInfo lpInfo = lightProbes.getInfo();
 
 		IrradianceEvaluateMat* evaluateMat;
 		SPtr<PooledRenderTexture> volumeIndices;
@@ -685,10 +682,11 @@ namespace bs { namespace ct
 
 			RenderAPI& rapi = RenderAPI::instance();
 			rapi.setRenderTarget(rt);
-			rapi.clearRenderTarget(FBT_COLOR | FBT_DEPTH);			
+			rapi.clearRenderTarget(FBT_DEPTH);
+			gRendererUtility().clear(-1);
 
 			TetrahedraRenderMat* renderTetrahedra = TetrahedraRenderMat::getVariation(viewProps.numSamples > 1);
-			renderTetrahedra->execute(inputs.view, sceneDepthNode->depthTex->texture, volumeMesh, rt);
+			renderTetrahedra->execute(inputs.view, sceneDepthNode->depthTex->texture, lpInfo.tetrahedraVolume, rt);
 
 			rt = nullptr;
 			resPool.release(depthTex);
@@ -710,7 +708,7 @@ namespace bs { namespace ct
 		if (volumeIndices)
 			volumeIndicesTex = volumeIndices->texture;
 
-		evaluateMat->execute(inputs.view, gbuffer, volumeIndicesTex, shCoeffs, volumeInfos, inputs.scene.skybox, 
+		evaluateMat->execute(inputs.view, gbuffer, volumeIndicesTex, lpInfo, inputs.scene.skybox, 
 			lightAccumNode->renderTarget);
 
 		if(volumeIndices)