Browse Source

WIP: Adding alternatives to existing compute shaders, in order to support older hardware and mobiles
- Refactored irradiance evaluation code so it uses SH coefficients stored in textures rather than in buffers. This way the code is also compatible with non-compute method of generating the coefficients.

BearishSun 8 years ago
parent
commit
b52bd59e1d
39 changed files with 335 additions and 162 deletions
  1. BIN
      Data/Engine/Includes/SHCommon.bslinc.asset
  2. BIN
      Data/Engine/Includes/TemporalResolve.bslinc.asset
  3. BIN
      Data/Engine/Shaders/IrradianceComputeSH.bsl.asset
  4. BIN
      Data/Engine/Shaders/IrradianceComputeSHFrag.bsl.asset
  5. BIN
      Data/Engine/Shaders/IrradianceComputeSH_1.bsl.asset
  6. BIN
      Data/Engine/Shaders/IrradianceEvaluate.bsl.asset
  7. BIN
      Data/Engine/Shaders/IrradianceEvaluate_1.bsl.asset
  8. BIN
      Data/Engine/Shaders/IrradianceEvaluate_2.bsl.asset
  9. BIN
      Data/Engine/Shaders/IrradianceEvaluate_3.bsl.asset
  10. BIN
      Data/Engine/Shaders/IrradianceEvaluate_4.bsl.asset
  11. BIN
      Data/Engine/Shaders/IrradianceEvaluate_5.bsl.asset
  12. BIN
      Data/Engine/Shaders/IrradianceProjectSH.bsl.asset
  13. BIN
      Data/Engine/Shaders/IrradianceReduceSH.bsl.asset
  14. BIN
      Data/Engine/Shaders/IrradianceReduceSH_1.bsl.asset
  15. BIN
      Data/Engine/Shaders/PPCreateTonemapLUT.bsl.asset
  16. BIN
      Data/Engine/Shaders/PPCreateTonemapLUT_1.bsl.asset
  17. BIN
      Data/Engine/Shaders/PPEyeAdaptationBasic.bsl.asset
  18. BIN
      Data/Engine/Shaders/PPEyeAdaptationBasicSetup.bsl.asset
  19. BIN
      Data/Engine/Shaders/PPSSRResolve.bsl.asset
  20. BIN
      Data/Engine/Shaders/PPSSRResolve_1.bsl.asset
  21. 17 0
      Data/Raw/Engine/Includes/SHCommon.bslinc
  22. 5 10
      Data/Raw/Engine/Includes/TemporalResolve.bslinc
  23. 3 2
      Data/Raw/Engine/Shaders/IrradianceEvaluate.bsl
  24. 2 2
      Data/Raw/Engine/Shaders/IrradianceProjectSH.bsl
  25. 10 4
      Data/Raw/Engine/Shaders/IrradianceReduceSH.bsl
  26. 4 4
      Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl
  27. 2 1
      Data/Raw/Engine/Shaders/PPEyeAdaptationBasic.bsl
  28. 3 2
      Data/Raw/Engine/Shaders/PPEyeAdaptationBasicSetup.bsl
  29. 2 2
      Documentation/GitHub/dependencies.md
  30. 31 0
      Source/BansheeCore/Renderer/BsIBLUtility.cpp
  31. 15 6
      Source/BansheeCore/Renderer/BsIBLUtility.h
  32. 65 17
      Source/BansheeCore/Renderer/BsLightProbeVolume.cpp
  33. 5 5
      Source/BansheeCore/Renderer/BsLightProbeVolume.h
  34. 2 0
      Source/BansheeUtility/Utility/BsTriangulation.h
  35. 53 33
      Source/RenderBeast/BsLightProbes.cpp
  36. 8 6
      Source/RenderBeast/BsLightProbes.h
  37. 11 1
      Source/RenderBeast/BsRenderBeast.cpp
  38. 84 54
      Source/RenderBeast/BsRenderBeastIBLUtility.cpp
  39. 13 13
      Source/RenderBeast/BsRenderBeastIBLUtility.h

BIN
Data/Engine/Includes/SHCommon.bslinc.asset


BIN
Data/Engine/Includes/TemporalResolve.bslinc.asset


BIN
Data/Engine/Shaders/IrradianceComputeSH.bsl.asset


BIN
Data/Engine/Shaders/IrradianceComputeSHFrag.bsl.asset


BIN
Data/Engine/Shaders/IrradianceComputeSH_1.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate_1.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate_2.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate_3.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate_4.bsl.asset


BIN
Data/Engine/Shaders/IrradianceEvaluate_5.bsl.asset


BIN
Data/Engine/Shaders/IrradianceProjectSH.bsl.asset


BIN
Data/Engine/Shaders/IrradianceReduceSH.bsl.asset


BIN
Data/Engine/Shaders/IrradianceReduceSH_1.bsl.asset


BIN
Data/Engine/Shaders/PPCreateTonemapLUT.bsl.asset


BIN
Data/Engine/Shaders/PPCreateTonemapLUT_1.bsl.asset


BIN
Data/Engine/Shaders/PPEyeAdaptationBasic.bsl.asset


BIN
Data/Engine/Shaders/PPEyeAdaptationBasicSetup.bsl.asset


BIN
Data/Engine/Shaders/PPSSRResolve.bsl.asset


BIN
Data/Engine/Shaders/PPSSRResolve_1.bsl.asset


+ 17 - 0
Data/Raw/Engine/Includes/SHCommon.bslinc

@@ -16,6 +16,23 @@ mixin SHCommon
 			SHVector B;
 		};
 		
+		// Reads one set of SH coefficients from a texture. The set occupies SH_NUM_COEFFS texels laid out
+		// consecutively along X, starting at the provided texel offset. The R, G and B channels of each texel
+		// hold the coefficient for the matching color channel.
+		SHVectorRGB SHLoad(Texture2D input, int2 offset)
+		{
+			SHVectorRGB result;
+
+			[unroll]
+			for(int idx = 0; idx < SH_NUM_COEFFS; ++idx)
+			{
+				// Third component of the load location is fixed to zero, same as for every other texel in the set
+				int3 texelPos = int3(offset.x + idx, offset.y, 0);
+				float3 texel = input.Load(texelPos).rgb;
+
+				result.R.v[idx] = texel.r;
+				result.G.v[idx] = texel.g;
+				result.B.v[idx] = texel.b;
+			}
+
+			return result;
+		}
+		
 		void SHZero(inout SHVector v)
 		{
 			[unroll]

+ 5 - 10
Data/Raw/Engine/Includes/TemporalResolve.bslinc

@@ -243,19 +243,14 @@ mixin TemporalResolve
 			#define _TONEMAP_COLOR(v) v
 		#endif // TEMPORAL_TONEMAP
 		
-		// Samples the scene color texture, automatically converting to YCoCg space (if enabled),
-		// and tonemapping the value
-		float4 sampleColor(
-			_TEXCOLOR(tex), 
-			float2 uv, 
-			int2 offset
+		// Converts scene color to YCoCg space (if enabled) and applies tonemapping
+		float4 convertColor(
+			float4 color
 			#if TEMPORAL_TONEMAP
 			, float exposureScale
 			#endif // TEMPORAL_TONEMAP
 		)
 		{
-			float4 color = _SAMPLECOL(tex, uv, offset);
-			
 			#if TEMPORAL_YCOCG
 				color.rgb = _TONEMAP_COLOR(RGBToYCoCg(color.rgb));
 			#else
@@ -266,9 +261,9 @@ mixin TemporalResolve
 		}
 		
 		#if TEMPORAL_TONEMAP
-			#define _SAMPLE_COLOR(n, uv, offset) sampleColor(_PTEXCOLOR(n), uv, offset, exposureScale)
+			#define _SAMPLE_COLOR(n, uv, offset) convertColor(_SAMPLECOL(n, uv, offset), exposureScale)
 		#else
-			#define _SAMPLE_COLOR(n, uv, offset) sampleColor(_PTEXCOLOR(n), uv, offset)
+			#define _SAMPLE_COLOR(n, uv, offset) convertColor(_SAMPLECOL(n, uv, offset))
 		#endif
 		
 		///////////////////////////// MAIN /////////////////////////////////

+ 3 - 2
Data/Raw/Engine/Shaders/IrradianceEvaluate.bsl

@@ -35,6 +35,7 @@ technique IrradianceEvaluate
 		struct Tetrahedron
 		{
 			uint4 indices;
+			uint2 offsets[4];
 			float3x4 transform;
 		};
 		
@@ -46,7 +47,7 @@ technique IrradianceEvaluate
 			float padding[3];
 		};		
 		
-		StructuredBuffer<SHVectorRGB> gSHCoeffs;
+		Texture2D gSHCoeffs;
 		StructuredBuffer<Tetrahedron> gTetrahedra;
 		StructuredBuffer<TetrahedronFace> gTetFaces;
 		
@@ -246,7 +247,7 @@ technique IrradianceEvaluate
 					{
 						if(coords[i] > 0.0f)
 						{
-							SHVectorRGB coeff = gSHCoeffs[volume.indices[i]];
+							SHVectorRGB coeff = SHLoad(gSHCoeffs, volume.offsets[i]);
 							SHMultiplyAdd(shCoeffs, coeff, coords[i]);
 						}
 					}

+ 2 - 2
Data/Raw/Engine/Shaders/IrradianceProjectSH.bsl

@@ -17,7 +17,7 @@ technique IrradianceProjectSH
 			int gCubeFace;
 		}	
 	
-		StructuredBuffer<SHVectorRGB> gSHCoeffs;
+		Texture2D gSHCoeffs;
 
 		float evaluateLambert(SHVector coeffs)
 		{
@@ -57,7 +57,7 @@ technique IrradianceProjectSH
 			
 			SHVector shBasis = SHBasis(dir);
 							
-			SHVectorRGB coeffs = gSHCoeffs[0];
+			SHVectorRGB coeffs = SHLoad(gSHCoeffs, int2(0, 0));
 			SHMultiply(coeffs.R, shBasis);
 			SHMultiply(coeffs.G, shBasis);
 			SHMultiply(coeffs.B, shBasis);

+ 10 - 4
Data/Raw/Engine/Shaders/IrradianceReduceSH.bsl

@@ -17,13 +17,13 @@ technique IrradianceReduceSH
 		};
 
 		StructuredBuffer<SHCoeffsAndWeight> gInput;
-		RWStructuredBuffer<SHVectorRGB> gOutput;
+		RWTexture2D<float4> gOutput;
 		
 		[internal]
 		cbuffer Params
 		{
+			uint2 gOutputIdx;
 			uint gNumEntries;
-			uint gOutputIdx;
 		}			
 		
 		[numthreads(1, 1, 1)]
@@ -56,8 +56,14 @@ technique IrradianceReduceSH
 			SHMultiply(coeffs.R, normFactor);
 			SHMultiply(coeffs.G, normFactor);
 			SHMultiply(coeffs.B, normFactor);
-				
-			gOutput[gOutputIdx] = coeffs;
+			
+			uint2 writeIdx = gOutputIdx;
+			[unroll]
+			for(int i = 0; i < SH_NUM_COEFFS; ++i)
+			{			
+				gOutput[writeIdx] = float4(coeffs.R.v[i], coeffs.G.v[i], coeffs.B.v[i], 0.0f);
+				writeIdx.x += 1;
+			}
 		}
 	};
 };

+ 4 - 4
Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl

@@ -139,11 +139,11 @@ technique PPCreateTonemapLUT
 			// Red goes from 0 to 1, in each slice along X (LUT_SIZE number of slices)
 			logColor.r = frac(uv.x * LUT_SIZE);
 			
-			// Green value is constant within each slice, and increases by 1/LUT_SIZE with each slice along X
-			logColor.g = uv.x - logColor.r / LUT_SIZE;
+			// Blue value is constant within each slice, and increases by 1/LUT_SIZE with each slice along X
+			logColor.b = uv.x - logColor.r / LUT_SIZE;
 			
-			// Blue increases linearly with y
-			logColor.b = uv.y;
+			// Green increases linearly with y
+			logColor.g = uv.y;
 			
 			float3 gammaColor = tonemapColor(logColor);
 							

+ 2 - 1
Data/Raw/Engine/Shaders/PPEyeAdaptationBasic.bsl

@@ -61,7 +61,8 @@ technique PPEyeAdaptationBasic
 			float avgLuminance = calcWeightedAverageAlpha(gCurFrameTex, gInputTexSize, slope);
 		
 			// Scale back into normal range (was log2 encoded and scaled into [0, 1] range)
-			avgLuminance = exp2((pos - gEyeAdaptationParams[0].y) / gEyeAdaptationParams[0].x);
+			avgLuminance = exp2((avgLuminance - gEyeAdaptationParams[0].y) / gEyeAdaptationParams[0].x);
+			avgLuminance /= 0.16f;
 		
 			// Clamp to valid range
 			avgLuminance = clamp(avgLuminance, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);

+ 3 - 2
Data/Raw/Engine/Shaders/PPEyeAdaptationBasicSetup.bsl

@@ -16,14 +16,15 @@ technique PPEyeAdaptationBasicSetup
 		float4 fsmain(VStoFS input) : SV_Target0
 		{
 			float4 value = gInputTex.Sample(gInputSamp, input.uv0);
-			
-			float luminance = dot(OutColor.xyz, float3(0.2126, 0.7152, 0.0722));
+			float luminance = dot(value.xyz, float3(0.2126, 0.7152, 0.0722));
 			
 			float maxIntensity = gEyeAdaptationParams[2].z;
 			luminance = max(maxIntensity, luminance);
 			
 			// Store intensity as log, and scale to [0, 1] range
 			value.w = gEyeAdaptationParams[0].x * log2(luminance) + gEyeAdaptationParams[0].y;
+			
+			return value;
 		}	
 	};
 };

+ 2 - 2
Documentation/GitHub/dependencies.md

@@ -84,8 +84,8 @@ Additionally, if the dependency structure still isn't clear, download one of the
 - https://github.com/kcat/openal-soft
 - Required by BansheeOpenAudio
 - **Linux only**
- - Make sure to get audio backend libraries before compiling: PulseAudio, OSS, ALSA and JACK
- - On Debian/Ubuntu run: *apt-get install libpulse libasound2-dev libjack-dev* 
+  - Make sure to get audio backend libraries before compiling: PulseAudio, OSS, ALSA and JACK
+  - On Debian/Ubuntu run: *apt-get install libpulse libasound2-dev libjack-dev* 
 - Compile as a dynamic library
    
 **libogg**

+ 31 - 0
Source/BansheeCore/Renderer/BsIBLUtility.cpp

@@ -1,12 +1,43 @@
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "Renderer/BsIBLUtility.h"
+#include "Math/BsVector2I.h"
 
 namespace bs { namespace ct
 {
 	const UINT32 IBLUtility::REFLECTION_CUBEMAP_SIZE = 256;
 	const UINT32 IBLUtility::IRRADIANCE_CUBEMAP_SIZE = 32;
 
+	/**
+	 * Returns the size of the texture required to store the provided number of SH coefficient sets.
+	 *
+	 * Each set occupies (shOrder * shOrder) consecutive texels within a single row, and sets never straddle rows.
+	 * The layout matches the one used by getSHCoeffXYFromIdx().
+	 *
+	 * @param[in]	numCoeffSets	Number of coefficient sets the texture needs to hold.
+	 * @param[in]	shOrder			Order of the spherical harmonics. A single set must fit in one row, i.e.
+	 *								(shOrder * shOrder) must be in range [1, 4096].
+	 * @return						Width (x) and height (y) of the texture, in texels. Height is always at least one.
+	 */
+	Vector2I IBLUtility::getSHCoeffTextureSize(UINT32 numCoeffSets, UINT32 shOrder)
+	{
+		UINT32 coeffsPerSet = shOrder * shOrder;
+		
+		// Assuming the texture maximum size is 4096
+		UINT32 maxSetsPerRow = 4096 / coeffsPerSet;
+		
+		// Round up so a partially filled last row is still allocated (e.g. 2 * maxSetsPerRow + 1 sets need three
+		// rows), and always keep at least a single row
+		UINT32 numRows = (numCoeffSets + maxSetsPerRow - 1) / maxSetsPerRow;
+		if(numRows == 0)
+			numRows = 1;
+		
+		Vector2I output;
+		output.x = (numCoeffSets > maxSetsPerRow ? maxSetsPerRow : numCoeffSets) * coeffsPerSet;
+		output.y = numRows;
+
+		return output;
+	}
+	
+	/**
+	 * Maps the index of a coefficient set to the texel at which that set starts in the coefficient texture. Each set
+	 * takes up (shOrder * shOrder) texels along X, and as many whole sets as possible are packed into every row.
+	 */
+	Vector2I IBLUtility::getSHCoeffXYFromIdx(UINT32 idx, UINT32 shOrder)
+	{
+		// Assuming the texture maximum size is 4096
+		UINT32 texelsPerSet = shOrder * shOrder;
+		UINT32 setsPerRow = 4096 / texelsPerSet;
+
+		UINT32 column = idx % setsPerRow;
+		UINT32 row = idx / setsPerRow;
+
+		Vector2I position;
+		position.x = column * texelsPerSet;
+		position.y = row;
+
+		return position;
+	}
+
 	const IBLUtility& gIBLUtility()
 	{
 		return IBLUtility::instance();

+ 15 - 6
Source/BansheeCore/Renderer/BsIBLUtility.h

@@ -37,16 +37,16 @@ namespace bs { namespace ct
 		virtual void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output) const = 0;
 
 		/**
-		 * Performs filtering on the cubemap, populating the output cubemap with values that can be used for evaluating
-		 * irradiance for use in diffuse lighting. Uses order-5 SH (9 coefficients) and outputs the values in the form of
-		 * a cubemap.
+		 * Performs filtering on the cubemap, populating the output texture with values that can be used for evaluating
+		 * irradiance for use in diffuse lighting. Uses order-3 SH (9 coefficients) and outputs the values in the form of
+		 * SH coefficients.
 		 * 
 		 * @param[in]		cubemap		Cubemap to filter. Its mip level 0 will be used as source.
-		 * @param[in]		output		Output buffer in which to place the results. Must be allocated using 
-		 *								IrradianceReduceMat<ORDER>::createOutputBuffer();
+		 * @param[in]		output		Output texture in which to place the results. Must be allocated using 
+		 *								IrradianceReduceMat::createOutputTexture();
 		 * @param[in]		outputIdx	Index of the coefficient set in the output texture at which to write the output coefficients.
 		 */
-		virtual void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<GpuBuffer>& output, 
+		virtual void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output, 
 			UINT32 outputIdx) const = 0;
 
 		/**
@@ -60,6 +60,15 @@ namespace bs { namespace ct
 		 */
 		virtual void scaleCubemap(const SPtr<Texture>& src, UINT32 srcMip, const SPtr<Texture>& dst, UINT32 dstMip) const = 0;
 
+
+		/** Returns the size of the texture required to store the provided number of SH coefficient sets. */
+		static Vector2I getSHCoeffTextureSize(UINT32 numCoeffSets, UINT32 shOrder);
+		
+		/** 
+		 * Determines the position of a set of coefficients in the coefficient texture, depending on the coefficient index. 
+		 */
+		static Vector2I getSHCoeffXYFromIdx(UINT32 idx, UINT32 shOrder);
+
 		static const UINT32 REFLECTION_CUBEMAP_SIZE;
 		static const UINT32 IRRADIANCE_CUBEMAP_SIZE;
 	};

+ 65 - 17
Source/BansheeCore/Renderer/BsLightProbeVolume.cpp

@@ -5,7 +5,6 @@
 #include "Allocators/BsFrameAlloc.h"
 #include "Renderer/BsRenderer.h"
 #include "Renderer/BsLight.h"
-#include "RenderAPI/BsGpuBuffer.h"
 #include "Image/BsTexture.h"
 #include "Renderer/BsIBLUtility.h"
 #include "Scene/BsSceneObject.h"
@@ -409,8 +408,33 @@ namespace bs
 		UINT32 numCoefficients = (UINT32)mInitCoefficients.size();
 		assert(mInitCoefficients.size() == mProbeMap.size());
 
-		resizeCoefficientBuffer(std::max(32U, numCoefficients));
-		mCoefficients->writeData(0, sizeof(LightProbeSHCoefficients) * numCoefficients, mInitCoefficients.data());
+		resizeCoefficientTexture(std::max(32U, numCoefficients));
+
+		SPtr<PixelData> coeffData = mCoefficients->getProperties().allocBuffer(0, 0);
+		UINT32 probesPerRow = coeffData->getWidth() / 9;
+		UINT32 probeIdx = 0;
+		for(UINT32 y = 0; y < coeffData->getHeight(); ++y)
+		{
+			for(UINT32 x = 0; x < probesPerRow; ++x)
+			{
+				if(probeIdx >= numCoefficients)
+					break;
+
+				for(UINT32 i = 0; i < 9; i++)
+				{
+					Color value;
+					value.r = mInitCoefficients[probeIdx].coeffsR[i];
+					value.g = mInitCoefficients[probeIdx].coeffsG[i];
+					value.b = mInitCoefficients[probeIdx].coeffsB[i];
+
+					coeffData->setColorAt(value, x * 9, y);
+				}
+
+				probeIdx++;
+			}
+		}
+
+		mCoefficients->writeData(*coeffData, 0, 0, true);
 		mInitCoefficients.clear();
 
 		gRenderer()->notifyLightProbeVolumeAdded(this);
@@ -422,7 +446,7 @@ namespace bs
 		// Probe map only contains active probes
 		UINT32 numUsedProbes = (UINT32)mProbeMap.size();
 		if(numUsedProbes > mCoeffBufferSize)
-			resizeCoefficientBuffer(std::max(32U, numUsedProbes * 2));
+			resizeCoefficientTexture(std::max(32U, numUsedProbes * 2));
 
 		UINT32 numProbeUpdates = 0;
 		for (; mFirstDirtyProbe < (UINT32)mProbeInfos.size(); ++mFirstDirtyProbe)
@@ -595,7 +619,31 @@ namespace bs
 		output.resize(numActiveProbes);
 
 		LightProbeSHCoefficients* coefficients = bs_stack_alloc<LightProbeSHCoefficients>(numActiveProbes);
-		mCoefficients->readData(0, sizeof(LightProbeSHCoefficients) * numActiveProbes, coefficients);
+
+		SPtr<PixelData> coeffData = mCoefficients->getProperties().allocBuffer(0, 0);
+		mCoefficients->readData(*coeffData);
+
+		UINT32 probesPerRow = coeffData->getWidth() / 9;
+		UINT32 probeIdx = 0;
+		for(UINT32 y = 0; y < coeffData->getHeight(); ++y)
+		{
+			for(UINT32 x = 0; x < probesPerRow; ++x)
+			{
+				if(probeIdx >= numActiveProbes)
+					break;
+
+				for(UINT32 i = 0; i < 9; i++)
+				{
+					Color value = coeffData->getColorAt(x * 9, y);
+
+					coefficients[probeIdx].coeffsR[i] = value.r;
+					coefficients[probeIdx].coeffsG[i] = value.g;
+					coefficients[probeIdx].coeffsB[i] = value.b;
+				}
+
+				probeIdx++;
+			}
+		}
 
 		for(UINT32 i = 0; i < numActiveProbes; ++i)
 		{
@@ -606,21 +654,21 @@ namespace bs
 		bs_stack_free(coefficients);
 	}
 
-	void LightProbeVolume::resizeCoefficientBuffer(UINT32 count)
+	void LightProbeVolume::resizeCoefficientTexture(UINT32 count)
 	{
-		GPU_BUFFER_DESC desc;
-		desc.type = GBT_STRUCTURED;
-		desc.elementSize = sizeof(LightProbeSHCoefficients);
-		desc.elementCount = count;
-		desc.usage = GBU_STATIC;
-		desc.format = BF_UNKNOWN;
-		desc.randomGpuWrite = true;
-
-		SPtr<GpuBuffer> newBuffer = GpuBuffer::create(desc);
+		Vector2I texSize = IBLUtility::getSHCoeffTextureSize(count, 3);
+
+		TEXTURE_DESC desc;
+		desc.width = (UINT32)texSize.x;
+		desc.height = (UINT32)texSize.y;
+		desc.usage = TU_LOADSTORE | TU_RENDERTARGET;
+		desc.format = PF_RGBA32F;
+
+		SPtr<Texture> newTexture = Texture::create(desc);
 		if (mCoefficients)
-			newBuffer->copyData(*mCoefficients, 0, 0, mCoefficients->getSize(), true);
+			mCoefficients->copy(newTexture);
 
-		mCoefficients = newBuffer;
+		mCoefficients = newTexture;
 		mCoeffBufferSize = count;
 	}
 }}

+ 5 - 5
Source/BansheeCore/Renderer/BsLightProbeVolume.h

@@ -253,8 +253,8 @@ namespace bs
 		/** Populates the vector with SH coefficients for each light probe. Involves reading the coefficient texture back from the GPU. */
 		void getProbeCoefficients(Vector<LightProbeCoefficientInfo>& output) const;
 
-		/** Returns the GPU buffer containing SH coefficients. */
-		SPtr<GpuBuffer> getCoefficientsBuffer() const { return mCoefficients; }
+		/** Returns the texture containing SH coefficients for all probes in the volume. */
+		SPtr<Texture> getCoefficientsTexture() const { return mCoefficients; }
 	protected:
 		friend class bs::LightProbeVolume;
 
@@ -276,10 +276,10 @@ namespace bs
 		bool renderProbes(UINT32 maxProbes);
 
 		/** 
-		 * Resizes the internal GPU buffer that stores light probe SH coefficients, to the specified size (in the number
+		 * Resizes the internal texture that stores light probe SH coefficients, to the specified size (in the number
 		 * of probes). 
 		 */
-		void resizeCoefficientBuffer(UINT32 count);
+		void resizeCoefficientTexture(UINT32 count);
 
 		UINT32 mRendererId = 0;
 		UnorderedMap<UINT32, UINT32> mProbeMap; // Map from static indices to compact list of probes
@@ -289,7 +289,7 @@ namespace bs
 		Vector<LightProbeInfo> mProbeInfos;
 
 		// Contains SH coefficients for the probes
-		SPtr<GpuBuffer> mCoefficients;
+		SPtr<Texture> mCoefficients;
 		UINT32 mCoeffBufferSize = 0;
 
 		// Temporary until initialization

+ 2 - 0
Source/BansheeUtility/Utility/BsTriangulation.h

@@ -3,6 +3,7 @@
 #pragma once
 
 #include "Prerequisites/BsPrerequisitesUtil.h"
+#include "Math/BsVector2I.h"
 
 namespace bs
 {
@@ -15,6 +16,7 @@ namespace bs
 	{
 		/** Indices of vertices that form the tetrahedron pointing to an external point array. */
 		INT32 vertices[4];
+
 		/** 
 		 * Indices pointing to neighbor tetrahedrons. Each neighbor index maps to the @p vertices array, so neighbor/vertex
 		 * pair at the same location will be the only neighbor not containing that vertex (i.e. neighbor opposite to

+ 53 - 33
Source/RenderBeast/BsLightProbes.cpp

@@ -153,7 +153,7 @@ namespace bs { namespace ct
 		if(!mSkyOnly)
 		{
 			params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gInputTex", mParamInputTex);
-			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mParamSHCoeffsBuffer);
+			params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mParamSHCoeffsTexture);
 			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gTetrahedra", mParamTetrahedraBuffer);
 			params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gTetFaces", mParamTetFacesBuffer);
 		}
@@ -197,7 +197,7 @@ namespace bs { namespace ct
 		if(!mSkyOnly)
 		{
 			mParamInputTex.set(lightProbeIndices);
-			mParamSHCoeffsBuffer.set(lightProbesInfo.shCoefficients);
+			mParamSHCoeffsTexture.set(lightProbesInfo.shCoefficients);
 			mParamTetrahedraBuffer.set(lightProbesInfo.tetrahedra);
 			mParamTetFacesBuffer.set(lightProbesInfo.faces);
 		}
@@ -266,6 +266,7 @@ namespace bs { namespace ct
 	struct TetrahedronDataGPU
 	{
 		UINT32 indices[4];
+		Vector2I offsets[4];
 		Matrix3x4 transform;
 	};
 
@@ -279,7 +280,7 @@ namespace bs { namespace ct
 	};
 
 	LightProbes::LightProbes()
-		:mTetrahedronVolumeDirty(false), mMaxCoefficients(0), mMaxTetrahedra(0), mMaxFaces(0), mNumValidTetrahedra(0)
+		:mTetrahedronVolumeDirty(false), mMaxCoefficientRows(0), mMaxTetrahedra(0), mMaxFaces(0), mNumValidTetrahedra(0)
 	{ }
 
 	void LightProbes::notifyAdded(LightProbeVolume* volume)
@@ -330,53 +331,59 @@ namespace bs { namespace ct
 			return;
 
 		// Move all coefficients into the global buffer
-		UINT32 numCoeffs = 0;
+		UINT32 numRows = 0;
 		for(auto& entry : mVolumes)
 		{
-			UINT32 numProbes = (UINT32)entry.volume->getLightProbePositions().size();
-			numCoeffs += numProbes;
+			SPtr<Texture> localTexture = entry.volume->getCoefficientsTexture();
+			numRows += localTexture->getProperties().getHeight();
 		}
 
-		if(numCoeffs > mMaxCoefficients)
-		{
-			UINT32 newSize = Math::divideAndRoundUp(numCoeffs, 32U) * 32U;
-			resizeCoefficientBuffer(newSize);
-		}
+		if(numRows > mMaxCoefficientRows)
+			resizeCoefficientTexture(numRows + 4);
 
-		UINT32 writePos = 0;
+		UINT32 rowIdx = 0;
 		for(auto& entry : mVolumes)
 		{
-			UINT32 numProbes = (UINT32)entry.volume->getLightProbePositions().size();
-			UINT32 size = numProbes * sizeof(LightProbeSHCoefficients);
-			SPtr<GpuBuffer> localBuffer = entry.volume->getCoefficientsBuffer();
+			TEXTURE_COPY_DESC copyDesc;
+			copyDesc.dstPosition = Vector3I(0, rowIdx, 0);
+
+			SPtr<Texture> localTexture = entry.volume->getCoefficientsTexture();
+			localTexture->copy(mProbeCoefficientsGPU, copyDesc);
 			
-			// Note: Some of the coefficients might still be dirty (unrendered). Check for this and write them as black?
-			mProbeCoefficientsGPU->copyData(*localBuffer, 0, writePos, size);
-			writePos += size;
+			rowIdx += localTexture->getProperties().getHeight();
 		}
 
 		// Gather all positions
 		UINT32 bufferOffset = 0;
+		rowIdx = 0;
 		for(auto& entry : mVolumes)
 		{
 			const Vector<LightProbeInfo>& infos = entry.volume->getLightProbeInfos();
 			const Vector<Vector3>& positions = entry.volume->getLightProbePositions();
+
 			UINT32 numProbes = entry.volume->getNumActiveProbes();
-			
+
 			if (numProbes == 0)
 				continue;
 
 			const Transform& tfrm = entry.volume->getTransform();
 			Vector3 offset = tfrm.getPosition();
 			Quaternion rotation = tfrm.getRotation();
-			for(UINT32 i = 0; i < numProbes; i++)
+
+			for (UINT32 i = 0; i < numProbes; i++)
 			{
 				Vector3 localPos = positions[i];
 				Vector3 transformedPos = rotation.rotate(localPos) + offset;
 				mTempTetrahedronPositions.push_back(transformedPos);
+
 				mTempTetrahedronBufferIndices.push_back(bufferOffset + infos[i].bufferIdx);
+
+				Vector2I offset = IBLUtility::getSHCoeffXYFromIdx(infos[i].bufferIdx, 3);
+				mTempTetrahedronBufferOffsets.push_back(offset);
 			}
 
+			SPtr<Texture> localTexture = entry.volume->getCoefficientsTexture();
+			rowIdx += localTexture->getProperties().getHeight();
 			bufferOffset += (UINT32)positions.size();
 		}
 
@@ -718,10 +725,15 @@ namespace bs { namespace ct
 
 			TetrahedronData& entry = mTetrahedronInfos[i];
 
+			Vector2I offsets[4];
 			for(UINT32 j = 0; j < 4; ++j)
+			{
 				entry.volume.vertices[j] = mTempTetrahedronBufferIndices[entry.volume.vertices[j]];
+				offsets[j] = mTempTetrahedronBufferOffsets[entry.volume.vertices[j]];
+			}
 
 			memcpy(dst->indices, entry.volume.vertices, sizeof(UINT32) * 4);
+			memcpy(dst->offsets, &offsets, sizeof(offsets));
 			memcpy(&dst->transform, &entry.transform, sizeof(float) * 12);
 
 			dst++;
@@ -736,12 +748,17 @@ namespace bs { namespace ct
 			const TetrahedronFaceData& entry = outerFaces[i];
 
 			UINT32 indices[4];
-			indices[0] = mTempTetrahedronBufferIndices[entry.innerVertices[0]];
-			indices[1] = mTempTetrahedronBufferIndices[entry.innerVertices[1]];
-			indices[2] = mTempTetrahedronBufferIndices[entry.innerVertices[2]];
+			Vector2I offsets[4];
+			for(UINT32 j = 0; j < 3; j++)
+			{
+				indices[j] = mTempTetrahedronBufferIndices[entry.innerVertices[j]];
+				offsets[j] = mTempTetrahedronBufferOffsets[entry.innerVertices[j]];
+			}
+
 			indices[3] = -1;
 
 			memcpy(dst->indices, indices, sizeof(UINT32) * 4);
+			memcpy(dst->offsets, offsets, sizeof(offsets));
 			memcpy(&dst->transform, &entry.transform, sizeof(float) * 12);
 
 			dst++;
@@ -835,17 +852,20 @@ namespace bs { namespace ct
 		mMaxFaces = count;
 	}
 
-	void LightProbes::resizeCoefficientBuffer(UINT32 count)
+	void LightProbes::resizeCoefficientTexture(UINT32 numRows)
 	{
-		GPU_BUFFER_DESC desc;
-		desc.type = GBT_STRUCTURED;
-		desc.elementSize = sizeof(LightProbeSHCoefficients);
-		desc.elementCount = count;
-		desc.usage = GBU_STATIC;
-		desc.format = BF_UNKNOWN;
-
-		mProbeCoefficientsGPU = GpuBuffer::create(desc);
-		mMaxCoefficients = count;
+		TEXTURE_DESC desc;
+		desc.width = 4096;
+		desc.height = numRows;
+		desc.usage = TU_LOADSTORE | TU_RENDERTARGET;
+		desc.format = PF_RGBA32F;
+
+		SPtr<Texture> newTexture = Texture::create(desc);
+		if (mProbeCoefficientsGPU)
+			mProbeCoefficientsGPU->copy(newTexture);
+
+		mProbeCoefficientsGPU = newTexture;
+		mMaxCoefficientRows = numRows;
 	}
 
 	void LightProbes::generateTetrahedronData(Vector<Vector3>& positions, Vector<TetrahedronData>& tetrahedra,

+ 8 - 6
Source/RenderBeast/BsLightProbes.h

@@ -124,7 +124,7 @@ namespace bs { namespace ct
 		GpuParamTexture mParamInputTex;
 		GpuParamTexture mParamSkyIrradianceTex;
 		GpuParamTexture mParamAmbientOcclusionTex;
-		GpuParamBuffer mParamSHCoeffsBuffer;
+		GpuParamTexture mParamSHCoeffsTexture;
 		GpuParamBuffer mParamTetrahedraBuffer;
 		GpuParamBuffer mParamTetFacesBuffer;
 		bool mSkyOnly;
@@ -141,7 +141,7 @@ namespace bs { namespace ct
 	struct LightProbesInfo
 	{
 		/** Contains a set of spherical harmonic coefficients for every light probe. */
-		SPtr<GpuBuffer> shCoefficients;
+		SPtr<Texture> shCoefficients;
 
 		/** 
 		 * Contains information about tetrahedra formed by light probes. First half of the buffer is populated by actual
@@ -249,20 +249,21 @@ namespace bs { namespace ct
 		void resizeTetrahedronFaceBuffer(UINT32 count);
 
 		/** 
-		 * Resized the GPU buffer that stores light probe SH coefficients, to the specified size (in the number of probes). 
+		 * Resizes the texture that stores light probe SH coefficients, to the specified number of rows (each row
+		 * holds 4096 coefficients, and each volume starts in its own row).
 		 */
-		void resizeCoefficientBuffer(UINT32 count);
+		void resizeCoefficientTexture(UINT32 numRows);
 
 		Vector<VolumeInfo> mVolumes;
 		bool mTetrahedronVolumeDirty;
 
-		UINT32 mMaxCoefficients;
+		UINT32 mMaxCoefficientRows;
 		UINT32 mMaxTetrahedra;
 		UINT32 mMaxFaces;
 
 		Vector<TetrahedronData> mTetrahedronInfos;
 
-		SPtr<GpuBuffer> mProbeCoefficientsGPU;
+		SPtr<Texture> mProbeCoefficientsGPU;
 		SPtr<GpuBuffer> mTetrahedronInfosGPU;
 		SPtr<GpuBuffer> mTetrahedronFaceInfosGPU;
 		SPtr<Mesh> mVolumeMesh;
@@ -271,6 +272,7 @@ namespace bs { namespace ct
 		// Temporary buffers
 		Vector<Vector3> mTempTetrahedronPositions;
 		Vector<UINT32> mTempTetrahedronBufferIndices;
+		Vector<Vector2I> mTempTetrahedronBufferOffsets;
 	};
 
 	/** @} */

+ 11 - 1
Source/RenderBeast/BsRenderBeast.cpp

@@ -541,8 +541,18 @@ namespace bs { namespace ct
 					else
 					{
 						for(UINT32 face = 0; face < 6; face++)
+						{
 							for(UINT32 mip = 0; mip <= srcProps.getNumMipmaps(); mip++)
-								texture->copy(sceneInfo.reflProbeCubemapsTex, face, mip, probeInfo.arrayIdx * 6 + face, mip);
+							{
+								TEXTURE_COPY_DESC copyDesc;
+								copyDesc.srcFace = face;
+								copyDesc.srcMip = mip;
+								copyDesc.dstFace = probeInfo.arrayIdx * 6 + face;
+								copyDesc.dstMip = mip;
+
+								texture->copy(sceneInfo.reflProbeCubemapsTex, copyDesc);
+							}
+						}
 					}
 
 					mScene->setReflectionProbeArrayIndex(i, probeInfo.arrayIdx, true);

+ 84 - 54
Source/RenderBeast/BsRenderBeastIBLUtility.cpp

@@ -31,11 +31,22 @@ namespace bs { namespace ct
 		UINT32 mip,
 		const SPtr<RenderTarget>& target)
 	{
-		mInputTexture.set(source);
+		RenderAPI& rapi = RenderAPI::instance();
+		const RenderAPIInfo& rapiInfo = rapi.getAPIInfo();
+
 		gReflectionCubeDownsampleParamDef.gCubeFace.set(mParamBuffer, face);
-		gReflectionCubeDownsampleParamDef.gMipLevel.set(mParamBuffer, mip);
 
-		RenderAPI& rapi = RenderAPI::instance();
+		if(rapiInfo.isFlagSet(RenderAPIFeatureFlag::TextureViews))
+		{
+			mInputTexture.set(source, TextureSurface(mip, 1, 0, 6));
+			gReflectionCubeDownsampleParamDef.gMipLevel.set(mParamBuffer, 0);
+		}
+		else
+		{
+			mInputTexture.set(source);
+			gReflectionCubeDownsampleParamDef.gMipLevel.set(mParamBuffer, mip);
+		}
+
 		rapi.setRenderTarget(target);
 
 		gRendererUtility().setPass(mMaterial);
@@ -292,8 +303,8 @@ namespace bs { namespace ct
 		// Do nothing
 	}
 
-	void IrradianceAccumulateCubeSHMat::execute(const SPtr<Texture>& source, UINT32 sourceMip, UINT32 coefficientIdx, 
-		const SPtr<RenderTarget>& output)
+	void IrradianceAccumulateCubeSHMat::execute(const SPtr<Texture>& source, UINT32 sourceMip, const Vector2I& outputOffset,
+		UINT32 coefficientIdx, const SPtr<RenderTarget>& output)
 	{
 		// Set parameters
 		mInputTexture.set(source);
@@ -309,11 +320,11 @@ namespace bs { namespace ct
 
 		// Render to just one pixel corresponding to the coefficient
 		Rect2 viewRect;
-		viewRect.x = coefficientIdx / (float)rtProps.width;
-		viewRect.y = 0.0f;
+		viewRect.x = (outputOffset.x + coefficientIdx) / (float)rtProps.width;
+		viewRect.y = outputOffset.y / (float)rtProps.height;
 
 		viewRect.width = 1.0f / rtProps.width;
-		viewRect.height = 1.0f;
+		viewRect.height = 1.0f / rtProps.height;
 
 		// Render
 		RenderAPI& rapi = RenderAPI::instance();
@@ -349,7 +360,7 @@ namespace bs { namespace ct
 		SPtr<GpuParams> params = mParamsSet->getGpuParams();
 		params->setParamBlockBuffer("Params", mParamBuffer);
 		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gInput", mInputBuffer);
-		params->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputBuffer);
+		params->getLoadStoreTextureParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputTexture);
 	}
 
 	void IrradianceReduceSHMat::_initVariations(ShaderVariations& variations)
@@ -359,13 +370,16 @@ namespace bs { namespace ct
 	}
 
 	void IrradianceReduceSHMat::execute(const SPtr<GpuBuffer>& source, UINT32 numCoeffSets, 
-		const SPtr<GpuBuffer>& output, UINT32 outputIdx)
+		const SPtr<Texture>& output, UINT32 outputIdx)
 	{
+		UINT32 shOrder = (UINT32)mVariation.getInt("SH_ORDER");
+
+		Vector2I outputCoords = IBLUtility::getSHCoeffXYFromIdx(outputIdx, shOrder);
+		gIrradianceReduceSHParamDef.gOutputIdx.set(mParamBuffer, outputCoords);
 		gIrradianceReduceSHParamDef.gNumEntries.set(mParamBuffer, numCoeffSets);
-		gIrradianceReduceSHParamDef.gOutputIdx.set(mParamBuffer, outputIdx);
 
 		mInputBuffer.set(source);
-		mOutputBuffer.set(output);
+		mOutputTexture.set(output);
 
 		RenderAPI& rapi = RenderAPI::instance();
 
@@ -374,20 +388,18 @@ namespace bs { namespace ct
 		rapi.dispatchCompute(1);
 	}
 
-	SPtr<GpuBuffer> IrradianceReduceSHMat::createOutputBuffer(UINT32 numEntries)
+	SPtr<Texture> IrradianceReduceSHMat::createOutputTexture(UINT32 numCoeffSets)
 	{
-		GPU_BUFFER_DESC bufferDesc;
-		bufferDesc.type = GBT_STRUCTURED;
-		bufferDesc.elementCount = numEntries;
-		bufferDesc.format = BF_UNKNOWN;
-		bufferDesc.randomGpuWrite = true;
+		UINT32 shOrder = (UINT32)mVariation.getInt("SH_ORDER");
+		Vector2I size = IBLUtility::getSHCoeffTextureSize(numCoeffSets, shOrder);
 
-		if(mVariation.getInt("SH_ORDER") == 3)
-			bufferDesc.elementSize = sizeof(SHVector3RGB);
-		else
-			bufferDesc.elementSize = sizeof(SHVector5RGB);
+		TEXTURE_DESC textureDesc;
+		textureDesc.width = (UINT32)size.x;
+		textureDesc.height = (UINT32)size.y;
+		textureDesc.format = PF_RGBA32F;
+		textureDesc.usage = TU_STATIC | TU_LOADSTORE;
 
-		return GpuBuffer::create(bufferDesc);
+		return Texture::create(textureDesc);
 	}
 
 	IrradianceReduceSHMat* IrradianceReduceSHMat::getVariation(int order)
@@ -406,7 +418,7 @@ namespace bs { namespace ct
 
 		SPtr<GpuParams> params = mParamsSet->getGpuParams();
 		params->setParamBlockBuffer("Params", mParamBuffer);
-		params->getBufferParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mInputBuffer);
+		params->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSHCoeffs", mInputTexture);
 	}
 
 	void IrradianceProjectSHMat::_initVariations(ShaderVariations& variations)
@@ -414,11 +426,11 @@ namespace bs { namespace ct
 		// Do nothing
 	}
 
-	void IrradianceProjectSHMat::execute(const SPtr<GpuBuffer>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target)
+	void IrradianceProjectSHMat::execute(const SPtr<Texture>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target)
 	{
 		gIrradianceProjectSHParamDef.gCubeFace.set(mParamBuffer, face);
 
-		mInputBuffer.set(shCoeffs);
+		mInputTexture.set(shCoeffs);
 
 		RenderAPI& rapi = RenderAPI::instance();
 		rapi.setRenderTarget(target);
@@ -459,7 +471,13 @@ namespace bs { namespace ct
 
 		// Copy base mip level to scratch cubemap
 		for (UINT32 face = 0; face < 6; face++)
-			cubemap->copy(scratchCubemap, face, 0, face, 0);
+		{
+			TEXTURE_COPY_DESC copyDesc;
+			copyDesc.srcFace = face;
+			copyDesc.dstFace = face;
+
+			cubemap->copy(scratchCubemap, copyDesc);
+		}
 
 		// Fill out remaining scratch mip levels by downsampling
 		for (UINT32 mip = 1; mip < numMips; mip++)
@@ -497,6 +515,7 @@ namespace bs { namespace ct
 
 	void RenderBeastIBLUtility::filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output) const
 	{
+		SPtr<Texture> coeffTexture;
 		if(supportsComputeSH())
 		{
 			IrradianceComputeSHMat* shCompute = IrradianceComputeSHMat::getVariation(5);
@@ -507,31 +526,33 @@ namespace bs { namespace ct
 			for (UINT32 face = 0; face < 6; face++)
 				shCompute->execute(cubemap, face, coeffSetBuffer);
 
-			SPtr<GpuBuffer> coeffBuffer = shReduce->createOutputBuffer(1);
-			shReduce->execute(coeffSetBuffer, numCoeffSets, coeffBuffer, 0);
-
-			IrradianceProjectSHMat* shProject = IrradianceProjectSHMat::get();
-			for (UINT32 face = 0; face < 6; face++)
-			{
-				RENDER_TEXTURE_DESC cubeFaceRTDesc;
-				cubeFaceRTDesc.colorSurfaces[0].texture = output;
-				cubeFaceRTDesc.colorSurfaces[0].face = face;
-				cubeFaceRTDesc.colorSurfaces[0].numFaces = 1;
-				cubeFaceRTDesc.colorSurfaces[0].mipLevel = 0;
-
-				SPtr<RenderTarget> target = RenderTexture::create(cubeFaceRTDesc);
-				shProject->execute(coeffBuffer, face, target);
-			}
+			coeffTexture = shReduce->createOutputTexture(1);
+			shReduce->execute(coeffSetBuffer, numCoeffSets, coeffTexture, 0);
 		}
 		else
 		{
-			SPtr<Texture> shCoeffs = filterCubemapForIrradianceNonCompute(cubemap);
+			GpuResourcePool& resPool = GpuResourcePool::instance();
+			SPtr<PooledRenderTexture> finalCoeffs = resPool.get(IrradianceAccumulateCubeSHMat::getOutputDesc());
+
+			filterCubemapForIrradianceNonCompute(cubemap, 0, finalCoeffs->renderTexture);
+			coeffTexture = finalCoeffs->texture;
+		}
+
+		IrradianceProjectSHMat* shProject = IrradianceProjectSHMat::get();
+		for (UINT32 face = 0; face < 6; face++)
+		{
+			RENDER_TEXTURE_DESC cubeFaceRTDesc;
+			cubeFaceRTDesc.colorSurfaces[0].texture = output;
+			cubeFaceRTDesc.colorSurfaces[0].face = face;
+			cubeFaceRTDesc.colorSurfaces[0].numFaces = 1;
+			cubeFaceRTDesc.colorSurfaces[0].mipLevel = 0;
 
-			// TODO - Re-project the coefficients
+			SPtr<RenderTarget> target = RenderTexture::create(cubeFaceRTDesc);
+			shProject->execute(coeffTexture, face, target);
 		}
 	}
 	
-	void RenderBeastIBLUtility::filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<GpuBuffer>& output, 
+	void RenderBeastIBLUtility::filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output, 
 		UINT32 outputIdx) const
 	{
 		if(supportsComputeSH())
@@ -548,9 +569,11 @@ namespace bs { namespace ct
 		}
 		else
 		{
-			SPtr<Texture> shCoeffs = filterCubemapForIrradianceNonCompute(cubemap);
-			
-			// TODO - Output expects a buffer
+			RENDER_TEXTURE_DESC rtDesc;
+			rtDesc.colorSurfaces[0].texture = output;
+
+			SPtr<RenderTexture> target = RenderTexture::create(rtDesc);
+			filterCubemapForIrradianceNonCompute(cubemap, outputIdx, target);
 		}
 	}
 
@@ -594,7 +617,15 @@ namespace bs { namespace ct
 		if(sizeSrcLog2 == sizeDstLog2)
 		{
 			for (UINT32 face = 0; face < 6; face++)
-				src->copy(dst, face, srcMip, face, dstMip);
+			{
+				TEXTURE_COPY_DESC copyDesc;
+				copyDesc.srcFace = face;
+				copyDesc.srcMip = srcMip;
+				copyDesc.dstFace = face;
+				copyDesc.dstMip = dstMip;
+
+				src->copy(dst, copyDesc);
+			}
 		}
 		else
 			downsampleCubemap(scratchTex, srcMip, dst, dstMip);
@@ -618,7 +649,8 @@ namespace bs { namespace ct
 		}
 	}
 
-	SPtr<Texture> RenderBeastIBLUtility::filterCubemapForIrradianceNonCompute(const SPtr<Texture>& cubemap)
+	void RenderBeastIBLUtility::filterCubemapForIrradianceNonCompute(const SPtr<Texture>& cubemap, UINT32 outputIdx,
+		const SPtr<RenderTexture>& output)
 	{
 		static const UINT32 NUM_COEFFS = 9;
 
@@ -627,7 +659,6 @@ namespace bs { namespace ct
 		IrradianceAccumulateSHMat* shAccum = IrradianceAccumulateSHMat::get();
 		IrradianceAccumulateCubeSHMat* shAccumCube = IrradianceAccumulateCubeSHMat::get();
 
-		SPtr<PooledRenderTexture> finalCoeffs = resPool.get(shAccumCube->getOutputDesc());
 		for(UINT32 coeff = 0; coeff < NUM_COEFFS; ++coeff)
 		{
 			SPtr<PooledRenderTexture> coeffsTex = resPool.get(shCompute->getOutputDesc(cubemap));
@@ -673,9 +704,8 @@ namespace bs { namespace ct
 			}
 
 			// Sum up all the faces and write the coefficient to the final texture
-			shAccumCube->execute(downsampleInput->texture, 0, coeff, finalCoeffs->renderTexture);
+			Vector2I outputOffset = getSHCoeffXYFromIdx(outputIdx, 3);
+			shAccumCube->execute(downsampleInput->texture, 0, outputOffset, coeff, output);
 		}
-
-		return finalCoeffs->texture;
 	}
 }}

+ 13 - 13
Source/RenderBeast/BsRenderBeastIBLUtility.h

@@ -150,8 +150,8 @@ namespace bs { namespace ct
 	};
 
 	BS_PARAM_BLOCK_BEGIN(IrradianceReduceSHParamDef)
+		BS_PARAM_BLOCK_ENTRY(Vector2I, gOutputIdx)
 		BS_PARAM_BLOCK_ENTRY(int, gNumEntries)
-		BS_PARAM_BLOCK_ENTRY(int, gOutputIdx)
 	BS_PARAM_BLOCK_END
 
 	extern IrradianceReduceSHParamDef gIrradianceReduceSHParamDef;
@@ -169,13 +169,13 @@ namespace bs { namespace ct
 
 		/** 
 		 * Sums spherical harmonic coefficients calculated by each thread group of IrradianceComputeSHMat and outputs a
-		 * single set of normalized coefficients. Output buffer should be created by calling createOutputBuffer(). The
-		 * value will be recorded at the @p outputIdx position in the buffer.
+		 * single set of normalized coefficients. Output texture should be created by calling createOutputTexture(). The
+		 * value will be recorded at the @p outputIdx position in the texture.
 		 */
-		void execute(const SPtr<GpuBuffer>& source, UINT32 numCoeffSets, const SPtr<GpuBuffer>& output, UINT32 outputIdx);
+		void execute(const SPtr<GpuBuffer>& source, UINT32 numCoeffSets, const SPtr<Texture>& output, UINT32 outputIdx);
 
-		/** Creates a buffer of adequate size to be used as output for this material. */
-		SPtr<GpuBuffer> createOutputBuffer(UINT32 numEntries);
+		/** Creates a texture of adequate size to be used as output for this material. */
+		SPtr<Texture> createOutputTexture(UINT32 numCoeffSets);
 
 		/** 
 		 * Returns the material variation matching the provided parameters.
@@ -188,7 +188,7 @@ namespace bs { namespace ct
 	private:
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
 		GpuParamBuffer mInputBuffer;
-		GpuParamBuffer mOutputBuffer;
+		GpuParamLoadStoreTexture mOutputTexture;
 
 		static ShaderVariation VAR_Order3;
 		static ShaderVariation VAR_Order5;
@@ -286,7 +286,7 @@ namespace bs { namespace ct
 		 * Sums up all faces of the input cube texture and writes the value to a single pixel of the output texture, at
 		 * (outputOffset.x + coefficientIdx, outputOffset.y). The source mip should point to a mip level with size 1x1.
 		 */
-		void execute(const SPtr<Texture>& source, UINT32 sourceMip, UINT32 coefficientIdx, 
+		void execute(const SPtr<Texture>& source, UINT32 sourceMip, const Vector2I& outputOffset, UINT32 coefficientIdx, 
 			const SPtr<RenderTarget>& output);
 
 		/** 
@@ -321,11 +321,11 @@ namespace bs { namespace ct
 		 * Takes spherical harmonic coefficients calculated by IrradianceReduceSHMat and projects them onto faces of
 		 * a cubemap.
 		 */
-		void execute(const SPtr<GpuBuffer>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target);
+		void execute(const SPtr<Texture>& shCoeffs, UINT32 face, const SPtr<RenderTarget>& target);
 
 	private:
 		SPtr<GpuParamBlockBuffer> mParamBuffer;
-		GpuParamBuffer mInputBuffer;
+		GpuParamTexture mInputTexture;
 	};
 
 	/** Render beast implementation of IBLUtility. */
@@ -339,12 +339,11 @@ namespace bs { namespace ct
 		void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output) const override;
 
 		/** @copydoc IBLUtility::filterCubemapForIrradiance(const SPtr<Texture>&, const SPtr<Texture>&, UINT32) */
-		void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<GpuBuffer>& output, 
+		void filterCubemapForIrradiance(const SPtr<Texture>& cubemap, const SPtr<Texture>& output, 
 			UINT32 outputIdx) const override;
 
 		/** @copydoc IBLUtility::scaleCubemap */
 		void scaleCubemap(const SPtr<Texture>& src, UINT32 srcMip, const SPtr<Texture>& dst, UINT32 dstMip) const override;
-
 	private:
 		/** 
 		 * Downsamples a cubemap using hardware bilinear filtering. 
@@ -360,7 +359,8 @@ namespace bs { namespace ct
 		 * Generates irradiance SH coefficients from the input cubemap and writes them to @p output, at the location
 		 * corresponding to @p outputIdx. Does not make use of the compute shader.
 		 */
-		static SPtr<Texture> filterCubemapForIrradianceNonCompute(const SPtr<Texture>& cubemap);
+		static void filterCubemapForIrradianceNonCompute(const SPtr<Texture>& cubemap, UINT32 outputIdx, 
+			const SPtr<RenderTexture>& output);
 	};
 
 	/** @} */