Browse Source

Optimizing SSAO using a lookup table

Panagiotis Christopoulos Charitos 9 years ago
parent
commit
65c3b8be55
6 changed files with 174 additions and 14 deletions
  1. 11 0
      shaders/Functions.glsl
  2. 25 7
      shaders/Ssao.frag.glsl
  3. 4 0
      src/anki/math/Mat3.h
  4. 3 0
      src/anki/math/Vec3.h
  5. 128 7
      src/anki/renderer/Ssao.cpp
  6. 3 0
      src/anki/renderer/Ssao.h

+ 11 - 0
shaders/Functions.glsl

@@ -42,4 +42,15 @@ float linearizeDepthOptimal(in float depth, in float a, in float b)
 	return 1.0 / (a + depth * b);
 }
 
+// Perspective-project vec using only the matrix entries passed in (m00, m11, m22, m23); w becomes -vec.z
+vec4 projectPerspective(in vec4 vec, in float m00, in float m11, in float m22, in float m23)
+{
+	// x and y are only scaled, z also picks up the m23 term weighted by the input w
+	float projX = vec.x * m00;
+	float projY = vec.y * m11;
+	float projZ = vec.z * m22 + vec.w * m23;
+
+	return vec4(projX, projY, projZ, -vec.z);
+}
+
 #endif

+ 25 - 7
shaders/Ssao.frag.glsl

@@ -26,12 +26,14 @@ layout(location = 0) out float out_color;
 
 layout(ANKI_UBO_BINDING(0, 0), std140, row_major) uniform _blk
 {
-	RendererCommonUniforms u_uniforms;
+	vec4 u_projectionParams;
+	vec4 u_projectionMat;
 };
 
 layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_mMsDepthRt;
 layout(ANKI_TEX_BINDING(0, 1)) uniform sampler2D u_msRt;
 layout(ANKI_TEX_BINDING(0, 2)) uniform sampler2D u_noiseMap;
+layout(ANKI_TEX_BINDING(0, 3)) uniform sampler2DArray u_hemisphereLut;
 
 // Get normal
 vec3 readNormal(in vec2 uv)
@@ -55,7 +57,7 @@ vec3 readRandom(in vec2 uv)
 float readZ(in vec2 uv)
 {
 	float depth = texture(u_mMsDepthRt, uv).r;
-	float z = u_uniforms.projectionParams.z / (u_uniforms.projectionParams.w + depth);
+	float z = u_projectionParams.z / (u_projectionParams.w + depth);
 	return z;
 }
 
@@ -65,7 +67,7 @@ vec3 readPosition(in vec2 uv)
 	vec3 fragPosVspace;
 	fragPosVspace.z = readZ(uv);
 
-	fragPosVspace.xy = (2.0 * uv - 1.0) * u_uniforms.projectionParams.xy * fragPosVspace.z;
+	fragPosVspace.xy = (2.0 * uv - 1.0) * u_projectionParams.xy * fragPosVspace.z;
 
 	return fragPosVspace;
 }
@@ -85,19 +87,35 @@ void main(void)
 	vec3 bitangent = cross(normal, tangent);
 	mat3 tbn = mat3(tangent, bitangent, normal);
 
+	float theta = atan(normal.y, normal.x); // [-pi, pi]
+	// Now move theta to [0, 2*pi]. Adding 2*pi gives the same angle. Then fmod to move back to [0, 2*pi]
+	theta = mod(theta + 2.0 * PI, 2.0 * PI);
+
+	float phi = acos(normal.z / 1.0); // [0, PI]
+
+	vec2 lutCoords;
+	lutCoords.x = theta / (2.0 * PI);
+	lutCoords.y = phi / PI;
+	lutCoords = clamp(lutCoords, 0.0, 1.0);
+
 	// Iterate kernel
 	float factor = 0.0;
 	for(uint i = 0U; i < sampleCount; ++i)
 	{
+#if 0
 		// get position
 		vec3 sample_ = tbn * KERNEL[i];
 		sample_ = sample_ * RADIUS + origin;
+#else
+		vec3 sample_ = texture(u_hemisphereLut, vec3(lutCoords, float(i))).xyz;
+		sample_ = normalize(sample_);
+		sample_ = sample_ * RADIUS + origin;
+#endif
 
 		// project sample position:
-		vec4 offset = vec4(sample_, 1.0);
-		offset = u_uniforms.projectionMatrix * offset;
-		offset.xy = offset.xy / (2.0 * offset.w) + 0.5; // persp div &
-		// to NDC -> [0, 1]
+		vec4 offset = projectPerspective(
+			vec4(sample_, 1.0), u_projectionMat.x, u_projectionMat.y, u_projectionMat.z, u_projectionMat.w);
+		offset.xy = offset.xy / (2.0 * offset.w) + 0.5; // persp div & to NDC -> [0, 1]
 
 		// get sample depth:
 		float sampleDepth = readZ(offset.xy);

+ 4 - 0
src/anki/math/Mat3.h

@@ -195,6 +195,10 @@ TMat3<T> operator/(T f, const TMat3<T>& m3)
 /// F32 3x3 matrix
 using Mat3 = TMat3<F32>;
 static_assert(sizeof(Mat3) == sizeof(F32) * 3 * 3, "Incorrect size");
+
+/// F64 3x3 matrix
+using DMat3 = TMat3<F64>;
+static_assert(sizeof(DMat3) == sizeof(F64) * 3 * 3, "Incorrect size");
 /// @}
 
 } // end namespace anki

+ 3 - 0
src/anki/math/Vec3.h

@@ -133,6 +133,9 @@ using IVec3 = TVec3<I32>;
 
 /// 32bit unsigned integer 3D vector
 using UVec3 = TVec3<U32>;
+
+/// 64bit float 3D vector
+using DVec3 = TVec3<F64>;
 /// @}
 
 } // end namespace anki

+ 128 - 7
src/anki/renderer/Ssao.cpp

@@ -10,6 +10,7 @@
 #include <anki/scene/SceneGraph.h>
 #include <anki/util/Functions.h>
 #include <anki/misc/ConfigSet.h>
+#include <anki/scene/FrustumComponent.h>
 
 namespace anki
 {
@@ -17,20 +18,21 @@ namespace anki
 const U NOISE_TEX_SIZE = 4;
 const U KERNEL_SIZE = 16;
 
-static void genKernel(Vec3* ANKI_RESTRICT arr, Vec3* ANKI_RESTRICT arrEnd)
+template<typename TVec>
+static void genHemisphere(TVec* ANKI_RESTRICT arr, TVec* ANKI_RESTRICT arrEnd)
 {
 	ANKI_ASSERT(arr && arrEnd && arr != arrEnd);
 
 	do
 	{
 		// Calculate the normal
-		arr->x() = randRange(-1.0f, 1.0f);
-		arr->y() = randRange(-1.0f, 1.0f);
-		arr->z() = randRange(0.0f, 1.0f);
+		arr->x() = randRange(-1.0, 1.0);
+		arr->y() = randRange(-1.0, 1.0);
+		arr->z() = randRange(0.0, 1.0);
 		arr->normalize();
 
 		// Adjust the length
-		(*arr) *= randRange(0.0f, 1.0f);
+		(*arr) *= randRange(0.0, 1.0);
 	} while(++arr != arrEnd);
 }
 
@@ -49,6 +51,110 @@ static void genNoise(Vec4* ANKI_RESTRICT arr, Vec4* ANKI_RESTRICT arrEnd)
 
 const PixelFormat Ssao::RT_PIXEL_FORMAT(ComponentFormat::R8, TransformFormat::UNORM);
 
+/// Build the hemisphere lookup texture and store it in m_hemisphereLut.
+///
+/// One random hemisphere kernel is generated and then pre-rotated by a TBN matrix for every quantized normal
+/// direction. Layer k, texel (x, y) holds kernel sample k rotated for the normal whose spherical angles are
+/// theta = x * MIN_ANGLE and phi = y * MIN_ANGLE.
+void Ssao::createHemisphereLut()
+{
+	constexpr F64 PI = getPi<F64>();
+	constexpr F64 MIN_ANGLE = PI / 8.0;
+
+	// Texture dimensions: theta covers [0, 2*pi) along X, phi covers [0, pi) along Y, one layer per kernel sample
+	constexpr U LUT_TEX_SIZE_X = 2.0 * PI / MIN_ANGLE;
+	constexpr U LUT_TEX_SIZE_Y = PI / MIN_ANGLE;
+	constexpr U LUT_TEX_LAYERS = KERNEL_SIZE;
+
+	// Compute the hemisphere
+	Array<DVec3, KERNEL_SIZE> kernel;
+	genHemisphere(&kernel[0], &kernel[0] + KERNEL_SIZE);
+
+	Array<Array2d<Vec4, LUT_TEX_SIZE_Y, LUT_TEX_SIZE_X>, LUT_TEX_LAYERS> lutTexData;
+
+	// Helper axis used to derive a tangent. It is the same for every normal so compute it once
+	DVec3 up(0.01, 1.0, 0.01);
+	up.normalize();
+
+	// Iterate over texel indices instead of accumulating the angles in floating point. Repeated "+= MIN_ANGLE"
+	// can end up slightly below the loop limit because of rounding, which would give one extra iteration and an
+	// out of bounds write to lutTexData
+	// NOTE(review): the shader addresses this texture with normalized coords and linear filtering, a half texel
+	// offset may be needed to hit the exact angle of a texel - confirm
+	for(U x = 0; x < LUT_TEX_SIZE_X; ++x)
+	{
+		const F64 theta = static_cast<F64>(x) * MIN_ANGLE;
+
+		for(U y = 0; y < LUT_TEX_SIZE_Y; ++y)
+		{
+			const F64 phi = static_cast<F64>(y) * MIN_ANGLE;
+
+			// Compute the normal from the spherical coordinates
+			DVec3 normal;
+			normal.x() = cos(theta) * sin(phi);
+			normal.y() = sin(theta) * sin(phi);
+			normal.z() = cos(phi);
+			normal.normalize();
+
+			// Compute a tangent & bitangent
+			DVec3 tangent = up.cross(normal);
+			tangent.normalize();
+
+			DVec3 bitangent = normal.cross(tangent);
+
+			// Set the TBN matrix
+			DMat3 rot;
+			rot.setColumns(tangent, bitangent, normal);
+
+			// Rotate every kernel sample and store it in its own layer
+			for(U k = 0; k < KERNEL_SIZE; ++k)
+			{
+				DVec3 rotVec = rot * kernel[k];
+
+				lutTexData[k][y][x] = Vec4(rotVec.x(), rotVec.y(), rotVec.z(), 0.0);
+			}
+		}
+	}
+
+	// Create the texture
+	TextureInitInfo tinit;
+	tinit.m_usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::UPLOAD;
+	tinit.m_width = LUT_TEX_SIZE_X;
+	tinit.m_height = LUT_TEX_SIZE_Y;
+	tinit.m_depth = 1;
+	tinit.m_layerCount = LUT_TEX_LAYERS;
+	tinit.m_type = TextureType::_2D_ARRAY;
+	tinit.m_format = PixelFormat(ComponentFormat::R32G32B32A32, TransformFormat::FLOAT);
+	tinit.m_mipmapsCount = 1;
+	tinit.m_sampling.m_minMagFilter = SamplingFilter::LINEAR;
+	tinit.m_sampling.m_repeat = false;
+
+	m_hemisphereLut = getGrManager().newInstance<Texture>(tinit);
+
+	// Upload the data, one surface per layer
+	CommandBufferInitInfo cmdbinit;
+	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH;
+	CommandBufferPtr cmdb = getGrManager().newInstance<CommandBuffer>(cmdbinit);
+
+	// Move all layers to the upload state
+	for(U i = 0; i < LUT_TEX_LAYERS; ++i)
+	{
+		cmdb->setTextureSurfaceBarrier(
+			m_hemisphereLut, TextureUsageBit::NONE, TextureUsageBit::UPLOAD, TextureSurfaceInfo(0, 0, 0, i));
+	}
+
+	// Copy the data of every layer
+	for(U i = 0; i < LUT_TEX_LAYERS; ++i)
+	{
+		cmdb->uploadTextureSurfaceCopyData(
+			m_hemisphereLut, TextureSurfaceInfo(0, 0, 0, i), &lutTexData[i][0][0], sizeof(lutTexData[i]));
+	}
+
+	// Move all layers to the state the fragment shader samples them from
+	for(U i = 0; i < LUT_TEX_LAYERS; ++i)
+	{
+		cmdb->setTextureSurfaceBarrier(m_hemisphereLut,
+			TextureUsageBit::UPLOAD,
+			TextureUsageBit::SAMPLED_FRAGMENT,
+			TextureSurfaceInfo(0, 0, 0, i));
+	}
+
+	cmdb->flush();
+}
+
 Error Ssao::createFb(FramebufferPtr& fb, TexturePtr& rt)
 {
 	// Set to bilinear because the blurring techniques take advantage of that
@@ -132,7 +238,7 @@ Error Ssao::initInternal(const ConfigSet& config)
 	StringAuto kernelStr(getAllocator());
 	Array<Vec3, KERNEL_SIZE> kernel;
 
-	genKernel(kernel.begin(), kernel.end());
+	genHemisphere(kernel.begin(), kernel.end());
 	kernelStr.create("vec3[](");
 	for(U i = 0; i < kernel.size(); i++)
 	{
@@ -211,6 +317,11 @@ Error Ssao::initInternal(const ConfigSet& config)
 
 	m_vblurPpline = getGrManager().newInstance<Pipeline>(ppinit);
 
+	//
+	// Lookup texture
+	//
+	createHemisphereLut();
+
 	//
 	// Resource groups
 	//
@@ -229,6 +340,8 @@ Error Ssao::initInternal(const ConfigSet& config)
 
 	rcinit.m_textures[2].m_texture = m_noiseTex;
 
+	rcinit.m_textures[3].m_texture = m_hemisphereLut;
+
 	rcinit.m_uniformBuffers[0].m_uploadedMemory = true;
 	rcinit.m_uniformBuffers[0].m_usage = BufferUsageBit::UNIFORM_FRAGMENT;
 	m_rcFirst = gr.newInstance<ResourceGroup>(rcinit);
@@ -284,7 +397,15 @@ void Ssao::run(RenderingContext& ctx)
 	cmdb->bindPipeline(m_ssaoPpline);
 
 	TransientMemoryInfo inf;
-	inf.m_uniformBuffers[0] = m_r->getCommonUniformsTransientMemoryToken();
+	Vec4* unis = static_cast<Vec4*>(getGrManager().allocateFrameTransientMemory(
+		sizeof(Vec4) * 2, BufferUsageBit::UNIFORM_ALL, inf.m_uniformBuffers[0]));
+
+	const FrustumComponent& frc = *ctx.m_frustumComponent;
+	const Mat4& pmat = frc.getProjectionMatrix();
+	*unis = frc.getProjectionParameters();
+	++unis;
+	*unis = Vec4(pmat(0, 0), pmat(1, 1), pmat(2, 2), pmat(2, 3));
+
 	cmdb->bindResourceGroup(m_rcFirst, 0, &inf);
 
 	// Draw

+ 3 - 0
src/anki/renderer/Ssao.h

@@ -57,6 +57,7 @@ private:
 	PipelinePtr m_vblurPpline;
 
 	TexturePtr m_noiseTex;
+	TexturePtr m_hemisphereLut;
 
 	ResourceGroupPtr m_rcFirst;
 	ResourceGroupPtr m_hblurRc;
@@ -64,6 +65,8 @@ private:
 
 	ANKI_USE_RESULT Error createFb(FramebufferPtr& fb, TexturePtr& rt);
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& initializer);
+
+	/// Create the hemisphere lookup texture and store it in m_hemisphereLut.
+	void createHemisphereLut();
 };
 /// @}