Explorar el Código

New SSAO that runs faster

Panagiotis Christopoulos Charitos hace 12 años
padre
commit
d8cfc98335

+ 1 - 1
include/anki/math/Mat4.h

@@ -10,7 +10,7 @@ namespace anki {
 
 /// 4x4 matrix. Used mainly, but not exclusively, for transformations. It is
 /// row major and SSE optimized
-class Mat4
+ANKI_ATTRIBUTE_ALIGNED(class, 16) Mat4
 {
 public:
 	/// @name Constructors

+ 0 - 1
include/anki/renderer/Is.h

@@ -88,7 +88,6 @@ private:
 	/// @name For drawing a quad into the active framebuffer
 	/// @{
 	Vbo quadPositionsVbo; ///< The VBO for quad positions
-	Vbo quadVertIndecesVbo; ///< The VBO for quad array buffer elements
 	Vao quadVao; ///< This VAO is used everywhere except material stage
 	/// @}
 

+ 1 - 1
include/anki/renderer/Pps.h

@@ -16,7 +16,7 @@ class ShaderProgram;
 
 /// Post-processing stage. This stage is divided into two parts. The first
 /// happens before the blending stage and the second after it
-class Pps: public RenderingPass
+class Pps: public OptionalRenderingPass
 {
 public:
 	Pps(Renderer* r);

+ 0 - 4
include/anki/renderer/Renderer.h

@@ -256,9 +256,6 @@ public:
 
 	void drawQuadInstanced(U32 primitiveCount);
 
-	/// Used by blurring where we draw the same quad many times
-	void drawQuadMultiple(U times);
-
 	/// Calculate the planes needed for the calculation of the fragment
 	/// position z in view space. Having the fragment's depth, the camera's
 	/// zNear and zFar the z of the fragment is being calculated inside the
@@ -331,7 +328,6 @@ private:
 	/// @name For drawing a quad into the active framebuffer
 	/// @{
 	Vbo quadPositionsVbo; ///< The VBO for quad positions
-	Vbo quadVertIndecesVbo; ///< The VBO for quad array buffer elements
 	Vao quadVao; ///< This VAO is used everywhere except material stage
 	/// @}
 };

+ 1 - 1
include/anki/renderer/Ssao.h

@@ -45,7 +45,7 @@ private:
 	Fbo mpFbo; ///< Main pass FBO. Not used if on non blit
 	Fbo vblurFbo;
 	Fbo hblurFbo;
-	TextureResourcePointer noiseMap;
+	Texture noiseTex;
 	ShaderProgramResourcePointer ssaoSProg;
 	ShaderProgramResourcePointer hblurSProg;
 	ShaderProgramResourcePointer vblurSProg;

+ 55 - 48
shaders/PpsSsao.glsl

@@ -5,35 +5,31 @@
 #pragma anki start fragmentShader
 #pragma anki include "shaders/CommonFrag.glsl"
 #pragma anki include "shaders/Pack.glsl"
+#pragma anki include "shaders/LinearDepth.glsl"
 
-/// @name Varyings
-/// @{
 in vec2 vTexCoords;
-/// @}
 
-/// @name Output
-/// @{
 layout(location = 0) out float fColor;
-/// @}
-
 
-/// @name Uniforms
-/// @{
 layout(std140, row_major) uniform commonBlock
 {
 	/// Packs:
 	/// - x: zNear, for the calculation of frag pos in view space. y: zFar
 	/// - zw: Planes. For the calculation of frag pos in view space
-	uniform vec4 nearPlanes;
+	vec4 nearPlanes;
 
 	/// For the calculation of frag pos in view space. The xy is the 
 	/// limitsOfNearPlane and the zw is an optimization see PpsSsao.glsl and 
 	/// r403 for the clean one
-	uniform vec4 limitsOfNearPlane_;
+	vec4 limitsOfNearPlane_;
+
+	/// The projection matrix
+	mat4 projectionMatrix;
 };
 
-#define planes nearPlanes.zw
 #define zNear nearPlanes.x
+#define zFar nearPlanes.y
+#define planes nearPlanes.zw
 #define limitsOfNearPlane limitsOfNearPlane_.xy
 #define limitsOfNearPlane2 limitsOfNearPlane_.zw
 
@@ -46,11 +42,9 @@ uniform highp usampler2D msGFai;
 uniform sampler2D noiseMap; 
 /// @}
 
-#define SAMPLE_RAD 0.08
-#define SCALE 1.0
-#define INTENSITY 3.0
-#define BIAS 0.1
+#define RADIUS 0.5
 
+// Get normal
 vec3 getNormal(in vec2 uv)
 {
 #if USE_MRT
@@ -62,17 +56,19 @@ vec3 getNormal(in vec2 uv)
 	return normal;
 }
 
-vec2 getRandom(in vec2 uv)
+// Read the noise tex
+vec3 getRandom(in vec2 uv)
 {
 	const vec2 tmp = vec2(
 		float(WIDTH) / float(NOISE_MAP_SIZE), 
 		float(HEIGHT) / float(NOISE_MAP_SIZE));
 
-	vec2 noise = texture(noiseMap, tmp * uv).xy;
+	vec3 noise = texture(noiseMap, tmp * uv).xyz;
 	//return normalize(noise * 2.0 - 1.0);
 	return noise;
 }
 
+// Get position in view space
 vec3 getPosition(in vec2 uv)
 {
 	float depth = texture(msDepthFai, uv).r;
@@ -88,45 +84,56 @@ vec3 getPosition(in vec2 uv)
 	return fragPosVspace;
 }
 
-float calcAmbientOcclusionFactor(in vec2 uv, in vec3 original, in vec3 cnorm)
+float getZ(in vec2 uv)
 {
-	vec3 newp = getPosition(uv);
-	vec3 diff = newp - original;
-	vec3 v = normalize(diff);
-	float d = length(diff) /* * SCALE*/;
-
-	float ret = max(0.0, dot(cnorm, v)  - BIAS) * (INTENSITY / (1.0 + d));
-	return ret;
+	float depth = texture(msDepthFai, uv).r;
+	float z = -planes.y / (planes.x + depth);
+	return z;
 }
 
-#define KERNEL_SIZE 16
-const vec2 KERNEL[KERNEL_SIZE] = vec2[](
-	vec2(0.53812504, 0.18565957), vec2(0.13790712, 0.24864247), 
-	vec2(0.33715037, 0.56794053), vec2(-0.6999805, -0.04511441),
-	vec2(0.06896307, -0.15983082), vec2(0.056099437, 0.006954967),
-	vec2(-0.014653638, 0.14027752), vec2(0.010019933, -0.1924225),
-	vec2(-0.35775623, -0.5301969), vec2(-0.3169221, 0.106360726),
-	vec2(0.010350345, -0.58698344), vec2(-0.08972908, -0.49408212),
-	vec2(0.7119986, -0.0154690035), vec2(-0.053382345, 0.059675813),
-	vec2(0.035267662, -0.063188605), vec2(-0.47761092, 0.2847911));
-
 void main(void)
 {
-	vec3 p = getPosition(vTexCoords);
-	vec3 n = getNormal(vTexCoords);
-	vec2 rand = getRandom(vTexCoords);
-	//rand = rand * 0.000001 + vec2(0.0, 0.0);
+	vec3 origin = getPosition(vTexCoords);
 
-	fColor = 0.0;
+	vec3 normal = getNormal(vTexCoords);
+	vec3 rvec = getRandom(vTexCoords);
 	
-	for(int j = 0; j < KERNEL_SIZE; ++j)
+	vec3 tangent = normalize(rvec - normal * dot(rvec, normal));
+	vec3 bitangent = cross(normal, tangent);
+	mat3 tbn = mat3(tangent, bitangent, normal);
+
+	// Iterate kernel
+	float factor = 0.0;
+	for(uint i = 0; i < KERNEL_SIZE; ++i) 
 	{
-		vec2 coord = reflect(KERNEL[j], rand) * SAMPLE_RAD;
-		fColor += calcAmbientOcclusionFactor(vTexCoords + coord, p, n);
-	}
+		// get position
+		vec3 sample_ = tbn * KERNEL[i];
+		sample_ = sample_ * RADIUS + origin;
+
+		// project sample position:
+		vec4 offset = vec4(sample_, 1.0);
+		offset = projectionMatrix * offset;
+		offset.xy /= offset.w;
+		offset.xy = offset.xy * 0.5 + 0.5;
+
+		// get sample depth:
+		float sampleDepth = getZ(offset.xy);
+
+		// range check & accumulate:
+		const float ADVANCE = 1.0 / float(KERNEL_SIZE);
 
-	fColor = 1.0 - fColor / float(KERNEL_SIZE);
+#if 1
+		float rangeCheck = 
+			abs(origin.z - sampleDepth) * (1.0 / (RADIUS * 10.0));
+		rangeCheck = 1.0 - rangeCheck;
+
+		factor += clamp(sampleDepth - sample_.z, 0.0, ADVANCE) * rangeCheck;
+#else
+		float rangeCheck = abs(origin.z - sampleDepth) < RADIUS ? 1.0 : 0.0;
+		factor += (sampleDepth > sample_.z ? ADVANCE : 0.0) * rangeCheck;
+#endif
+	}
 
-	//fColor = fColor * 0.00001 + (rand.x + rand.y) / 2.0;
+	fColor = 1.0 - factor;
 }
 

+ 2 - 9
src/renderer/Is.cpp

@@ -499,19 +499,13 @@ void Is::initInternal(const RendererInitializer& initializer)
 	// Init the quad
 	//
 	static const F32 quadVertCoords[][2] = {{1.0, 1.0}, {0.0, 1.0},
-		{0.0, 0.0}, {1.0, 0.0}};
+		{1.0, 0.0}, {0.0, 0.0}};
 	quadPositionsVbo.create(GL_ARRAY_BUFFER, sizeof(quadVertCoords),
 		quadVertCoords, GL_STATIC_DRAW);
 
-	static const U16 quadVertIndeces[2][3] =
-		{{0, 1, 3}, {1, 2, 3}}; // 2 triangles
-	quadVertIndecesVbo.create(GL_ELEMENT_ARRAY_BUFFER, sizeof(quadVertIndeces),
-		quadVertIndeces, GL_STATIC_DRAW);
-
 	quadVao.create();
 	quadVao.attachArrayBufferVbo(
 		&quadPositionsVbo, 0, 2, GL_FLOAT, false, 0, 0);
-	quadVao.attachElementArrayBufferVbo(&quadVertIndecesVbo);
 
 	//
 	// Create UBOs
@@ -861,8 +855,7 @@ void Is::lightPass()
 	lightPassProg->findUniformVariable("shadowMapArr").set(sm.sm2DArrayTex);
 
 	quadVao.bind(); // quadPositionsVbo holds 4 vertices (one triangle strip)
-	glDrawElementsInstanced(GL_TRIANGLES, 2 * 3, GL_UNSIGNED_SHORT, 0,
-		TILES_COUNT);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, TILES_COUNT);
 }
 
 //==============================================================================

+ 1 - 1
src/renderer/Lf.cpp

@@ -63,7 +63,7 @@ void Lf::init(const RendererInitializer& initializer)
 //==============================================================================
 void Lf::initInternal(const RendererInitializer& initializer)
 {
-	enabled = initializer.pps.lf.enabled;
+	enabled = initializer.pps.lf.enabled && initializer.pps.hdr.enabled;
 	if(!enabled)
 	{
 		return;

+ 11 - 1
src/renderer/Pps.cpp

@@ -8,7 +8,7 @@ namespace anki {
 
 //==============================================================================
 Pps::Pps(Renderer* r_)
-	: RenderingPass(r_), hdr(r_), ssao(r_), bl(r_), lf(r_)
+	: OptionalRenderingPass(r_), hdr(r_), ssao(r_), bl(r_), lf(r_)
 {}
 
 //==============================================================================
@@ -18,6 +18,14 @@ Pps::~Pps()
 //==============================================================================
 void Pps::initInternal(const RendererInitializer& initializer)
 {
+	enabled = initializer.pps.enabled;
+	if(!enabled)
+	{
+		return;
+	}
+
+	// Was ANKI_ASSERT("Initializing PPS"): always true, so it checked nothing
+
 	ssao.init(initializer);
 	hdr.init(initializer);
 	lf.init(initializer);
@@ -81,6 +89,8 @@ void Pps::init(const Renderer::Initializer& initializer)
 //==============================================================================
 void Pps::run()
 {
+	ANKI_ASSERT(enabled);
+
 	GlStateSingleton::get().disable(GL_BLEND);
 
 	// First SSAO because it depends on MS where HDR depends on IS

+ 7 - 36
src/renderer/Renderer.cpp

@@ -41,19 +41,13 @@ void Renderer::init(const RendererInitializer& initializer)
 
 	// quad VBOs and VAO
 	static const F32 quadVertCoords[][2] = {{1.0, 1.0}, {-1.0, 1.0}, 
-		{-1.0, -1.0}, {1.0, -1.0}};
+		{1.0, -1.0}, {-1.0, -1.0}};
 	quadPositionsVbo.create(GL_ARRAY_BUFFER, sizeof(quadVertCoords),
 		quadVertCoords, GL_STATIC_DRAW);
 
-	static const U16 quadVertIndeces[2][3] = 
-		{{0, 1, 3}, {1, 2, 3}}; // 2 triangles
-	quadVertIndecesVbo.create(GL_ELEMENT_ARRAY_BUFFER, sizeof(quadVertIndeces),
-		quadVertIndeces, GL_STATIC_DRAW);
-
 	quadVao.create();
 	quadVao.attachArrayBufferVbo(
 		&quadPositionsVbo, 0, 2, GL_FLOAT, false, 0, 0);
-	quadVao.attachElementArrayBufferVbo(&quadVertIndecesVbo);
 }
 
 //==============================================================================
@@ -96,7 +90,10 @@ void Renderer::render(SceneGraph& scene_)
 	bs.run();
 
 	ANKI_COUNTER_START_TIMER(C_RENDERER_PPS_TIME);
-	pps.run();
+	if(pps.getEnabled())
+	{
+		pps.run();
+	}
 	ANKI_COUNTER_STOP_TIMER_INC(C_RENDERER_PPS_TIME);
 
 	ANKI_CHECK_GL_ERROR();
@@ -107,40 +104,14 @@ void Renderer::render(SceneGraph& scene_)
 void Renderer::drawQuad()
 {
 	quadVao.bind(); // quadPositionsVbo holds 4 vertices (one triangle strip)
-	glDrawElements(GL_TRIANGLES, 2 * 3, GL_UNSIGNED_SHORT, 0);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
 //==============================================================================
 void Renderer::drawQuadInstanced(U32 primitiveCount)
 {
 	quadVao.bind(); // quadPositionsVbo holds 4 vertices (one triangle strip)
-	glDrawElementsInstanced(GL_TRIANGLES, 2 * 3, GL_UNSIGNED_SHORT, 0,
-		primitiveCount);
-}
-
-//==============================================================================
-void Renderer::drawQuadMultiple(U times)
-{
-	quadVao.bind();
-#if ANKI_GL == ANKI_GL_DESKTOP
-	const U max_times = 16;
-	Array<GLsizei, max_times> count;
-	Array<const GLvoid*, max_times> indices;
-
-	for(U i = 0; i < times; i++)
-	{
-		count[i] = 2 * 3;
-		indices[i] = nullptr;
-	}
-
-	glMultiDrawElements(
-		GL_TRIANGLES, &count[0], GL_UNSIGNED_SHORT, &indices[0], times);
-#else
-	for(U i = 0; i < times; i++)
-	{
-		glDrawElements(GL_TRIANGLES, 2 * 3, GL_UNSIGNED_SHORT, 0);
-	}
-#endif
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, primitiveCount);
 }
 
 //==============================================================================

+ 96 - 8
src/renderer/Ssao.cpp

@@ -2,16 +2,64 @@
 #include "anki/renderer/Renderer.h"
 #include "anki/scene/Camera.h"
 #include "anki/scene/SceneGraph.h"
+#include "anki/util/Functions.h"
 
 namespace anki {
 
+//==============================================================================
+// Misc                                                                        =
+//==============================================================================
+
+const U NOISE_TEX_SIZE = 8;
+const U KERNEL_SIZE = 16;
+
+//==============================================================================
+static void genKernel(Vec3* ANKI_RESTRICT arr,
+	Vec3* ANKI_RESTRICT arrEnd)
+{
+	ANKI_ASSERT(arr && arrEnd && arr != arrEnd);
+
+	// Non-empty range (asserted above), hence the test-at-the-bottom loop
+	do
+	{
+		Vec3& v = *arr;
+		v.x() = randRange(-1.0f, 1.0f);
+		v.y() = randRange(-1.0f, 1.0f);
+		v.z() = randRange(0.0f, 1.0f);
+		v.normalize();
+		// Rescale the normalized vector by a second random factor
+		v *= randRange(0.0f, 1.0f);
+	} while(++arr != arrEnd);
+}
+
+//==============================================================================
+static void genNoise(Vec3* ANKI_RESTRICT arr,
+	Vec3* ANKI_RESTRICT arrEnd)
+{
+	ANKI_ASSERT(arr && arrEnd && arr != arrEnd);
+
+	do // the range is asserted non-empty, so writing before testing is safe
+	{
+		Vec3& v = *arr;
+		v.x() = randRange(-1.0f, 1.0f);
+		v.y() = randRange(-1.0f, 1.0f);
+		v.z() = 0.0; // z stays zero; only x and y are randomized
+		v.normalize();
+	} while(++arr != arrEnd);
+}
+
 //==============================================================================
 struct ShaderCommonUniforms ///< Data written to commonUbo by Ssao::run()
 {
 	Vec4 nearPlanes; ///< x: camera near, y: camera far, zw: renderer planes
 	Vec4 limitsOfNearPlane; ///< xy: limitsOfNearPlane, zw: limitsOfNearPlane2
+	Mat4 projectionMatrix; ///< Camera projection matrix, stored transposed
 };
 
+//==============================================================================
+// Ssao                                                                        =
+//==============================================================================
+
 //==============================================================================
 void Ssao::createFbo(Fbo& fbo, Texture& fai, F32 width, F32 height)
 {
@@ -70,13 +118,46 @@ void Ssao::initInternal(const RendererInitializer& initializer)
 	}
 
 	//
-	// noise map
+	// noise texture
 	//
-	noiseMap.load("engine_data/noise.ankitex");
-	noiseMap->setFiltering(Texture::TFT_NEAREST);
-	if(noiseMap->getWidth() != noiseMap->getHeight())
+	Array<Vec3, NOISE_TEX_SIZE * NOISE_TEX_SIZE> noise;
+	Texture::Initializer tinit;
+
+	genNoise(noise.begin(), noise.end());
+
+	tinit.width = tinit.height = NOISE_TEX_SIZE;
+	tinit.target = GL_TEXTURE_2D;
+	tinit.internalFormat = GL_RGB32F;
+	tinit.format = GL_RGB;
+	tinit.type = GL_FLOAT;
+	tinit.filteringType = Texture::TFT_NEAREST;
+	tinit.repeat = true;
+	tinit.mipmapsCount = 1;
+	tinit.data[0][0] = {&noise[0], sizeof(noise)};
+
+	noiseTex.create(tinit);
+
+	//
+	// Kernel
+	//
+	std::stringstream kernelStr;
+	Array<Vec3, KERNEL_SIZE> kernel;
+
+	genKernel(kernel.begin(), kernel.end());
+	kernelStr << "const vec3 KERNEL[" << KERNEL_SIZE << "] = vec3[](";
+	for(U i = 0; i < kernel.size(); i++)
 	{
-		throw ANKI_EXCEPTION("Incorrect noisemap size");
+		kernelStr << "vec3(" << kernel[i].x() << ", " << kernel[i].y()
+			<< ", " << kernel[i].z() << ")";
+
+		if(i != kernel.size() - 1)
+		{
+			kernelStr << ", ";
+		}
+		else
+		{
+			kernelStr << ");";
+		}
 	}
 
 	//
@@ -87,10 +168,12 @@ void Ssao::initInternal(const RendererInitializer& initializer)
 	std::stringstream pps;
 
 	// main pass prog
-	pps << "#define NOISE_MAP_SIZE " << noiseMap->getWidth()
+	pps << "#define NOISE_MAP_SIZE " << NOISE_TEX_SIZE
 		<< "\n#define WIDTH " << mpWidth
 		<< "\n#define HEIGHT " << mpHeight
 		<< "\n#define USE_MRT " << ANKI_RENDERER_USE_MRT
+		<< "\n#define KERNEL_SIZE " << KERNEL_SIZE
+		<< "\n" << kernelStr.str() 
 		<< "\n";
 	ssaoSProg.load(ShaderProgramResource::createSrcCodeToCache(
 		"shaders/PpsSsao.glsl", pps.str().c_str()).c_str());
@@ -158,14 +241,19 @@ void Ssao::run()
 
 	// Write common block
 	if(commonUboUpdateTimestamp < r->getPlanesUpdateTimestamp()
+		|| commonUboUpdateTimestamp < cam.getFrustumableTimestamp()
 		|| commonUboUpdateTimestamp == 1)
 	{
 		ShaderCommonUniforms blk;
-		blk.nearPlanes = Vec4(cam.getNear(), 0.0, r->getPlanes().x(),
+
+		blk.nearPlanes = Vec4(cam.getNear(), cam.getFar(), r->getPlanes().x(),
 			r->getPlanes().y());
+
 		blk.limitsOfNearPlane = Vec4(r->getLimitsOfNearPlane(),
 			r->getLimitsOfNearPlane2());
 
+		blk.projectionMatrix = cam.getProjectionMatrix().getTransposed();
+
 		commonUbo.write(&blk);
 		commonUboUpdateTimestamp = getGlobTimestamp();
 	}
@@ -175,7 +263,7 @@ void Ssao::run()
 		r->getMs().getDepthFai());
 
 	// noiseMap
-	ssaoSProg->findUniformVariable("noiseMap").set(*noiseMap);
+	ssaoSProg->findUniformVariable("noiseMap").set(noiseTex);
 
 	// msGFai
 #if ANKI_RENDERER_USE_MRT

+ 4 - 4
testapp/Main.cpp

@@ -545,16 +545,16 @@ void initSubsystems(int argc, char* argv[])
 	initializer.is.sm.enabled = true;
 	initializer.is.sm.pcfEnabled = false;
 	initializer.is.sm.resolution = 512;
+	initializer.pps.enabled = true;
 	initializer.pps.hdr.enabled = true;
-	initializer.pps.hdr.renderingQuality = 0.25;
+	initializer.pps.hdr.renderingQuality = 0.5;
 	initializer.pps.hdr.blurringDist = 1.0;
 	initializer.pps.hdr.blurringIterationsCount = 1;
 	initializer.pps.hdr.exposure = 8.0;
 	initializer.pps.ssao.blurringIterationsNum = 1;
 	initializer.pps.ssao.enabled = true;
-	initializer.pps.ssao.mainPassRenderingQuality = 0.5;
-	initializer.pps.ssao.blurringRenderingQuality = 0.5;
-	initializer.pps.enabled = true;
+	initializer.pps.ssao.mainPassRenderingQuality = 0.35;
+	initializer.pps.ssao.blurringRenderingQuality = 0.35;
 	initializer.pps.bl.enabled = true;
 	initializer.pps.bl.blurringIterationsNum = 2;
 	initializer.pps.bl.sideBlurFactor = 1.0;