Browse Source

add denoise example (#2344)

/*
* Implement SVGF style denoising as bgfx example. Goal is to explore various
* options and parameters, not produce an optimized, efficient denoiser.
*
* Starts with deferred rendering scene with very basic lighting. Lighting is
* masked out with a noise pattern to provide something to denoise. There are
* two options for the noise pattern. One is a fixed 2x2 dither pattern to
* stand-in for lighting at quarter resolution. The other is the common
* shadertoy random pattern as a stand-in for some fancier lighting without
* enough samples per pixel, like ray tracing.
*
* First a temporal denoising filter is applied. The temporal filter is only
* using normals to reject previous samples. The SVGF paper also describes using
* depth comparison to reject samples but that is not implemented here.
*
* Followed by some number of spatial filters. These are implemented like in the
* SVGF paper. As an alternative to the 5x5 Edge-Avoiding A-Trous filter, can
* select a 3x3 filter instead. The 3x3 filter takes fewer samples and covers a
* smaller area, but takes less time to compute. From a loosely eyeballed
* comparison, N 5x5 passes looks similar to N+1 3x3 passes. The wider spatial
* filters take a fair chunk of time to compute. I wonder if it would be a good
* idea to interleave the input texture before computing, after the first pass
* which skips zero pixels.
*
* I have not implemented the variance-guided part.
*
* There's also an optional TXAA pass to be applied after. I am not happy with
* its implementation yet, so it defaults to off here.
*/

/*
* References:
* Spatiotemporal Variance-Guided Filtering: Real-Time Reconstruction for
*	Path-Traced Global Illumination. by Christoph Schied and more.
*	- SVGF denoising algorithm
*
* Streaming G-Buffer Compression for Multi-Sample Anti-Aliasing.
*	by E. Kerzner and M. Salvi.
*	- details about history comparison for temporal denoising filter
*
* Edge-Avoiding À-Trous Wavelet Transform for Fast Global Illumination
*	Filtering. by Holger Dammertz and more.
*	- details about a-trous algorithm for spatial denoising filter
*/
elvencache 5 years ago
parent
commit
a33ca71bcf

+ 1075 - 0
examples/xx-denoise/denoise.cpp

@@ -0,0 +1,1075 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+/*
+* Implement SVGF style denoising as bgfx example. Goal is to explore various
+* options and parameters, not produce an optimized, efficient denoiser.
+*
+* Starts with deferred rendering scene with very basic lighting. Lighting is
+* masked out with a noise pattern to provide something to denoise. There are
+* two options for the noise pattern. One is a fixed 2x2 dither pattern to
+* stand-in for lighting at quarter resolution. The other is the common
+* shadertoy random pattern as a stand-in for some fancier lighting without
+* enough samples per pixel, like ray tracing.
+*
+* First a temporal denoising filter is applied. The temporal filter is only
+* using normals to reject previous samples. The SVGF paper also describes using
+* depth comparison to reject samples but that is not implemented here.
+*
+* Followed by some number of spatial filters. These are implemented like in the
+* SVGF paper. As an alternative to the 5x5 Edge-Avoiding A-Trous filter, can
+* select a 3x3 filter instead. The 3x3 filter takes fewer samples and covers a
+* smaller area, but takes less time to compute. From a loosely eyeballed
+* comparison, N 5x5 passes looks similar to N+1 3x3 passes. The wider spatial
+* filters take a fair chunk of time to compute. I wonder if it would be a good
+* idea to interleave the input texture before computing, after the first pass
+* which skips zero pixels.
+*
+* I have not implemented the variance-guided part.
+*
+* There's also an optional TXAA pass to be applied after. I am not happy with
+* its implementation yet, so it defaults to off here.
+*/
+
+/*
+* References:
+* Spatiotemporal Variance-Guided Filtering: Real-Time Reconstruction for
+*	Path-Traced Global Illumination. by Christoph Schied and more.
+*	- SVGF denoising algorithm
+*
+* Streaming G-Buffer Compression for Multi-Sample Anti-Aliasing.
+*	by E. Kerzner and M. Salvi.
+*	- details about history comparison for temporal denoising filter
+*
+* Edge-Avoiding À-Trous Wavelet Transform for Fast Global Illumination
+*	Filtering. by Holger Dammertz and more.
+*	- details about a-trous algorithm for spatial denoising filter
+*/
+
+
+#include <common.h>
+#include <camera.h>
+#include <bgfx_utils.h>
+#include <imgui/imgui.h>
+#include <bx/rng.h>
+#include <bx/os.h>
+
+
+namespace {
+
+#define DENOISE_MAX_PASSES		6 // upper bound on spatial passes: sizes destBuffer[] in update() and is the max of the "spatial passes" slider
+
+// Gbuffer has multiple render targets; the values below are indices into m_gbufferTex
+#define GBUFFER_RT_COLOR		0
+#define GBUFFER_RT_NORMAL		1
+#define GBUFFER_RT_VELOCITY		2
+#define GBUFFER_RT_DEPTH		3
+#define GBUFFER_RENDER_TARGETS	4 // count of the GBUFFER_RT_* indices above
+
+#define MODEL_COUNT				100 // presumably sizes m_models - declaration not visible here, confirm
+
+static const char * s_meshPaths[] = // mesh files loaded by init() into m_meshes[], one entry per path
+{
+	"meshes/column.bin",
+	"meshes/tree.bin",
+	"meshes/hollowcube.bin",
+	"meshes/bunny.bin"
+};
+
+static const float s_meshScale[] = // uniform scale per mesh; looked up with the same Model::mesh index as m_meshes[] in drawAllModels()
+{
+	0.05f,
+	0.15f,
+	0.25f,
+	0.25f
+};
+
+// Vertex format used by the full-screen passes: a 3-float position followed by
+// a 2-float texture coordinate. init() fills in ms_layout to describe it.
+struct PosTexCoord0Vertex
+{
+	// Position
+	float m_x;
+	float m_y;
+	float m_z;
+
+	// Texture coordinate
+	float m_u;
+	float m_v;
+
+	static bgfx::VertexLayout ms_layout;
+
+	// Build the shared layout description (position, then texcoord0).
+	static void init()
+	{
+		ms_layout.begin();
+		ms_layout.add(bgfx::Attrib::Position, 3, bgfx::AttribType::Float);
+		ms_layout.add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float);
+		ms_layout.end();
+	}
+};
+
+bgfx::VertexLayout PosTexCoord0Vertex::ms_layout;
+
+// CPU-side copy of the "u_params" vec4 array uniform. The named fields and
+// m_params share storage through the union below, so values are written by
+// field name and uploaded in one call by submit().
+struct Uniforms
+{
+	enum { NumVec4 = 13 };
+
+	// Create the uniform handle as an array of NumVec4 vec4 values.
+	void init()
+	{
+		u_params = bgfx::createUniform("u_params", bgfx::UniformType::Vec4, NumVec4);
+	}
+
+	// Upload the current contents of m_params for the next draw call.
+	void submit() const
+	{
+		bgfx::setUniform(u_params, m_params, NumVec4);
+	}
+
+	// Release the uniform handle.
+	void destroy()
+	{
+		bgfx::destroy(u_params);
+	}
+
+	union
+	{
+		// One inner struct per vec4 slot; the leading comment is the slot index.
+		struct
+		{
+			/*  0    */ struct { float m_cameraJitterCurr[2]; float m_cameraJitterPrev[2]; };
+			/*  1    */ struct { float m_feedbackMin; float m_feedbackMax; float m_unused1[2]; };
+			/*  2    */ struct { float m_unused2; float m_applyMitchellFilter; float m_options[2]; };
+			/*  3-6  */ struct { float m_worldToViewPrev[16]; };
+			/*  7-10 */ struct { float m_viewToProjPrev[16]; };
+			/* 11    */ struct { float m_frameOffsetForNoise; float m_noiseType; float m_unused11[2]; };
+			/* 12    */ struct { float m_denoiseStep; float m_sigmaDepth; float m_sigmaNormal; float m_unused12; };
+		};
+
+		// Flat view of the same storage, as passed to bgfx::setUniform.
+		float m_params[NumVec4 * 4];
+	};
+
+	bgfx::UniformHandle u_params;
+};
+
+// A single-attachment frame buffer paired with the texture it renders into.
+struct RenderTarget
+{
+	bgfx::TextureHandle m_texture;
+	bgfx::FrameBufferHandle m_buffer;
+
+	// Create the texture and wrap it in a frame buffer.
+	void init(uint32_t _width, uint32_t _height, bgfx::TextureFormat::Enum _format, uint64_t _flags)
+	{
+		const uint16_t width  = uint16_t(_width);
+		const uint16_t height = uint16_t(_height);
+		m_texture = bgfx::createTexture2D(width, height, false, 1, _format, _flags);
+
+		// Passing true asks the frame buffer to destroy the texture with it.
+		m_buffer = bgfx::createFrameBuffer(1, &m_texture, true);
+	}
+
+	// Destroy the frame buffer; the texture goes with it (see init()).
+	void destroy()
+	{
+		bgfx::destroy(m_buffer);
+	}
+};
+
+// Fill a 3-vertex transient vertex buffer with one oversized triangle and bind
+// it as vertex stream 0. Nothing is bound when fewer than three transient
+// vertices are available.
+//
+// _textureWidth/_textureHeight and _texelHalf produce the half-texel UV offset,
+// _originBottomLeft flips the V range, _width/_height scale the triangle.
+void screenSpaceQuad(float _textureWidth, float _textureHeight, float _texelHalf, bool _originBottomLeft, float _width = 1.0f, float _height = 1.0f)
+{
+	if (3 != bgfx::getAvailTransientVertexBuffer(3, PosTexCoord0Vertex::ms_layout))
+	{
+		return;
+	}
+
+	bgfx::TransientVertexBuffer tvb;
+	bgfx::allocTransientVertexBuffer(&tvb, 3, PosTexCoord0Vertex::ms_layout);
+	PosTexCoord0Vertex* corners = (PosTexCoord0Vertex*)tvb.data;
+
+	// Half-texel offsets in UV units.
+	const float halfTexelU = _texelHalf / _textureWidth;
+	const float halfTexelV = _texelHalf / _textureHeight;
+
+	// Position extents.
+	const float left   = -_width;
+	const float right  =  _width;
+	const float bottom = 0.0f;
+	const float top    = _height * 2.0f;
+	const float depth  = 0.0f;
+
+	// Texture coordinate extents.
+	const float uLeft  = -1.0f + halfTexelU;
+	const float uRight =  1.0f + halfTexelU;
+
+	float vBottom = halfTexelV;
+	float vTop    = 2.0f + halfTexelV;
+
+	if (_originBottomLeft)
+	{
+		// Swap the V ends, then shift both down by one.
+		const float flippedBottom = vTop    - 1.0f;
+		const float flippedTop    = vBottom - 1.0f;
+		vBottom = flippedBottom;
+		vTop    = flippedTop;
+	}
+
+	corners[0] = { left,  bottom, depth, uLeft,  vBottom };
+	corners[1] = { right, bottom, depth, uRight, vBottom };
+	corners[2] = { right, top,    depth, uRight, vTop    };
+
+	bgfx::setVertexBuffer(0, &tvb);
+}
+
+// Write (_x, _y) into the first two floats of _v.
+void vec2Set(float* _v, float _x, float _y)
+{
+	float* out = _v;
+	*out++ = _x;
+	*out   = _y;
+}
+
+// Write (_x, _y, _z, _w) into the first four floats of _v.
+void vec4Set(float* _v, float _x, float _y, float _z, float _w)
+{
+	const float components[4] = { _x, _y, _z, _w };
+	for (int ii = 0; ii < 4; ++ii)
+	{
+		_v[ii] = components[ii];
+	}
+}
+
+// Copy the 16 floats of a 4x4 matrix from _src to _m, front to back.
+void mat4Set(float * _m, const float * _src)
+{
+	const float* const srcEnd = _src + 16;
+	while (_src != srcEnd)
+	{
+		*_m++ = *_src++;
+	}
+}
+
+class ExampleDenoise : public entry::AppI
+{
+public:
+	ExampleDenoise(const char* _name, const char* _description) // name and description are forwarded to entry::AppI
+		: entry::AppI(_name, _description)
+		, m_currFrame(UINT32_MAX) // placeholder until update() stores the value returned by bgfx::frame()
+		, m_texelHalf(0.0f) // overwritten in init() once the renderer type is known
+	{
+	}
+
+	// Bring up bgfx and create everything the example needs: uniforms, shader
+	// programs, meshes, textures, render targets, camera state and imgui.
+	void init(int32_t _argc, const char* const* _argv, uint32_t _width, uint32_t _height) override
+	{
+		Args args(_argc, _argv);
+
+		m_width  = _width;
+		m_height = _height;
+		m_debug  = BGFX_DEBUG_NONE;
+		m_reset  = BGFX_RESET_VSYNC;
+
+		bgfx::Init bgfxInit;
+		bgfxInit.type              = args.m_type;
+		bgfxInit.vendorId          = args.m_pciId;
+		bgfxInit.resolution.width  = m_width;
+		bgfxInit.resolution.height = m_height;
+		bgfxInit.resolution.reset  = m_reset;
+		bgfx::init(bgfxInit);
+
+		// Apply the debug flags chosen above.
+		bgfx::setDebug(m_debug);
+
+		// Parameter block shared by all shaders.
+		m_uniforms.init();
+
+		// Sampler uniforms, used whenever a texture is bound.
+		// s_albedo:         model's source albedo
+		// s_color:          color (albedo) gbuffer, default color input
+		// s_normal:         normal gbuffer, model's source normal
+		// s_velocity:       velocity gbuffer
+		// s_depth:          depth gbuffer
+		// s_previousColor:  previous frame's result
+		// s_previousNormal: previous frame's gbuffer normal
+		s_albedo         = bgfx::createUniform("s_albedo", bgfx::UniformType::Sampler);
+		s_color          = bgfx::createUniform("s_color", bgfx::UniformType::Sampler);
+		s_normal         = bgfx::createUniform("s_normal", bgfx::UniformType::Sampler);
+		s_velocity       = bgfx::createUniform("s_velocity", bgfx::UniformType::Sampler);
+		s_depth          = bgfx::createUniform("s_depth", bgfx::UniformType::Sampler);
+		s_previousColor  = bgfx::createUniform("s_previousColor", bgfx::UniformType::Sampler);
+		s_previousNormal = bgfx::createUniform("s_previousNormal", bgfx::UniformType::Sampler);
+
+		// Shader programs: gbuffer fill, lighting combine, then the screen-quad passes.
+		m_gbufferProgram           = loadProgram("vs_denoise_gbuffer", "fs_denoise_gbuffer");
+		m_combineProgram           = loadProgram("vs_denoise_screenquad", "fs_denoise_deferred_combine");
+		m_copyProgram              = loadProgram("vs_denoise_screenquad", "fs_denoise_copy");
+		m_denoiseTemporalProgram   = loadProgram("vs_denoise_screenquad", "fs_denoise_temporal");
+		m_denoiseSpatialProgram3x3 = loadProgram("vs_denoise_screenquad", "fs_denoise_spatial_3x3");
+		m_denoiseSpatialProgram5x5 = loadProgram("vs_denoise_screenquad", "fs_denoise_spatial_5x5");
+		m_denoiseApplyLighting     = loadProgram("vs_denoise_screenquad", "fs_denoise_apply_lighting");
+		m_txaaProgram              = loadProgram("vs_denoise_screenquad", "fs_denoise_txaa");
+
+		// One mesh per entry in s_meshPaths.
+		for (uint32_t meshIdx = 0; meshIdx < BX_COUNTOF(s_meshPaths); ++meshIdx)
+		{
+			m_meshes[meshIdx] = meshLoad(s_meshPaths[meshIdx]);
+		}
+
+		// Scatter the models: random mesh, random x/z in [-6.4, 6.35], y fixed at 0.
+		bx::RngMwc rng;
+		auto randomCoord = [&rng]()
+		{
+			return ((rng.gen() % 256) - 128.0f) / 20.0f;
+		};
+		for (Model& model : m_models)
+		{
+			model.mesh = rng.gen() % BX_COUNTOF(s_meshPaths);
+			model.position[0] = randomCoord();
+			model.position[1] = 0;
+			model.position[2] = randomCoord();
+		}
+
+		// The ground is just the cube mesh.
+		m_ground = meshLoad("meshes/cube.bin");
+
+		m_groundTexture = loadTexture("textures/fieldstone-rgba.dds");
+		m_normalTexture = loadTexture("textures/fieldstone-n.dds");
+
+		// Render targets; this also records the size in m_size.
+		m_recreateFrameBuffers = false;
+		createFramebuffers();
+
+		// Vertex layout for the screen-space passes.
+		PosTexCoord0Vertex::init();
+
+		// Camera
+		cameraCreate();
+		cameraSetPosition({ 0.0f, 1.5f, 0.0f });
+		cameraSetVerticalAngle(-0.3f);
+		m_fovY = 60.0f;
+
+		// Seed the "previous" matrices with the current ones so the first frame matches.
+		cameraGetViewMtx(m_view);
+		const float aspect = float(m_size[0]) / float(m_size[1]);
+		bx::mtxProj(m_proj, m_fovY, aspect, 0.01f, 100.0f, bgfx::getCaps()->homogeneousDepth);
+		mat4Set(m_worldToViewPrev, m_view);
+		mat4Set(m_viewToProjPrev, m_proj);
+
+		// No previous-frame results exist yet.
+		m_havePrevious = false;
+
+		// Half-texel offset is only non-zero on Direct3D9.
+		if (bgfx::RendererType::Direct3D9 == bgfx::getRendererType())
+		{
+			m_texelHalf = 0.5f;
+		}
+		else
+		{
+			m_texelHalf = 0.0f;
+		}
+
+		imguiCreate();
+	}
+
+	// Release everything created by init(), in the same order as before:
+	// meshes, textures, programs, uniforms, samplers, render targets, camera,
+	// imgui, then bgfx itself. Always returns 0.
+	int32_t shutdown() override
+	{
+		for (uint32_t meshIdx = 0; meshIdx < BX_COUNTOF(s_meshPaths); ++meshIdx)
+		{
+			meshUnload(m_meshes[meshIdx]);
+		}
+		meshUnload(m_ground);
+
+		bgfx::destroy(m_normalTexture);
+		bgfx::destroy(m_groundTexture);
+
+		const bgfx::ProgramHandle programs[] =
+		{
+			m_gbufferProgram,
+			m_combineProgram,
+			m_copyProgram,
+			m_denoiseTemporalProgram,
+			m_denoiseSpatialProgram3x3,
+			m_denoiseSpatialProgram5x5,
+			m_denoiseApplyLighting,
+			m_txaaProgram
+		};
+		for (bgfx::ProgramHandle program : programs)
+		{
+			bgfx::destroy(program);
+		}
+
+		m_uniforms.destroy();
+
+		const bgfx::UniformHandle samplers[] =
+		{
+			s_albedo,
+			s_color,
+			s_normal,
+			s_velocity,
+			s_depth,
+			s_previousColor,
+			s_previousNormal
+		};
+		for (bgfx::UniformHandle sampler : samplers)
+		{
+			bgfx::destroy(sampler);
+		}
+
+		destroyFramebuffers();
+		cameraDestroy();
+		imguiDestroy();
+		bgfx::shutdown();
+
+		return 0;
+	}
+
+	bool update() override // renders one frame: gbuffer -> combine -> temporal/spatial denoise -> apply lighting -> optional TXAA -> display -> UI
+	{
+		if (!entry::processEvents(m_width, m_height, m_debug, m_reset, &m_mouseState))
+		{
+			// skip processing when minimized, otherwise crashing
+			if (0 == m_width || 0 == m_height)
+			{
+				return true;
+			}
+
+			// Update frame timer ('last' is a function-local static seeded with 'now', so the first frame's delta is zero)
+			int64_t now = bx::getHPCounter();
+			static int64_t last = now;
+			const int64_t frameTime = now - last;
+			last = now;
+			const double freq = double(bx::getHPFrequency());
+			const float deltaTime = float(frameTime / freq);
+			const bgfx::Caps* caps = bgfx::getCaps();
+
+			if (m_size[0] != (int32_t)m_width
+			||  m_size[1] != (int32_t)m_height
+			||  m_recreateFrameBuffers) // size differs from the one recorded by createFramebuffers(), or a rebuild was flagged
+			{
+				destroyFramebuffers();
+				createFramebuffers();
+				m_recreateFrameBuffers = false;
+			}
+
+			// Update camera (delta time is scaled by 0.15 before being handed over)
+			cameraUpdate(deltaTime*0.15f, m_mouseState);
+
+			// Set up matrices for gbuffer
+			cameraGetViewMtx(m_view);
+
+			updateUniforms(); // also refreshes m_jitter, which the TXAA projection offset below reads
+
+			bx::mtxProj(m_proj, m_fovY, float(m_size[0]) / float(m_size[1]), 0.01f, 100.0f, caps->homogeneousDepth);
+			bx::mtxProj(m_proj2, m_fovY, float(m_size[0]) / float(m_size[1]), 0.01f, 100.0f, false); // same projection with homogeneousDepth forced false; not read again inside update()
+
+			if (m_enableTxaa) // offset the projection by this frame's jitter, scaled by 2/size per axis
+			{
+				m_proj[2*4+0] += m_jitter[0] * (2.0f / m_size[0]);
+				m_proj[2*4+1] -= m_jitter[1] * (2.0f / m_size[1]);
+			}
+
+			bgfx::ViewId view = 0; // each pass below takes the next view id
+
+			// Draw everything into gbuffer
+			{
+				bgfx::setViewName(view, "gbuffer");
+				bgfx::setViewClear(view
+					, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
+					, 0
+					, 1.0f
+					, 0
+				);
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_size[0]), uint16_t(m_size[1]));
+				bgfx::setViewTransform(view, m_view, m_proj);
+				// Make sure when we draw it goes into gbuffer and not backbuffer
+				bgfx::setViewFrameBuffer(view, m_gbuffer);
+
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_WRITE_Z
+					| BGFX_STATE_DEPTH_TEST_LESS
+					);
+
+				drawAllModels(view, m_gbufferProgram, m_uniforms);
+				++view;
+			}
+
+			float orthoProj[16]; // shared by every full-screen pass below
+			bx::mtxOrtho(orthoProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, caps->homogeneousDepth);
+
+			// Shade gbuffer into m_currentColor (reads gbuffer color and normal)
+			{
+				bgfx::setViewName(view, "combine");
+
+				// for some reason, previous draws texture lingering in transform stack
+				// need to clear out, otherwise this copy is garbled. this used to work
+				// and broke after updating, but i last updated like 2 years ago.
+				float identity[16];
+				bx::mtxIdentity(identity);
+				bgfx::setTransform(identity);
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_currentColor.m_buffer);
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_DEPTH_TEST_ALWAYS
+					);
+				bgfx::setTexture(0, s_color, m_gbufferTex[GBUFFER_RT_COLOR]);
+				bgfx::setTexture(1, s_normal, m_gbufferTex[GBUFFER_RT_NORMAL]);
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_combineProgram);
+				++view;
+			}
+
+			// update last texture written, to chain passes together
+			bgfx::TextureHandle lastTex = m_currentColor.m_texture;
+
+			// denoise temporal pass (skipped until a previous frame exists); writes m_temporaryColor
+			if (m_useTemporalPass && m_havePrevious)
+			{
+				bgfx::setViewName(view, "denoise temporal");
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_temporaryColor.m_buffer);
+				bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_A | BGFX_STATE_DEPTH_TEST_ALWAYS);
+
+				// want color, prevColor
+				//		normal, prevNormal
+				//		depth, prevDepth to reject previous samples from accumulating - skipping depth for now
+
+				bgfx::setTexture(0, s_color, lastTex);
+				bgfx::setTexture(1, s_normal, m_gbufferTex[GBUFFER_RT_NORMAL]);
+				bgfx::setTexture(2, s_velocity, m_gbufferTex[GBUFFER_RT_VELOCITY]);
+				bgfx::setTexture(3, s_previousColor, m_previousDenoise.m_texture);
+				bgfx::setTexture(4, s_previousNormal, m_previousNormal.m_texture);
+
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_denoiseTemporalProgram);
+				++view;
+
+				lastTex = m_temporaryColor.m_texture;
+			}
+			
+			// denoise spatial passes (each pass reads lastTex and writes destBuffer[i])
+			if (0 < m_denoisePasses)
+			{
+				// variable number of passes for denoise, alternate between two textures/buffers
+				bgfx::FrameBufferHandle destBuffer[DENOISE_MAX_PASSES] = {
+					m_previousDenoise.m_buffer, // pass 0 output is what the temporal pass binds as s_previousColor next frame
+					m_currentColor.m_buffer,
+					m_temporaryColor.m_buffer,
+					m_currentColor.m_buffer,
+					m_temporaryColor.m_buffer,
+					m_currentColor.m_buffer
+				};
+				
+				uint32_t denoisePasses = bx::min(DENOISE_MAX_PASSES, m_denoisePasses);
+				for (uint32_t i = 0; i < denoisePasses; ++i)
+				{
+					const char buffer[] = { 'd', 'e', 'n', 'o', 'i', 's', 'e', ' ', char('0'+i), 0 }; // "denoise N"; one digit suffices since i < DENOISE_MAX_PASSES
+					bgfx::setViewName(view, buffer);
+
+					bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+					bgfx::setViewTransform(view, NULL, orthoProj);
+					bgfx::setViewFrameBuffer(view, destBuffer[i]);
+					bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_A | BGFX_STATE_DEPTH_TEST_ALWAYS);
+					bgfx::setTexture(0, s_color, lastTex);
+					bgfx::setTexture(1, s_normal, m_gbufferTex[GBUFFER_RT_NORMAL]);
+					bgfx::setTexture(2, s_depth, m_gbufferTex[GBUFFER_RT_DEPTH]);
+
+					// need to update some denoise uniforms per draw (step doubles each pass: 1, 2, 4, ...)
+					float denoiseStepScale = bx::pow(2.0f, float(i));
+					m_uniforms.m_denoiseStep = denoiseStepScale;
+
+					m_uniforms.submit();
+					screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+					bgfx::ProgramHandle spatialProgram = (0 == m_spatialSampleType)
+						? m_denoiseSpatialProgram3x3
+						: m_denoiseSpatialProgram5x5;
+					bgfx::submit(view, spatialProgram);
+					++view;
+
+					if (m_previousDenoise.m_buffer.idx == destBuffer[i].idx) // map the frame buffer just written back to its texture for the next pass
+					{
+						lastTex = m_previousDenoise.m_texture;
+					}
+					else if (m_temporaryColor.m_buffer.idx == destBuffer[i].idx)
+					{
+						lastTex = m_temporaryColor.m_texture;
+					}
+					else
+					{
+						lastTex = m_currentColor.m_texture;
+					}
+				}
+			}
+			else
+			{
+				// need color result for temporal denoise if not supplied by spatial pass
+				// (per SVGF paper, reuse previous frame's first spatial pass output as previous color)
+				bgfx::setViewName(view, "copy color for temporal denoise");
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_previousDenoise.m_buffer);
+				bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_A | BGFX_STATE_DEPTH_TEST_ALWAYS);
+				bgfx::setTexture(0, s_color, lastTex);
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_copyProgram);
+				++view;
+			}
+
+			// apply lighting: binds the denoised result as s_color and gbuffer albedo as s_albedo; writes whichever of current/temporary is not the input
+			{
+				bgfx::setViewName(view, "apply lighting");
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+
+				bgfx::FrameBufferHandle destBuffer = (lastTex.idx == m_currentColor.m_texture.idx)
+					? m_temporaryColor.m_buffer
+					: m_currentColor.m_buffer;
+				bgfx::setViewFrameBuffer(view, destBuffer);
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_DEPTH_TEST_ALWAYS
+					);
+				bgfx::setTexture(0, s_color, lastTex);
+				bgfx::setTexture(1, s_albedo, m_gbufferTex[GBUFFER_RT_COLOR]);
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_denoiseApplyLighting);
+				++view;
+				lastTex = (m_temporaryColor.m_buffer.idx == destBuffer.idx)
+					? m_temporaryColor.m_texture
+					: m_currentColor.m_texture;
+			}
+
+			if (m_enableTxaa)
+			{
+				// Draw txaa to txaa buffer
+				{
+					bgfx::setViewName(view, "temporal aa");
+
+					bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+					bgfx::setViewTransform(view, NULL, orthoProj);
+					bgfx::setViewFrameBuffer(view, m_txaaColor.m_buffer);
+					bgfx::setState(0
+						| BGFX_STATE_WRITE_RGB
+						| BGFX_STATE_WRITE_A
+						| BGFX_STATE_DEPTH_TEST_ALWAYS
+						);
+					bgfx::setTexture(0, s_color, lastTex);
+					bgfx::setTexture(1, s_previousColor, m_previousColor.m_texture);
+					bgfx::setTexture(2, s_velocity, m_gbufferTex[GBUFFER_RT_VELOCITY]);
+					bgfx::setTexture(3, s_depth, m_gbufferTex[GBUFFER_RT_DEPTH]);
+					m_uniforms.submit();
+					screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+					bgfx::submit(view, m_txaaProgram);
+					++view;
+				}
+			
+				// Copy txaa result to previous (m_previousColor is bound as s_previousColor by the TXAA pass above)
+				{
+					bgfx::setViewName(view, "copy2previous");
+
+					bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+					bgfx::setViewTransform(view, NULL, orthoProj);
+					bgfx::setViewFrameBuffer(view, m_previousColor.m_buffer);
+					bgfx::setState(0
+						| BGFX_STATE_WRITE_RGB
+						| BGFX_STATE_WRITE_A
+						| BGFX_STATE_DEPTH_TEST_ALWAYS
+						);
+					bgfx::setTexture(0, s_color, m_txaaColor.m_texture);
+					screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+					bgfx::submit(view, m_copyProgram);
+					++view;
+				}
+
+				// Copy txaa result to swap chain
+				{
+					bgfx::setViewName(view, "display");
+
+					bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+					bgfx::setViewTransform(view, NULL, orthoProj);
+					bgfx::setViewFrameBuffer(view, BGFX_INVALID_HANDLE);
+					bgfx::setState(0
+						| BGFX_STATE_WRITE_RGB
+						| BGFX_STATE_WRITE_A
+						| BGFX_STATE_DEPTH_TEST_ALWAYS
+						);
+					bgfx::setTexture(0, s_color, m_txaaColor.m_texture);
+					screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+					bgfx::submit(view, m_copyProgram);
+					++view;
+				}
+			}
+			else
+			{
+				// Copy color result to swap chain
+				{
+					bgfx::setViewName(view, "display");
+					bgfx::setViewClear(view
+						, BGFX_CLEAR_NONE
+						, 0
+						, 1.0f
+						, 0
+					);
+
+					bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+					bgfx::setViewTransform(view, NULL, orthoProj);
+					bgfx::setViewFrameBuffer(view, BGFX_INVALID_HANDLE);
+					bgfx::setState(0
+						| BGFX_STATE_WRITE_RGB
+						| BGFX_STATE_WRITE_A
+						);
+					bgfx::setTexture(0, s_color, lastTex);
+					screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+					bgfx::submit(view, m_copyProgram);
+					++view;
+				}
+			}
+
+			// copy the normal buffer for next time (bound as s_previousNormal by the temporal pass)
+			{
+				bgfx::setViewName(view, "copy normals");
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_previousNormal.m_buffer);
+				bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_A | BGFX_STATE_DEPTH_TEST_ALWAYS);
+				bgfx::setTexture(0, s_color, m_gbufferTex[GBUFFER_RT_NORMAL]);
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_copyProgram);
+				++view;
+
+				// update previous status (enables the temporal pass from the next frame on)
+				m_havePrevious = true;
+			}	
+
+			// Copy matrices for next time (m_proj includes the TXAA jitter offset when enabled)
+			mat4Set(m_worldToViewPrev, m_view);
+			mat4Set(m_viewToProjPrev, m_proj);
+
+			// Draw UI
+			imguiBeginFrame(m_mouseState.m_mx
+				, m_mouseState.m_my
+				, (m_mouseState.m_buttons[entry::MouseButton::Left] ? IMGUI_MBUT_LEFT : 0)
+				| (m_mouseState.m_buttons[entry::MouseButton::Right] ? IMGUI_MBUT_RIGHT : 0)
+				| (m_mouseState.m_buttons[entry::MouseButton::Middle] ? IMGUI_MBUT_MIDDLE : 0)
+				, m_mouseState.m_mz
+				, uint16_t(m_width)
+				, uint16_t(m_height)
+				);
+
+			showExampleDialog(this);
+
+			ImGui::SetNextWindowPos(
+				ImVec2(m_width - m_width / 4.0f - 10.0f, 10.0f)
+				, ImGuiCond_FirstUseEver
+				);
+			ImGui::SetNextWindowSize(
+				ImVec2(m_width / 4.0f, m_height / 1.24f)
+				, ImGuiCond_FirstUseEver
+				);
+			ImGui::Begin("Settings"
+				, NULL
+				, 0
+				);
+
+			ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.5f);
+
+			{
+				ImGui::TextWrapped(
+					"In this demo, noise is added to results of deferred lighting. Then denoise is applied "
+					"before multiplying the lit result with gbuffer albedo. Optionally, temporal antialiasing "
+					"can be applied after that. (off by default, implementation blurry)");
+				ImGui::Separator();
+
+				ImGui::Text("noise controls:");
+				ImGui::Combo("pattern", &m_noiseType, "none\0dither\0random\0\0");
+				if (ImGui::IsItemHovered())
+				{
+					ImGui::BeginTooltip();
+					ImGui::Text("none");
+					ImGui::BulletText("compare denoised results to this");
+					ImGui::BulletText("brighter than noisy images, not losing any pixel's energy");
+					ImGui::Text("dither");
+					ImGui::BulletText("reject 3 out of 4 pixels in 2x2 pattern");
+					ImGui::BulletText("could represent lower resolution signal");
+					ImGui::Text("random");
+					ImGui::BulletText("reject about half pixels, using common shader random");
+					ImGui::BulletText("could represent monte carlo something or other");
+					ImGui::EndTooltip();
+				}
+
+				ImGui::Checkbox("dynamic noise", &m_dynamicNoise);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("update noise pattern each frame");
+				ImGui::Separator();
+			}
+
+			{
+				ImGui::Text("temporal denoise pass controls:");
+				ImGui::Checkbox("use temporal pass", &m_useTemporalPass);
+				ImGui::Separator();
+			}
+
+			{
+				ImGui::Text("spatial denoise pass controls:");
+				ImGui::SliderInt("spatial passes", &m_denoisePasses, 0, DENOISE_MAX_PASSES);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("set passes to 0 to turn off spatial denoise");
+
+				ImGui::Combo("spatial sample extent", &m_spatialSampleType, "three\0five\0\0");
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("select 3x3 or 5x5 filter kernal");
+
+				ImGui::SliderFloat("sigma z", &m_sigmaDepth, 0.0f, 0.1f, "%.5f");
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("lower sigma z, pickier blending across depth edges");
+
+				ImGui::SliderFloat("sigma n", &m_sigmaNormal, 1.0f, 256.0f);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("higher sigma n, pickier blending across normal edges");
+				ImGui::Separator();
+			}
+
+			if (ImGui::CollapsingHeader("TXAA options"))
+			{
+				ImGui::Checkbox("use TXAA", &m_enableTxaa);
+				ImGui::Checkbox("apply extra blur to current color", &m_applyMitchellFilter);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("reduces flicker/crawl on thin features, maybe too much!");
+
+				ImGui::SliderFloat("feedback min", &m_feedbackMin, 0.0f, 1.0f);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("minimum amount of previous frame to blend in");
+
+				ImGui::SliderFloat("feedback max", &m_feedbackMax, 0.0f, 1.0f);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("maximum amount of previous frame to blend in");
+
+				ImGui::Checkbox("debug TXAA with slow frame rate", &m_useTxaaSlow);
+				if (ImGui::IsItemHovered())
+				{
+					ImGui::BeginTooltip();
+					ImGui::Text("sleep 100ms per frame to highlight temporal artifacts");
+					ImGui::Text("high framerate compensates for flickering, masking issues");
+					ImGui::EndTooltip();
+				}
+				ImGui::Separator();
+			}
+
+			ImGui::End();
+
+			imguiEndFrame();
+
+			// Advance to next frame. Rendering thread will be kicked to
+			// process submitted rendering primitives.
+			m_currFrame = bgfx::frame();
+
+			// add artificial wait to emphasize txaa behavior
+			if (m_useTxaaSlow)
+			{
+				bx::sleep(100);
+			}
+
+			return true;
+		}
+
+		return false; // entry::processEvents returned true; presumably an exit request - confirm
+	}
+
+	// Submit every entry of m_models, followed by the ground mesh, to view
+	// _pass with _program. Textures and _uniforms are re-bound before each submit.
+	void drawAllModels(bgfx::ViewId _pass, bgfx::ProgramHandle _program, const Uniforms & _uniforms)
+	{
+		for (const Model& model : m_models)
+		{
+			// Uniform scale chosen per mesh, no rotation, translate to the model position.
+			const float meshScale = s_meshScale[model.mesh];
+
+			float modelMtx[16];
+			bx::mtxSRT(modelMtx
+				, meshScale, meshScale, meshScale
+				, 0.0f, 0.0f, 0.0f
+				, model.position[0], model.position[1], model.position[2]
+				);
+
+			bgfx::setTexture(0, s_albedo, m_groundTexture);
+			bgfx::setTexture(1, s_normal, m_normalTexture);
+			_uniforms.submit();
+
+			meshSubmit(m_meshes[model.mesh], _pass, _program, modelMtx);
+		}
+
+		// Ground: the cube mesh scaled by 10 and moved 10 units down.
+		const float groundScale = 10.0f;
+
+		float scaleMtx[16];
+		bx::mtxScale(scaleMtx, groundScale, groundScale, groundScale);
+
+		float translateMtx[16];
+		bx::mtxTranslate(translateMtx, 0.0f, -10.0f, 0.0f);
+
+		float groundMtx[16];
+		bx::mtxMul(groundMtx, scaleMtx, translateMtx);
+
+		bgfx::setTexture(0, s_albedo, m_groundTexture);
+		bgfx::setTexture(1, s_normal, m_normalTexture);
+		_uniforms.submit();
+
+		meshSubmit(m_ground, _pass, _program, groundMtx);
+	}
+
+	// Record the current window size in m_size and create the gbuffer plus
+	// all intermediate color targets at that size.
+	void createFramebuffers()
+	{
+		m_size[0] = m_width;
+		m_size[1] = m_height;
+
+		// Render target with clamped addressing and default filtering.
+		const uint64_t bilinearFlags = 0
+			| BGFX_TEXTURE_RT
+			| BGFX_SAMPLER_U_CLAMP
+			| BGFX_SAMPLER_V_CLAMP
+			;
+
+		// Same, but point sampled for min, mag and mip.
+		const uint64_t pointSampleFlags = bilinearFlags
+			| BGFX_SAMPLER_MIN_POINT
+			| BGFX_SAMPLER_MAG_POINT
+			| BGFX_SAMPLER_MIP_POINT
+			;
+
+		const uint16_t gbufferWidth  = uint16_t(m_size[0]);
+		const uint16_t gbufferHeight = uint16_t(m_size[1]);
+
+		// Gbuffer attachments; the frame buffer takes ownership of the textures.
+		m_gbufferTex[GBUFFER_RT_COLOR] = bgfx::createTexture2D(
+			gbufferWidth, gbufferHeight, false, 1, bgfx::TextureFormat::BGRA8, pointSampleFlags);
+		m_gbufferTex[GBUFFER_RT_NORMAL] = bgfx::createTexture2D(
+			gbufferWidth, gbufferHeight, false, 1, bgfx::TextureFormat::BGRA8, pointSampleFlags);
+		m_gbufferTex[GBUFFER_RT_VELOCITY] = bgfx::createTexture2D(
+			gbufferWidth, gbufferHeight, false, 1, bgfx::TextureFormat::RG16F, pointSampleFlags);
+		m_gbufferTex[GBUFFER_RT_DEPTH] = bgfx::createTexture2D(
+			gbufferWidth, gbufferHeight, false, 1, bgfx::TextureFormat::D24, pointSampleFlags);
+		m_gbuffer = bgfx::createFrameBuffer(BX_COUNTOF(m_gbufferTex), m_gbufferTex, true);
+
+		// Intermediate targets all share one format; only the sampler flags differ.
+		const bgfx::TextureFormat::Enum targetFormat = bgfx::TextureFormat::RG11B10F;
+		m_currentColor.init(m_size[0], m_size[1], targetFormat, bilinearFlags);
+		m_previousColor.init(m_size[0], m_size[1], targetFormat, bilinearFlags);
+		m_txaaColor.init(m_size[0], m_size[1], targetFormat, bilinearFlags);
+		m_temporaryColor.init(m_size[0], m_size[1], targetFormat, bilinearFlags);
+		m_previousNormal.init(m_size[0], m_size[1], targetFormat, pointSampleFlags);
+		m_previousDenoise.init(m_size[0], m_size[1], targetFormat, bilinearFlags);
+	}
+
+	// Releases the g-buffer frame buffer and every intermediate render
+	// target. All buffers are set to destroy their textures.
+	void destroyFramebuffers()
+	{
+		bgfx::destroy(m_gbuffer);
+
+		// Destroyed in declaration order.
+		RenderTarget* const targets[] =
+		{
+			&m_currentColor,
+			&m_previousColor,
+			&m_txaaColor,
+			&m_temporaryColor,
+			&m_previousNormal,
+			&m_previousDenoise,
+		};
+
+		for (uint32_t ii = 0; ii < BX_COUNTOF(targets); ++ii)
+		{
+			targets[ii]->destroy();
+		}
+	}
+
+	// Copies per-frame state (camera jitter, previous-frame matrices and UI
+	// settings) into m_uniforms.
+	void updateUniforms()
+	{
+		// Sub-pixel camera jitter: cycle through 8 (x, y) pairs, one per frame.
+		{
+			const float jitterTable[] = {
+				(1.0f/2.0f),  (1.0f/3.0f),
+				(1.0f/4.0f),  (2.0f/3.0f),
+				(3.0f/4.0f),  (1.0f/9.0f),
+				(1.0f/8.0f),  (4.0f/9.0f),
+				(5.0f/8.0f),  (7.0f/9.0f),
+				(3.0f/8.0f),  (2.0f/9.0f),
+				(7.0f/8.0f),  (5.0f/9.0f),
+				(1.0f/16.0f), (8.0f/9.0f)
+			};
+
+			const uint32_t pairIdx = m_currFrame % 8;
+			const float rawX = jitterTable[2*pairIdx];
+			const float rawY = jitterTable[2*pairIdx+1];
+
+			// Strange constant for jitterX is because 8 values from halton2
+			// sequence above do not average out to 0.5, 1/16 skews it to the
+			// left. Subtracting a smaller value to center the range of jitter
+			// around 0. Not necessary for jitterY. Not confident this makes sense...
+			const float jitterX = 1.0f * (rawX - (7.125f/16.0f));
+			const float jitterY = 1.0f * (rawY - 0.5f);
+
+			// m_jitter still holds last frame's value at this point.
+			vec2Set(m_uniforms.m_cameraJitterCurr, jitterX, jitterY);
+			vec2Set(m_uniforms.m_cameraJitterPrev, m_jitter[0], m_jitter[1]);
+
+			// Remember this frame's jitter for the next call.
+			m_jitter[0] = jitterX;
+			m_jitter[1] = jitterY;
+		}
+
+		// TXAA settings; the bool is passed as 1.0 / 0.0.
+		m_uniforms.m_feedbackMin = m_feedbackMin;
+		m_uniforms.m_feedbackMax = m_feedbackMax;
+		m_uniforms.m_applyMitchellFilter = m_applyMitchellFilter ? 1.0f : 0.0f;
+
+		// Previous frame's camera matrices.
+		mat4Set(m_uniforms.m_worldToViewPrev, m_worldToViewPrev);
+		mat4Set(m_uniforms.m_viewToProjPrev, m_viewToProjPrev);
+
+		// With dynamic noise off the frame offset is pinned to zero.
+		m_uniforms.m_frameOffsetForNoise = 0.0f;
+		if (m_dynamicNoise)
+		{
+			m_uniforms.m_frameOffsetForNoise = float(m_currFrame % 8);
+		}
+
+		m_uniforms.m_noiseType = float(m_noiseType);
+		m_uniforms.m_sigmaDepth = m_sigmaDepth;
+		m_uniforms.m_sigmaNormal = m_sigmaNormal;
+	}
+
+
+	uint32_t m_width;
+	uint32_t m_height;
+	uint32_t m_debug;
+	uint32_t m_reset;
+
+	entry::MouseState m_mouseState;
+
+	// Resource handles
+	bgfx::ProgramHandle m_gbufferProgram;
+	bgfx::ProgramHandle m_combineProgram;
+	bgfx::ProgramHandle m_copyProgram;
+	bgfx::ProgramHandle m_denoiseTemporalProgram;
+	bgfx::ProgramHandle m_denoiseSpatialProgram3x3;
+	bgfx::ProgramHandle m_denoiseSpatialProgram5x5;
+	bgfx::ProgramHandle m_denoiseApplyLighting;
+	bgfx::ProgramHandle m_txaaProgram;
+
+	// Shader uniforms
+	Uniforms m_uniforms;
+
+	// Uniforms to identify texture samplers
+	bgfx::UniformHandle s_albedo;
+	bgfx::UniformHandle s_color;
+	bgfx::UniformHandle s_normal;
+	bgfx::UniformHandle s_velocity;
+	bgfx::UniformHandle s_depth;
+	bgfx::UniformHandle s_previousColor;
+	bgfx::UniformHandle s_previousNormal;
+
+	bgfx::FrameBufferHandle m_gbuffer;
+	bgfx::TextureHandle m_gbufferTex[GBUFFER_RENDER_TARGETS];
+
+	RenderTarget m_currentColor;
+	RenderTarget m_previousColor;
+	RenderTarget m_txaaColor;
+	RenderTarget m_temporaryColor; // need another buffer to ping-pong results
+	RenderTarget m_previousNormal;
+	RenderTarget m_previousDenoise; // color output by first spatial denoise pass, input to next frame as previous color
+
+	// One placed mesh instance in the scene.
+	struct Model
+	{
+		uint32_t mesh; // Index of mesh in m_meshes
+		float position[3]; // x, y, z translation passed to bx::mtxSRT when the model is submitted
+	};
+
+	Model m_models[MODEL_COUNT];
+	Mesh* m_meshes[BX_COUNTOF(s_meshPaths)];
+	Mesh* m_ground;
+	bgfx::TextureHandle m_groundTexture;
+	bgfx::TextureHandle m_normalTexture;
+
+	uint32_t m_currFrame;
+	float m_texelHalf = 0.0f;
+	float m_fovY = 60.0f;
+	bool m_recreateFrameBuffers = false;
+	bool m_havePrevious = false;
+
+	float m_view[16];
+	float m_proj[16];
+	float m_proj2[16];
+	float m_viewToProjPrev[16];
+	float m_worldToViewPrev[16];
+	float m_jitter[2];
+	int32_t m_size[2];
+
+	// UI parameters
+	// Copied into m_uniforms.m_noiseType by updateUniforms(); presumably the
+	// shader-side u_noiseType (0=none, 1=dither, 2=random) -- confirm against Uniforms.
+	int32_t m_noiseType = 2;
+	// When true the noise frame offset is m_currFrame % 8, otherwise it is 0.
+	bool m_dynamicNoise = true;
+	bool m_useTemporalPass = true;
+	int32_t m_spatialSampleType = 1;
+	int32_t m_denoisePasses = 5;
+	// Copied verbatim into m_uniforms by updateUniforms().
+	float m_sigmaDepth = 0.05f;
+	float m_sigmaNormal = 128.0f;
+	bool m_enableTxaa = false;
+	// Copied verbatim into m_uniforms by updateUniforms().
+	float m_feedbackMin = 0.8f;
+	float m_feedbackMax = 0.95f;
+	// Sent to the shader as 1.0 (true) or 0.0 (false).
+	bool m_applyMitchellFilter = true;
+	bool m_useTxaaSlow = false;
+};
+
+} // namespace
+
+ENTRY_IMPLEMENT_MAIN(ExampleDenoise, "xx-denoise", "Denoise.");

+ 25 - 0
examples/xx-denoise/fs_denoise_apply_lighting.sc

@@ -0,0 +1,25 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+SAMPLER2D(s_color, 0);
+SAMPLER2D(s_albedo, 1);
+
+// Multiplies the lighting in s_color by the albedo in s_albedo.
+void main()
+{
+	vec3 lighting = texture2D(s_color, v_texcoord0).xyz;
+
+	// albedo is converted with toLinear before it modulates the lighting
+	vec3 linearAlbedo = toLinear(texture2D(s_albedo, v_texcoord0).xyz);
+
+	// convert the lit result back with toGamma for output
+	vec3 lit = toGamma(lighting * linearAlbedo);
+
+	gl_FragColor = vec4(lit, 1.0);
+}

+ 18 - 0
examples/xx-denoise/fs_denoise_copy.sc

@@ -0,0 +1,18 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+SAMPLER2D(s_color, 0);
+
+// Pass-through: writes the s_color texel at this pixel unchanged.
+void main()
+{
+	gl_FragColor = texture2D(s_color, v_texcoord0);
+}

+ 61 - 0
examples/xx-denoise/fs_denoise_deferred_combine.sc

@@ -0,0 +1,61 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+SAMPLER2D(s_color, 0);
+SAMPLER2D(s_normal, 1);
+
+// Hashes a 2D coordinate into a value in [0, 1) via fract(sin(dot(...))).
+float ShadertoyNoise (vec2 uv) {
+	float phase = dot(uv.xy, vec2(12.9898,78.233));
+	return fract(sin(phase) * 43758.5453123);
+}
+
+// Floored modulo of a by b, computed as a - b*floor(a/b) and converted to int.
+int ModHelper (float a, float b)
+{
+	float wholeParts = floor(a/b);
+	float remainder = a - (b*wholeParts);
+	return int(remainder);
+}
+
+// Computes a very basic directional light term from the g-buffer normal and
+// masks it with a noise pattern selected by u_noiseType.
+void main()
+{
+	vec2 texCoord = v_texcoord0;
+
+	// mess with result so there's something to denoise
+	float noiseMask = 1.0;
+	if (1.5 < u_noiseType)
+	{
+		// random pattern, thresholded to fully on or fully off
+		vec2 seed = gl_FragCoord.xy + vec2(314.0, 159.0)*u_frameIdx;
+		float noise = ShadertoyNoise(seed);
+		noiseMask = (noise < 0.5) ? 0.0 : 1.0;
+	}
+	else if (0.5 < u_noiseType)
+	{
+		// 2x2 dither: one pixel of each 2x2 quad is lit per frame.
+		// having trouble compiling for gles when using % or mod :(
+		int quadX = ModHelper(gl_FragCoord.x, 2.0);
+		int quadY = ModHelper(gl_FragCoord.y, 2.0);
+		int quadSlot = quadY * 2 + quadX;
+		int frameSlot = ModHelper(u_frameIdx, 4.0);
+		noiseMask = (quadSlot == frameSlot) ? 1.0 : 0.0;
+	}
+
+	vec3 normal = NormalDecode(texture2D(s_normal, texCoord).xyz);
+
+	// roughness is fixed here; the value stored in the normal texture's
+	// w channel is not used.
+	float roughness = 0.5;
+
+	// need to get a valid view vector for any microfacet stuff :(
+	float gloss = 1.0-roughness;
+	float specPower = 1022.0 * gloss + 2.0;
+
+	vec3 lightDir = normalize(vec3(-0.2, 1.0, -0.2));
+	float NdotL = saturate(dot(normal, lightDir));
+	float diffuseTerm = NdotL*0.99 + 0.01;
+	float specularTerm = 5.0 * pow(NdotL, specPower);
+
+	float lightAmt = (diffuseTerm + specularTerm) * noiseMask;
+
+	gl_FragColor = vec4(vec3_splat(lightAmt), 1.0);
+}

+ 69 - 0
examples/xx-denoise/fs_denoise_gbuffer.sc

@@ -0,0 +1,69 @@
+$input v_normal, v_texcoord0, v_texcoord1, v_texcoord2, v_texcoord3
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+SAMPLER2D(s_albedo, 0);
+SAMPLER2D(s_normal, 1);
+
+// http://www.thetenthplanet.de/archives/1180
+// "followup: normal mapping without precomputed tangents"
+//
+// Builds a tangent/bitangent/normal matrix from the normal N and the
+// screen-space derivatives of position p and texture coordinate uv.
+mat3 cotangentFrame(vec3 N, vec3 p, vec2 uv)
+{
+	// edge vectors of the pixel triangle
+	vec3 dpdx = dFdx(p);
+	vec3 dpdy = dFdy(p);
+	vec2 duvdx = dFdx(uv);
+	vec2 duvdy = dFdy(uv);
+
+	// solve the linear system
+	vec3 perpToDy = cross(dpdy, N);
+	vec3 perpToDx = cross(N, dpdx);
+	vec3 tangent   = perpToDy * duvdx.x + perpToDx * duvdy.x;
+	vec3 bitangent = perpToDy * duvdx.y + perpToDx * duvdy.y;
+
+	// scale both axes by the longer one so the frame is scale-invariant
+	float maxLengthSq = max(dot(tangent,tangent), dot(bitangent,bitangent));
+	float invMax = inversesqrt(maxLengthSq);
+	return mat3(tangent*invMax, bitangent*invMax, N);
+}
+
+// Writes albedo, encoded normal + roughness and screen-space velocity to the
+// three g-buffer color outputs.
+void main()
+{
+	vec3 albedo = toLinear(texture2D(s_albedo, v_texcoord0).xyz);
+
+	// interpolated vertex normal
+	vec3 vertexNormal = normalize(v_normal);
+
+	// normal map stores xy only: expand to [-1, 1] and calculate z
+	vec2 mapXY = texture2D(s_normal, v_texcoord0).xy * 2.0 - 1.0;
+	float mapZ = sqrt(1.0 - dot(mapXY, mapXY));
+
+	// swap x and y, because the brick texture looks flipped, don't copy this...
+	vec3 normalMap = vec3(mapXY.yx, mapZ);
+
+	// perturb geometry normal by normal map
+	vec3 worldPos = v_texcoord2.xyz; // contains world space pos
+	mat3 TBN = cotangentFrame(vertexNormal, worldPos, v_texcoord0);
+	vec3 bumpedNormal = normalize(instMul(TBN, normalMap));
+
+	// need some proxy for roughness value w/o roughness texture
+	// assume horizontal (blue) normal map is smooth, and then
+	// modulate with albedo for some higher frequency detail
+	float roughness = (normalMap.z * mix(0.9, 1.0, albedo.y)) * 0.6 + 0.2;
+
+	// Calculate velocity as delta position from previous frame to this:
+	// perspective divide, flip y, then remap from [-1, 1] to [0, 1]
+	vec2 prevScreen = v_texcoord1.xy * (1.0/v_texcoord1.w);
+	prevScreen.y *= -1.0;
+	prevScreen = prevScreen * 0.5 + 0.5;
+	vec2 currScreen = gl_FragCoord.xy*u_viewTexel.xy;
+	vec2 velocity = currScreen - prevScreen;
+
+	vec3 bufferNormal = NormalEncode(bumpedNormal);
+
+	gl_FragData[0] = vec4(toGamma(albedo), 1.0);
+	gl_FragData[1] = vec4(bufferNormal, roughness); // Todo, better packing
+	gl_FragData[2] = vec4(velocity, 0.0, 0.0);
+}

+ 12 - 0
examples/xx-denoise/fs_denoise_spatial_3x3.sc

@@ -0,0 +1,12 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+// don't use 5x5 sample pattern for spatial denoise, use 3x3 instead
+#define USE_SPATIAL_5X5     0
+
+// includes main function to implement spatial pattern
+#include "fs_denoise_spatial_implementation.sh"

+ 12 - 0
examples/xx-denoise/fs_denoise_spatial_5x5.sc

@@ -0,0 +1,12 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+// use 5x5 sample pattern for spatial denoise
+#define USE_SPATIAL_5X5     1
+
+// includes main function to implement spatial pattern
+#include "fs_denoise_spatial_implementation.sh"

+ 97 - 0
examples/xx-denoise/fs_denoise_spatial_implementation.sh

@@ -0,0 +1,97 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef FS_DENOISE_SPATIAL_IMPLEMENTATION_SH
+#define FS_DENOISE_SPATIAL_IMPLEMENTATION_SH
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+SAMPLER2D(s_color,  0); // input color, signal to be denoised
+SAMPLER2D(s_normal, 1); // scene's gbuffer normal, used for edge stopping function
+SAMPLER2D(s_depth,  2); // scene's depth, used for edge stopping function
+
+// Edge-avoiding spatial filter pass. Accumulates a gaussian-weighted
+// neighborhood of s_color (5x5 when USE_SPATIAL_5X5 is set, otherwise 3x3),
+// with each tap further weighted by how closely its normal and depth match
+// the center pixel. Taps are spaced u_texCoordStep texels apart.
+void main()
+{
+	vec2 texCoord = v_texcoord0;
+
+	// read center pixel
+	vec4 color = texture2D(s_color, texCoord);
+	vec3 normal = NormalDecode(texture2D(s_normal, texCoord).xyz);
+	float depth = texture2D(s_depth, texCoord).x;
+	// NOTE: a depth gradient (abs(dFdx(depth)) + abs(dFdy(depth))) used to be
+	// computed here for the edge stopping function, but no weight below ever
+	// read it, so it has been dropped.
+
+	float du = u_texCoordStep * u_viewTexel.x;
+	float dv = u_texCoordStep * u_viewTexel.y;
+
+	// Only the kernel table differs between the two variants; the center
+	// weight uses literal indices so the array is never indexed dynamically
+	// outside the loops.
+#if USE_SPATIAL_5X5
+#define SPATIAL_KERNEL_WIDTH 5
+	float gaussianWeights[5];
+	gaussianWeights[0] = 1.0/16.0;
+	gaussianWeights[1] = 4.0/16.0;
+	gaussianWeights[2] = 6.0/16.0;
+	gaussianWeights[3] = 4.0/16.0;
+	gaussianWeights[4] = 1.0/16.0;
+	float initialWeight = (gaussianWeights[2]*gaussianWeights[2]);
+	int centerIdx = 2;
+#else
+#define SPATIAL_KERNEL_WIDTH 3
+	float gaussianWeights[3];
+	gaussianWeights[0] = 1.0/4.0;
+	gaussianWeights[1] = 2.0/4.0;
+	gaussianWeights[2] = 1.0/4.0;
+	float initialWeight = (gaussianWeights[1]*gaussianWeights[1]);
+	int centerIdx = 1;
+#endif // USE_SPATIAL_5X5
+
+	// the center tap is always accepted at full gaussian weight
+	vec4 accumulateColor = color * initialWeight;
+	float accumulateWeight = initialWeight;
+
+	for (int yy = 0; yy < SPATIAL_KERNEL_WIDTH; ++yy)
+	{
+		for (int xx = 0; xx < SPATIAL_KERNEL_WIDTH; ++xx)
+		{
+			// center pixel was accumulated above
+			if ((centerIdx == xx) && (centerIdx == yy)) {
+				continue;
+			}
+
+			float xOffset = float(xx) - float(centerIdx);
+			float yOffset = float(yy) - float(centerIdx);
+			vec2 sampleTexCoord = texCoord;
+			sampleTexCoord.x += xOffset * du;
+			sampleTexCoord.y += yOffset * dv;
+
+			// normal edge stopping: falls off as the normals diverge
+			vec4 sampleColor = texture2D(s_color, sampleTexCoord);
+			vec3 sampleNormal = NormalDecode(texture2D(s_normal, sampleTexCoord).xyz);
+			float normalWeight = pow(saturate(dot(normal, sampleNormal)), u_sigmaNormal);
+
+			// depth edge stopping: exponential falloff with depth difference
+			float sampleDepth = texture2D(s_depth, sampleTexCoord).x;
+			float depthDelta = depth - sampleDepth;
+			float depthWeight = exp(-abs(depthDelta) / max(1e-5, u_sigmaDepth*u_sigmaDepth));
+
+			float weight = depthWeight * normalWeight;
+
+			// apply gaussian
+			weight *= (gaussianWeights[xx]*gaussianWeights[yy]);
+
+			accumulateColor += sampleColor * weight;
+			accumulateWeight += weight;
+		}
+	}
+
+	// normalize by the total accepted weight
+	accumulateColor /= max(accumulateWeight, 1e-5);
+
+	gl_FragColor = accumulateColor;
+}
+
+#endif // FS_DENOISE_SPATIAL_IMPLEMENTATION_SH

+ 96 - 0
examples/xx-denoise/fs_denoise_temporal.sc

@@ -0,0 +1,96 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+#include "shared_functions.sh"
+
+SAMPLER2D(s_color,			0);
+SAMPLER2D(s_normal,			1);
+SAMPLER2D(s_velocity,		2);
+SAMPLER2D(s_previousColor,	3); // previous color
+SAMPLER2D(s_previousNormal,	4); // previous normal
+
+#define COS_PI_OVER_4   0.70710678118
+
+// Temporal denoise pass: reprojects this pixel into the previous frame and
+// blends the current color with a bilinear mix of the four nearest history
+// texels, rejecting any texel whose stored normal differs too much.
+void main()
+{
+	vec2 texCoord = v_texcoord0;
+
+	// read center pixel
+	vec4 color = texture2D(s_color, texCoord);
+	vec3 normal = NormalDecode(texture2D(s_normal, texCoord).xyz);
+
+	// offset to last pixel
+	vec2 velocity = texture2D(s_velocity, texCoord).xy;
+	vec2 texCoordPrev = GetTexCoordPreviousNoJitter(texCoord, velocity);
+
+	// SVGF approach suggests sampling and test/rejecting 4 contributing
+	// samples individually and then doing custom bilinear filter of result
+
+	// multiply texCoordPrev by dimensions to get nearest pixels, produces (X.5, Y.5) coordinate
+	// under no motion, so subtract half here to get correct weights for bilinear filter.
+	// not thrilled by this, feels like something is wrong.
+	vec2 prevPixel = texCoordPrev * u_viewRect.zw - vec2_splat(0.5);
+	vec2 prevPixelBase = floor(prevPixel);
+	vec2 prevPixelFrac = fract(prevPixel);
+
+	// bilinear weights for the 2x2 footprint, row by row
+	float nearX = 1.0 - prevPixelFrac.x;
+	float farX  = prevPixelFrac.x;
+	float nearY = 1.0 - prevPixelFrac.y;
+	float farY  = prevPixelFrac.y;
+
+	float coordWeights[4];
+	coordWeights[0] = nearX*nearY;
+	coordWeights[1] = farX*nearY;
+	coordWeights[2] = nearX*farY;
+	coordWeights[3] = farX*farY;
+
+	// adding a half texel here to correct the modification above, in addition to pixel offset
+	// to grab adjacent pixels for bilinear filter. not thrilled by this, feels like something is wrong.
+	vec2 coords[4];
+	coords[0] = (prevPixelBase + vec2(0.5, 0.5)) * u_viewTexel.xy;
+	coords[1] = (prevPixelBase + vec2(1.5, 0.5)) * u_viewTexel.xy;
+	coords[2] = (prevPixelBase + vec2(0.5, 1.5)) * u_viewTexel.xy;
+	coords[3] = (prevPixelBase + vec2(1.5, 1.5)) * u_viewTexel.xy;
+
+	// SVGF paper mentions comparing depths and normals to establish
+	// whether samples are similar enough to contribute, but does not
+	// describe how. References the following paper, which uses threshold
+	// of cos(PI/4) to accept/reject.
+	// https://software.intel.com/content/www/us/en/develop/articles/streaming-g-buffer-compression-for-multi-sample-anti-aliasing.html
+	// this paper also discusses using depth derivatives to estimate overlapping depth range
+
+	vec4 historyColor = vec4_splat(0.0);
+	float historyWeight = 0.0;
+	for (int i = 0; i < 4; ++i)
+	{
+		vec4 tapColor = texture2D(s_previousColor, coords[i]);
+		vec3 tapNormal = NormalDecode(texture2D(s_previousNormal, coords[i]).xyz);
+
+		// binary accept/reject on the normal, then scale by the bilinear weight
+		float normalSimilarity = dot(normal, tapNormal);
+		float tapWeight = (normalSimilarity < COS_PI_OVER_4) ? 0.0 : 1.0;
+		tapWeight *= coordWeights[i];
+
+		historyColor += tapColor * tapWeight;
+		historyWeight += tapWeight;
+	}
+
+	// Blend toward history only when at least one tap was accepted; otherwise
+	// the current color passes through unchanged.
+	// (debug colorize for the rejected case: color.xyz *= vec3(0.5, 0.01, 0.65);)
+	if (0.0 < historyWeight)
+	{
+		historyColor *= (1.0 / historyWeight);
+		color = mix(color, historyColor, 0.8);
+	}
+
+	gl_FragColor = color;
+}

+ 208 - 0
examples/xx-denoise/fs_denoise_txaa.sc

@@ -0,0 +1,208 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "shared_functions.sh"
+
+#define APPLY_TXAA_IN_LINEAR	0
+#define DEBUG_HALF_SCREEN		0
+
+SAMPLER2D(s_color,			0); // this frame's shaded color
+SAMPLER2D(s_previousColor,	1); // previous frame's shaded color
+SAMPLER2D(s_velocity,		2); // screenspace delta from previous to current frame
+SAMPLER2D(s_depth,			3); // depth buffer
+
+// Searches the 3x3 texel neighborhood around _texCoord for the smallest
+// depth value.
+//
+// _depthSampler: depth texture; only the x channel is read.
+// _texCoord:     center of the neighborhood, in texture coordinates.
+//
+// Returns vec3(texCoord.xy, depth) of the sample with the minimum depth. On
+// ties the earliest sample in the order below wins.
+vec3 FindNearestDepth(sampler2D _depthSampler, vec2 _texCoord) {
+	// one-texel steps along each axis; dv must step in y (it previously
+	// stepped in x, so the rows above and below were never sampled)
+	vec2 du = vec2(u_viewTexel.x, 0.0);
+	vec2 dv = vec2(0.0, u_viewTexel.y);
+
+	vec2 coord = _texCoord - du - dv;
+	vec3 tcd0 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord      - dv;
+	vec3 tcd1 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord + du - dv;
+	vec3 tcd2 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord + du;
+	vec3 tcd3 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord;
+	vec3 tcd4 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord - du;
+	vec3 tcd5 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord - du + dv;
+	vec3 tcd6 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord      + dv;
+	vec3 tcd7 = vec3(coord, texture2D(_depthSampler, coord).x);
+	coord = _texCoord + du + dv;
+	vec3 tcd8 = vec3(coord, texture2D(_depthSampler, coord).x);
+
+	// keep the sample with the smallest depth (z component)
+	vec3 minTcd = tcd0;
+	if (tcd1.z < minTcd.z) minTcd = tcd1;
+	if (tcd2.z < minTcd.z) minTcd = tcd2;
+	if (tcd3.z < minTcd.z) minTcd = tcd3;
+	if (tcd4.z < minTcd.z) minTcd = tcd4;
+	if (tcd5.z < minTcd.z) minTcd = tcd5;
+	if (tcd6.z < minTcd.z) minTcd = tcd6;
+	if (tcd7.z < minTcd.z) minTcd = tcd7;
+	if (tcd8.z < minTcd.z) minTcd = tcd8;
+
+	return minTcd;
+}
+
+// Piecewise cubic filter kernel parameterized by _b and _c, evaluated at
+// distance |_x|. Returns 0 for |_x| >= 2.
+float Mitchell (float _b, float _c, float _x) {
+
+	float dist = abs(_x);
+	float distSq = dist*dist;
+	float distCu = distSq*dist;
+
+	float weight = 0.0;
+	if (dist < 1.0) {
+		// inner lobe
+		weight = (12.0-9.0*_b-6.0*_c)*distCu + (-18.0+12.0*_b+6.0*_c)*distSq + (6.0-2.0*_b);
+	}
+	else if (dist < 2.0) {
+		// outer lobe
+		weight = (-_b-6.0*_c)*distCu + (6.0*_b+30.0*_c)*distSq + (-12.0*_b-48.0*_c)*dist + (8.0*_b+24.0*_c);
+	}
+
+	return weight*(1.0/6.0);
+}
+
+
+// Temporal anti-aliasing resolve. Reprojects the previous frame's color with
+// the velocity of the nearest-depth neighbor, clips that history to a
+// variance box built from the current 3x3 neighborhood, and blends it with
+// the (optionally filtered) current color using a feedback factor that
+// shrinks as the luma difference grows.
+void main()
+{
+	vec2 texCoord = v_texcoord0;
+	vec3 colorCurr = texture2D(s_color, texCoord).xyz;
+
+#if DEBUG_HALF_SCREEN
+	if (texCoord.x > 0.5) {
+#endif
+
+	// take velocity from the nearest-depth texel of the 3x3 neighborhood
+	vec3 nearestCoordAndDepth = FindNearestDepth(s_depth, texCoord);
+
+	vec2 velocity = texture2D(s_velocity, nearestCoordAndDepth.xy).xy;
+	vec2 texCoordPrev = GetTexCoordPrevious(texCoord, velocity);
+
+	vec3 colorPrev = texture2D(s_previousColor, texCoordPrev).xyz;
+
+	// Sample local neighborhood for variance clipping
+	vec2 du = vec2(u_viewTexel.x, 0.0);
+	vec2 dv = vec2(0.0, u_viewTexel.y);
+
+	vec3 colorUL = texture2D(s_color, texCoord - du - dv).xyz;
+	vec3 colorUp = texture2D(s_color, texCoord      - dv).xyz;
+	vec3 colorUR = texture2D(s_color, texCoord + du - dv).xyz;
+	vec3 colorRi = texture2D(s_color, texCoord + du     ).xyz;
+	vec3 colorLe = texture2D(s_color, texCoord - du     ).xyz;
+	vec3 colorDL = texture2D(s_color, texCoord - du + dv).xyz;
+	vec3 colorDo = texture2D(s_color, texCoord      + dv).xyz;
+	vec3 colorDR = texture2D(s_color, texCoord + du + dv).xyz;
+
+	// in an ideal world, lighting and such is in linear space,
+	// would possibly want to convert to gamma and apply txaa
+	// there. but this sample isn't storing intermediate results
+	// in linear space (or doing any reasonable lighting) so
+	// would possibly want to do the opposite.
+#if APPLY_TXAA_IN_LINEAR
+	colorCurr = toLinear(colorCurr);
+	colorPrev = toLinear(colorPrev);
+	colorUL = toLinear(colorUL);
+	colorUp = toLinear(colorUp);
+	colorUR = toLinear(colorUR);
+	colorLe = toLinear(colorLe);
+	colorRi = toLinear(colorRi);
+	colorDL = toLinear(colorDL);
+	colorDo = toLinear(colorDo);
+	colorDR = toLinear(colorDR);
+#endif
+
+	// Compute variance box on color neighborhood, clip to box
+	{
+		// first and second moments over the 3x3 neighborhood
+		vec3 m1 = vec3_splat(0.0);
+		vec3 m2 = vec3_splat(0.0);
+		m1 += colorUL; m2 += colorUL*colorUL;
+		m1 += colorUp; m2 += colorUp*colorUp;
+		m1 += colorUR; m2 += colorUR*colorUR;
+		m1 += colorLe; m2 += colorLe*colorLe;
+		m1 += colorCurr; m2 += colorCurr*colorCurr;
+		m1 += colorRi; m2 += colorRi*colorRi;
+		m1 += colorDL; m2 += colorDL*colorDL;
+		m1 += colorDo; m2 += colorDo*colorDo;
+		m1 += colorDR; m2 += colorDR*colorDR;
+		m1 *= (1.0/9.0);
+		m2 *= (1.0/9.0);
+
+		// box half-extent is 1.4 standard deviations around the mean
+		vec3 var = max(vec3_splat(0.0), m2 - m1*m1);
+		vec3 sigma = sqrt(var) * 1.4;
+
+		// Distance of the history color from the mean, in box half-extents.
+		// sigma is exactly zero wherever the neighborhood is flat, so guard
+		// the divide: 0/0 there would yield NaN, which would then be fed
+		// back through the history buffer.
+		vec3 displacement = colorPrev - m1;
+		vec3 units = abs(displacement / max(sigma, vec3_splat(1.0e-5)));
+		float maxUnit = max(max(units.x, units.y), max(units.z, 1.0));
+
+		// pull history toward the mean until it lies inside the box
+		colorPrev = m1 + displacement * (1.0/maxUnit);
+	}
+
+	float lumaCurr = dot(colorCurr, vec3(0.3, 0.6, 0.1));
+	float lumaPrev = dot(colorPrev, vec3(0.3, 0.6, 0.1));
+
+	// adjust feedback/blend amount depending on color difference
+	float r = abs(lumaCurr-lumaPrev) / max(max(lumaCurr, lumaPrev), 0.2);
+	r = 1.0-r;
+	r = r*r;
+	float feedback = mix(u_feedbackMin, u_feedbackMax, r);
+
+	vec3 colorOut = mix(colorCurr, colorPrev, feedback);
+
+	// optionally blur current color, since we've already taken
+	// the samples to build the variance window. could use more
+	// blur when feedback is lower, to replace temporal accumulation
+	// with spatial accumulation. or could use filter to sharpen.
+	if (u_applyMitchellFilter > 0.0)
+	{
+		// adjust filter coefficients depending on color difference
+		float b = mix(3.0/2.0, 1.0/3.0, r);
+		float c = mix(-1.0/4.0, 1.0/3.0, r);
+
+		// kernel weights for the center, edge and corner taps
+		float m0 = Mitchell(b, c, 0.0);
+		float m1 = Mitchell(b, c, 1.0);
+		float m2 = Mitchell(b, c, sqrt(2.0));
+
+		vec3 colorFilter = m0 * colorCurr;
+		colorFilter += m1 * colorLe;
+		colorFilter += m1 * colorRi;
+		colorFilter += m1 * colorUp;
+		colorFilter += m1 * colorDo;
+		colorFilter += m2 * colorUL;
+		colorFilter += m2 * colorUR;
+		colorFilter += m2 * colorDL;
+		colorFilter += m2 * colorDR;
+		colorFilter *= 1.0/(m0 + 4.0*m1 + 4.0*m2);
+
+		colorOut = mix(colorFilter, colorPrev, feedback);
+	}
+
+	// in an ideal world, lighting and such is in linear space,
+	// would possibly want to convert to gamma and apply txaa
+	// there. but this sample isn't storing intermediate results
+	// in linear space (or doing any reasonable lighting) so
+	// would possibly want to do the opposite.
+#if APPLY_TXAA_IN_LINEAR
+	colorCurr = toGamma(colorOut);
+#else
+	colorCurr = colorOut;
+#endif
+
+
+#if DEBUG_HALF_SCREEN
+	}
+#endif
+
+	gl_FragColor = vec4(colorCurr, 1.0);
+}

+ 10 - 0
examples/xx-denoise/makefile

@@ -0,0 +1,10 @@
+#
+# Copyright 2011-2019 Branimir Karadzic. All rights reserved.
+# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+#
+
+BGFX_DIR=../..
+RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
+BUILD_DIR=../../.build
+
+include $(BGFX_DIR)/scripts/shader.mk

+ 92 - 0
examples/xx-denoise/normal_encoding.sh

@@ -0,0 +1,92 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef NORMAL_ENCODING_SH
+#define NORMAL_ENCODING_SH
+
+#define NE_USE_OCTAHEDRAL_REPRESENTATION   0
+
+// From "A Survey of Efficient Representations for Independent Unit Vectors"
+// http://jcgt.org/published/0003/02/01/paper.pdf
+
+// Convert an oct24 (2x12bit normal) to an rgb8 value for storing in texture.
+// Input components are clamped to [-1, 1]; output components are in [0, 1].
+vec3 snorm12x2_to_unorm8x3 (vec2 f) {
+
+	// quantize each component to an integer in 0..4094
+	vec2 quantized = floor(clamp(f, -1.0, 1.0) * 2047.0 + 2047.5);
+	float highY = floor(quantized.y / 256.0);
+	float scaledX = quantized.x / 16.0;
+
+	// "This code assumes that rounding will occur during storage."
+	// -- Not certain but this appears to mainly apply to the x channel.
+	//    From paper: x = u.x / 16.0 - 0.5
+	//    Instead round by +0.5 and floor.
+	vec3 bytes = vec3(floor(scaledX), fract(scaledX) * 256.0 + highY, quantized.y - highY * 256.0);
+	return bytes / 255.0;
+}
+
+// Unpack oct24 (2x12bit normal) from an rgb8 value stored in texture (normal spec).
+// Output components are clamped to [-1, 1].
+vec2 unorm8x3_to_snorm12x2 (vec3 u) {
+
+	vec3 bytes = u * 255.0;
+	float midByte = bytes.y * (1.0 / 16.0);
+
+	// the middle byte is split between the two values: its integer part
+	// (after the divide by 16) goes to x, its fractional part to y
+	vec2 quantized = vec2(bytes.x * 16.0 + floor(midByte), fract(midByte) * (16.0 * 256.0) + bytes.z);
+
+	return clamp(quantized * (1.0 / 2047.0) - 1.0, -1.0, 1.0);
+}
+
+// Built in sign test could return 0, don't want that.
+// Returns +1 for components >= 0 and -1 otherwise.
+vec2 signNotZero (vec2 v) {
+	float signX = (v.x >= 0.0) ? 1.0 : -1.0;
+	float signY = (v.y >= 0.0) ? 1.0 : -1.0;
+	return vec2(signX, signY);
+}
+
+// Assume normalized input. Output is (-1, 1) for each component
+vec2 float32x3_to_oct(vec3 v) {
+
+	// Project the sphere onto the octahedron, and then onto the xy plane
+	float invL1Norm = 1.0 / (abs(v.x) + abs(v.y) + abs(v.z));
+	vec2 projected = v.xy * invL1Norm;
+
+	// Reflect the folds of the lower hemisphere over the diagonals
+	if (v.z <= 0.0) {
+		return (1.0 - abs(projected.yx)) * signNotZero(projected);
+	}
+
+	return projected;
+}
+
+// Get a float3 normal from an oct representation
+vec3 oct_to_float32x3 (vec2 e) {
+	float height = 1.0 - abs(e.x) - abs(e.y);
+	vec3 unfolded = vec3(e.xy, height);
+
+	// a negative z means the point lies on a folded (lower hemisphere) face
+	if (unfolded.z < 0.0) {
+		unfolded.xy = (1.0 - abs(unfolded.yx)) * signNotZero(unfolded.xy);
+	}
+
+	return normalize(unfolded);
+}
+
+// Encodes a normal as an oct mapping stored in three 8-bit channels.
+vec3 SignedNormalEncodeToOct (vec3 normal) {
+
+	vec2 octCoord = float32x3_to_oct(normal);
+	return snorm12x2_to_unorm8x3(octCoord);
+}
+
+// Decodes a normal from the three-channel oct encoding.
+vec3 SignedNormalDecodeFromOct (vec3 normal) {
+
+	vec2 octCoord = unorm8x3_to_snorm12x2(normal);
+	return oct_to_float32x3(octCoord);
+}
+
+// Encodes a normal for storage in a texture: octahedral when
+// NE_USE_OCTAHEDRAL_REPRESENTATION is set, otherwise a plain remap of each
+// component from [-1, 1] to [0, 1].
+vec3 NormalEncode (vec3 normal)
+{
+#if NE_USE_OCTAHEDRAL_REPRESENTATION
+	vec3 encoded = SignedNormalEncodeToOct(normal);
+#else
+	vec3 encoded = normal * 0.5 + 0.5;
+#endif
+	return encoded;
+}
+
+// Inverse of NormalEncode: octahedral decode when
+// NE_USE_OCTAHEDRAL_REPRESENTATION is set, otherwise a plain remap of each
+// component from [0, 1] to [-1, 1].
+vec3 NormalDecode (vec3 normal)
+{
+#if NE_USE_OCTAHEDRAL_REPRESENTATION
+	vec3 decoded = SignedNormalDecodeFromOct(normal);
+#else
+	vec3 decoded = normal * 2.0 - 1.0;
+#endif
+	return decoded;
+}
+
+#endif // NORMAL_ENCODING_SH

+ 32 - 0
examples/xx-denoise/parameters.sh

@@ -0,0 +1,32 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef PARAMETERS_SH
+#define PARAMETERS_SH
+
+// All shader parameters are packed into a single vec4 array; the macros
+// below give names to the individual components.
+uniform vec4 u_params[13];
+
+// u_params[0]: camera jitter for the current (xy) and previous (zw) frame
+#define u_jitterCurr				(u_params[0].xy)
+#define u_jitterPrev				(u_params[0].zw)
+// u_params[1..2]: TXAA settings. No macro in this file names u_params[1].zw
+// or u_params[2].x.
+#define u_feedbackMin				(u_params[1].x)
+#define u_feedbackMax				(u_params[1].y)
+#define u_applyMitchellFilter		(u_params[2].y)
+
+// u_params[3..10]: previous frame's world-to-view and view-to-projection
+// matrices, one vec4 each
+#define u_worldToViewPrev0			(u_params[3])
+#define u_worldToViewPrev1			(u_params[4])
+#define u_worldToViewPrev2			(u_params[5])
+#define u_worldToViewPrev3			(u_params[6])
+#define u_viewToProjPrev0			(u_params[7])
+#define u_viewToProjPrev1			(u_params[8])
+#define u_viewToProjPrev2			(u_params[9])
+#define u_viewToProjPrev3			(u_params[10])
+
+// u_params[11..12]: noise and spatial denoise settings
+#define u_frameIdx					(u_params[11].x)
+#define u_noiseType					(u_params[11].y) // 0=none, 1=dither, 2=random
+#define u_texCoordStep				(u_params[12].x)
+#define u_sigmaDepth				(u_params[12].y)
+#define u_sigmaNormal				(u_params[12].z)
+
+#endif // PARAMETERS_SH

+ 25 - 0
examples/xx-denoise/shared_functions.sh

@@ -0,0 +1,25 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef SHARED_FUNCTIONS_SH
+#define SHARED_FUNCTIONS_SH
+
+// Returns the previous-frame texture coordinate for texCoord: the current
+// coordinate minus the velocity, with no jitter compensation.
+vec2 GetTexCoordPreviousNoJitter(vec2 texCoord, vec2 velocity)
+{
+	return texCoord - velocity;
+}
+
+// Returns the previous-frame texture coordinate for texCoord: the current
+// coordinate minus the velocity, shifted by the change in camera jitter
+// between the two frames (converted from texels to texture coordinates).
+vec2 GetTexCoordPrevious(vec2 texCoord, vec2 velocity)
+{
+	vec2 reprojected = texCoord - velocity;
+	vec2 jitterShift = (u_jitterCurr-u_jitterPrev) * u_viewTexel.xy;
+
+	reprojected += jitterShift;
+	return reprojected;
+}
+
+#endif // SHARED_FUNCTIONS_SH

+ 9 - 0
examples/xx-denoise/varying.def.sc

@@ -0,0 +1,9 @@
+vec4 a_position  : POSITION;
+vec2 a_texcoord0 : TEXCOORD0;
+vec3 a_normal    : NORMAL;
+
+vec2 v_texcoord0 : TEXCOORD0;
+vec4 v_texcoord1 : TEXCOORD1;
+vec4 v_texcoord2 : TEXCOORD2;
+vec4 v_texcoord3 : TEXCOORD3;
+vec3 v_normal    : NORMAL = vec3(0.0, 0.0, 1.0);

+ 54 - 0
examples/xx-denoise/vs_denoise_gbuffer.sc

@@ -0,0 +1,54 @@
+$input a_position, a_normal, a_texcoord0
+$output v_normal, v_texcoord0, v_texcoord1, v_texcoord2, v_texcoord3
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+// G-buffer vertex shader. Outputs the clip-space position, the world-space
+// normal, the texture coordinate, the vertex's previous-frame projection-space
+// position (for velocity), and the world-space vertex and camera positions.
+void main()
+{
+	// Calculate vertex position
+	vec3 pos = a_position.xyz;
+	gl_Position = mul(u_modelViewProj, vec4(pos, 1.0));
+
+	// Rebuild previous frame's matrices from the packed vec4 parameters
+	mat4 worldToViewPrev = mat4(
+		u_worldToViewPrev0,
+		u_worldToViewPrev1,
+		u_worldToViewPrev2,
+		u_worldToViewPrev3
+	);
+	mat4 viewToProjPrev = mat4(
+		u_viewToProjPrev0,
+		u_viewToProjPrev1,
+		u_viewToProjPrev2,
+		u_viewToProjPrev3
+	);
+
+	// Calculate previous frame's position: world -> previous view -> previous projection
+	vec3 wsPos  = mul(u_model[0], vec4(pos, 1.0)).xyz;
+	vec3 vspPos = instMul(worldToViewPrev, vec4(wsPos, 1.0)).xyz;
+	vec4 pspPos = instMul(viewToProjPrev, vec4(vspPos, 1.0));
+
+	// Calculate normal, unpack
+	vec3 osNormal = a_normal.xyz * 2.0 - 1.0;
+
+	// Transform normal into world space
+	vec3 wsNormal = mul(u_model[0], vec4(osNormal, 0.0)).xyz;
+
+	v_normal.xyz = normalize(wsNormal);
+	v_texcoord0 = a_texcoord0;
+
+	// Store previous frame projection space position in extra texCoord attribute
+	v_texcoord1 = pspPos;
+
+	// Store world space vertex position and camera position in extra
+	// texCoord attributes. No view vector is written; a consumer that needs
+	// one can derive it from these two.
+	vec3 wsCamPos = mul(u_invView, vec4(0.0, 0.0, 0.0, 1.0)).xyz;
+
+	v_texcoord2 = vec4(wsPos, 1.0);
+	v_texcoord3 = vec4(wsCamPos, 1.0);
+}

+ 10 - 0
examples/xx-denoise/vs_denoise_screenquad.sc

@@ -0,0 +1,10 @@
+$input a_position, a_texcoord0
+$output v_texcoord0
+
+#include "../common/common.sh"
+
+// Screen quad vertex shader: transforms the position by u_modelViewProj and
+// forwards the texture coordinate unchanged.
+void main()
+{
+	vec4 localPos = vec4(a_position.xyz, 1.0);
+
+	v_texcoord0 = a_texcoord0;
+	gl_Position = mul(u_modelViewProj, localPos);
+}