Sfoglia il codice sorgente

Merge pull request #1575 from attilaz/assao-final

Adaptive Screen Space Ambient Occlusion example. Issue #978.
Бранимир Караџић 7 anni fa
parent
commit
9aa4c55268
29 ha cambiato i file con 2995 aggiunte e 0 eliminazioni
  1. 1147 0
      examples/39-assao/assao.cpp
  2. 103 0
      examples/39-assao/cs_assao_apply.sc
  3. 50 0
      examples/39-assao/cs_assao_generate_importance_map.sc
  4. 520 0
      examples/39-assao/cs_assao_generate_q.sh
  5. 9 0
      examples/39-assao/cs_assao_generate_q0.sc
  6. 9 0
      examples/39-assao/cs_assao_generate_q1.sc
  7. 9 0
      examples/39-assao/cs_assao_generate_q2.sc
  8. 9 0
      examples/39-assao/cs_assao_generate_q3.sc
  9. 9 0
      examples/39-assao/cs_assao_generate_q3base.sc
  10. 15 0
      examples/39-assao/cs_assao_load_counter_clear.sc
  11. 29 0
      examples/39-assao/cs_assao_non_smart_apply.sc
  12. 37 0
      examples/39-assao/cs_assao_non_smart_blur.sc
  13. 26 0
      examples/39-assao/cs_assao_non_smart_half_apply.sc
  14. 47 0
      examples/39-assao/cs_assao_postprocess_importance_map_a.sc
  15. 55 0
      examples/39-assao/cs_assao_postprocess_importance_map_b.sc
  16. 103 0
      examples/39-assao/cs_assao_prepare_depth_mip.sc
  17. 58 0
      examples/39-assao/cs_assao_prepare_depths.sc
  18. 192 0
      examples/39-assao/cs_assao_prepare_depths_and_normals.sc
  19. 188 0
      examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc
  20. 48 0
      examples/39-assao/cs_assao_prepare_depths_half.sc
  21. 82 0
      examples/39-assao/cs_assao_smart_blur.sc
  22. 83 0
      examples/39-assao/cs_assao_smart_blur_wide.sc
  23. 43 0
      examples/39-assao/fs_assao_deferred_combine.sc
  24. 22 0
      examples/39-assao/fs_assao_gbuffer.sc
  25. 10 0
      examples/39-assao/makefile
  26. 42 0
      examples/39-assao/uniforms.sh
  27. 7 0
      examples/39-assao/varying.def.sc
  28. 16 0
      examples/39-assao/vs_assao.sc
  29. 27 0
      examples/39-assao/vs_assao_gbuffer.sc

+ 1147 - 0
examples/39-assao/assao.cpp

@@ -0,0 +1,1147 @@
+/*
+* Copyright 2018 Attila Kocsis. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include <common.h>
+#include <camera.h>
+#include <bgfx_utils.h>
+#include <imgui/imgui.h>
+#include <bx/rng.h>
+#include <bx/os.h>
+
+namespace
+{
+
+	/*
+	* ASSAO is a SSAO implementation tuned for scalability and flexibility.
+
+	* https://software.intel.com/en-us/articles/adaptive-screen-space-ambient-occlusion
+	* https://github.com/GameTechDev/ASSAO
+	*/
+
+	// Render passes
+#define RENDER_PASS_GBUFFER      0  // GBuffer for normals and albedo
+#define RENDER_PASS_COMBINE      1  // Directional light and final result
+
+	// Gbuffer has multiple render targets; these are the attachment indices passed to bgfx::getTexture(m_gbuffer, ...)
+#define GBUFFER_RT_NORMAL 0
+#define GBUFFER_RT_COLOR  1
+#define GBUFFER_RT_DEPTH  2
+
+	// Random meshes we draw
+#define MODEL_COUNT 120  // In this demo, a model is a mesh plus a transform
+
+#define SAMPLER_POINT_CLAMP (BGFX_SAMPLER_MIN_POINT|BGFX_SAMPLER_MAG_POINT|BGFX_SAMPLER_MIP_POINT|BGFX_SAMPLER_U_CLAMP| BGFX_SAMPLER_V_CLAMP | BGFX_SAMPLER_W_CLAMP) // Point min/mag/mip filtering, clamped U/V/W addressing
+#define SAMPLER_POINT_MIRROR (BGFX_SAMPLER_MIN_POINT|BGFX_SAMPLER_MAG_POINT|BGFX_SAMPLER_MIP_POINT|BGFX_SAMPLER_U_MIRROR| BGFX_SAMPLER_V_MIRROR | BGFX_SAMPLER_W_MIRROR) // Point min/mag/mip filtering, mirrored U/V/W addressing
+#define SAMPLER_LINEAR_CLAMP (BGFX_SAMPLER_U_CLAMP| BGFX_SAMPLER_V_CLAMP | BGFX_SAMPLER_W_CLAMP) // Clamped U/V/W addressing, no filter flags set (presumably bgfx's default linear filtering -- TODO confirm)
+
+#define SSAO_DEPTH_MIP_LEVELS                       4 // Exclusive upper bound of the mip index in the depth mip loop of update()
+
+	// Mesh files loaded by init(): entry ii ends up in m_meshes[ii] and is
+	// drawn with the scale stored at the same index of s_meshScale.
+	static const char* s_meshPaths[] = {
+		"meshes/cube.bin",
+		"meshes/orb.bin",
+		"meshes/column.bin",
+		"meshes/bunny_decimated.bin",
+		"meshes/tree.bin",
+		"meshes/hollowcube.bin",
+	};
+
+	// Uniform scale applied to each mesh when it is drawn; indexed by the
+	// same mesh index as s_meshPaths.
+	static const float s_meshScale[] = {
+		0.25f, // meshes/cube.bin
+		0.5f,  // meshes/orb.bin
+		0.05f, // meshes/column.bin
+		0.5f,  // meshes/bunny_decimated.bin
+		0.05f, // meshes/tree.bin
+		0.25f, // meshes/hollowcube.bin
+	};
+
+	// Vertex decl for our screen space quad (used in deferred rendering)
+	// Vertex used by screenSpaceQuad(): a 3-float position followed by a
+	// 2-float texture coordinate.
+	struct PosTexCoord0Vertex
+	{
+		// Position.
+		float m_x;
+		float m_y;
+		float m_z;
+
+		// Texture coordinate (TexCoord0).
+		float m_u;
+		float m_v;
+
+		// Layout description shared by every vertex of this type.
+		static bgfx::VertexDecl ms_decl;
+
+		// Fills ms_decl so that it matches the member order above.
+		static void init()
+		{
+			ms_decl.begin();
+			ms_decl.add(bgfx::Attrib::Position, 3, bgfx::AttribType::Float);
+			ms_decl.add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float);
+			ms_decl.end();
+		}
+	};
+	bgfx::VertexDecl PosTexCoord0Vertex::ms_decl;
+
+	// Utility function to draw a screen space quad for deferred rendering
+	// Binds a transient vertex buffer holding a single triangle (3 vertices) that is
+	// used to draw the full screen deferred pass.
+	//
+	//   _textureWidth, _textureHeight  Size the half-texel offset is divided by.
+	//   _texelHalf                     Half-texel offset in texels (init() sets 0.5 on
+	//                                  Direct3D9 and 0.0 on every other renderer).
+	//   _originBottomLeft              When true the V range is flipped.
+	//   _width, _height                Scale of the triangle.
+	//
+	// Nothing is bound when fewer than 3 transient vertices are available.
+	void screenSpaceQuad(float _textureWidth, float _textureHeight, float _texelHalf, bool _originBottomLeft, float _width = 1.0f, float _height = 1.0f)
+	{
+		if (3 == bgfx::getAvailTransientVertexBuffer(3, PosTexCoord0Vertex::ms_decl))
+		{
+			bgfx::TransientVertexBuffer vb;
+			bgfx::allocTransientVertexBuffer(&vb, 3, PosTexCoord0Vertex::ms_decl);
+			PosTexCoord0Vertex* vertex = (PosTexCoord0Vertex*)vb.data;
+
+			const float minx = -_width;
+			const float maxx = _width;
+			const float miny = 0.0f;
+			const float maxy = _height * 2.0f;
+
+			const float texelHalfW = _texelHalf / _textureWidth;
+			const float texelHalfH = _texelHalf / _textureHeight;
+
+			// Both U bounds are shifted by the horizontal half-texel. The upper bound
+			// previously used texelHalfH, which skewed U on non-square targets.
+			const float minu = -1.0f + texelHalfW;
+			const float maxu = 1.0f + texelHalfW;
+
+			const float zz = 0.0f;
+
+			float minv = texelHalfH;
+			float maxv = 2.0f + texelHalfH;
+
+			if (_originBottomLeft)
+			{
+				// Flip V and move the range down by one.
+				float temp = minv;
+				minv = maxv;
+				maxv = temp;
+
+				minv -= 1.0f;
+				maxv -= 1.0f;
+			}
+
+			vertex[0].m_x = minx;
+			vertex[0].m_y = miny;
+			vertex[0].m_z = zz;
+			vertex[0].m_u = minu;
+			vertex[0].m_v = minv;
+
+			vertex[1].m_x = maxx;
+			vertex[1].m_y = miny;
+			vertex[1].m_z = zz;
+			vertex[1].m_u = maxu;
+			vertex[1].m_v = minv;
+
+			vertex[2].m_x = maxx;
+			vertex[2].m_y = maxy;
+			vertex[2].m_z = zz;
+			vertex[2].m_u = maxu;
+			vertex[2].m_v = maxv;
+
+			bgfx::setVertexBuffer(0, &vb);
+		}
+	}
+
+	// Tunable ASSAO parameters; update() exposes them through the ImGui "Settings" window.
+	struct Settings
+	{
+		float m_radius;                            // [0.0,  ~ ] World (view) space size of the occlusion sphere.
+		float m_shadowMultiplier;                  // [0.0, 5.0] Effect strength linear multiplier
+		float m_shadowPower;                       // [0.5, 5.0] Effect strength pow modifier
+		float m_shadowClamp;                       // [0.0, 1.0] Effect max limit (applied after multiplier but before blur)
+		float m_horizonAngleThreshold;             // [0.0, 0.2] Limits self-shadowing (makes the sampling area less of a hemisphere, more of a spherical cone, to avoid self-shadowing and various artifacts due to low tessellation and depth buffer imprecision, etc.)
+		float m_fadeOutFrom;                       // [0.0,  ~ ] Distance at which the effect starts fading out.
+		float m_fadeOutTo;                         // [0.0,  ~ ] Distance at which the effect is faded out.
+		int   m_qualityLevel;                      // [ -1,  3 ] Effect quality; -1 - lowest (low, half res checkerboard), 0 - low, 1 - medium, 2 - high, 3 - very high / adaptive; each quality level is roughly 2x more costly than the previous, except the q3 which is variable but, in general, above q2.
+		float m_adaptiveQualityLimit;              // [0.0, 1.0] (only for Quality Level 3)
+		int   m_blurPassCount;                     // [  0,   6] Number of edge-sensitive smart blur passes to apply. Quality 0 is an exception with only one 'dumb' blur pass used.
+		float m_sharpness;                         // [0.0, 1.0] (How much to bleed over edges; 1: not at all, 0.5: half-half; 0.0: completely ignore edges)
+		float m_temporalSupersamplingAngleOffset;  // [0.0,  PI] Used to rotate sampling kernel; If using temporal AA / supersampling, suggested to rotate by ( (frame%3)/3.0*PI ) or similar. Kernel is already symmetrical, which is why we use PI and not 2*PI.
+		float m_temporalSupersamplingRadiusOffset; // [0.0, 2.0] Used to scale sampling kernel; If using temporal AA / supersampling, suggested to scale by ( 1.0f + (((frame%3)-1.0)/3.0)*0.1 ) or similar.
+		float m_detailShadowStrength;              // [0.0, 5.0] Used for high-res detail AO using neighboring depth pixels: adds a lot of detail but also reduces temporal stability (adds aliasing).
+		bool  m_generateNormals;                   // [true/false] If true normals will be generated from depth.
+
+		// Default values; the initializers are listed in member declaration order.
+		Settings()
+			: m_radius(1.2f)
+			, m_shadowMultiplier(1.0f)
+			, m_shadowPower(1.50f)
+			, m_shadowClamp(0.98f)
+			, m_horizonAngleThreshold(0.06f)
+			, m_fadeOutFrom(50.0f)
+			, m_fadeOutTo(200.0f)
+			, m_qualityLevel(3)
+			, m_adaptiveQualityLimit(0.45f)
+			, m_blurPassCount(2)
+			, m_sharpness(0.98f)
+			, m_temporalSupersamplingAngleOffset(0.0f)
+			, m_temporalSupersamplingRadiusOffset(1.0f)
+			, m_detailShadowStrength(0.5f)
+			, m_generateNormals(true)
+		{
+		}
+	};
+
+	struct Uniforms // CPU-side copy of the "u_params" vec4 array plus the handle used to upload it
+	{
+		enum { NumVec4 = 19 }; // Number of vec4 elements in u_params; rows 0..18 are labelled in the struct below
+
+		void init() // Creates the u_params uniform as an array of NumVec4 vec4s
+		{
+			u_params = bgfx::createUniform("u_params", bgfx::UniformType::Vec4, NumVec4);
+		}
+
+		void submit() // Uploads the current contents of m_params with a single setUniform call
+		{
+			bgfx::setUniform(u_params, m_params, NumVec4);
+		}
+
+		void destroy() // Destroys the uniform handle created by init()
+		{
+			bgfx::destroy(u_params);
+		}
+
+		union // Named fields and the flat m_params array share the same storage
+		{
+			struct // Each inner struct below is four floats; the leading comment is its vec4 row index
+			{
+				/* 0*/ struct { float m_viewportPixelSize[2]; float m_halfViewportPixelSize[2]; };
+				/* 1*/ struct { float m_depthUnpackConsts[2]; float m_unused0[2]; };
+				/* 2*/ struct { float m_ndcToViewMul[2]; float m_ndcToViewAdd[2]; };
+				/* 3*/ struct { float m_perPassFullResCoordOffset[2]; float m_perPassFullResUVOffset[2]; };
+				/* 4*/ struct { float m_viewport2xPixelSize[2]; float m_viewport2xPixelSize_x_025[2]; };
+				/* 5*/ struct { float m_effectRadius; float m_effectShadowStrength; float m_effectShadowPow; float m_effectShadowClamp; };
+				/* 6*/ struct { float m_effectFadeOutMul; float m_effectFadeOutAdd; float m_effectHorizonAngleThreshold; float m_effectSamplingRadiusNearLimitRec; };
+				/* 7*/ struct { float m_depthPrecisionOffsetMod; float m_negRecEffectRadius; float m_loadCounterAvgDiv; float m_adaptiveSampleCountLimit; };
+				/* 8*/ struct { float m_invSharpness; float m_passIndex; float m_quarterResPixelSize[2]; };
+				/* 9-13*/ struct { float m_patternRotScaleMatrices[5][4]; };
+				/*14*/ struct { float m_normalsUnpackMul; float m_normalsUnpackAdd; float m_detailAOStrength; float m_layer; };
+				/*15-18*/ struct { float m_normalsWorldToViewspaceMatrix[16]; };
+			};
+
+			float m_params[NumVec4 * 4]; // Flat view of all rows, passed to bgfx::setUniform() in submit()
+		};
+
+		bgfx::UniformHandle u_params; // Handle created in init() and released in destroy()
+	};
+
+	// Writes (_x, _y) into the first two elements of _v.
+	void vec2Set(float* _v, float _x, float _y)
+	{
+		_v[0] = _x;
+		_v[1] = _y;
+	}
+	// Writes (_x, _y, _z, _w) into the first four elements of _v.
+	void vec4Set(float* _v, float _x, float _y, float _z, float _w)
+	{
+		_v[0] = _x;
+		_v[1] = _y;
+		_v[2] = _z;
+		_v[3] = _w;
+	}
+	// Integer variant of vec4Set: writes (_x, _y, _z, _w) into the first four elements of _v.
+	void vec4iSet(int* _v, int _x, int _y, int _z, int _w)
+	{
+		_v[0] = _x;
+		_v[1] = _y;
+		_v[2] = _z;
+		_v[3] = _w;
+	}
+
+	static const int cMaxBlurPassCount = 6; // Upper bound that update() clamps Settings::m_blurPassCount to
+
+	class ExampleASSAO : public entry::AppI
+	{
+	public:
+		ExampleASSAO(const char* _name, const char* _description) // Forwards name/description to entry::AppI and sets the initial toggle values
+			: entry::AppI(_name, _description)
+			, m_currFrame(UINT32_MAX) // Replaced by the return value of bgfx::frame() at the end of update()
+			, m_texelHalf(0.0f) // Overwritten in init() depending on the renderer type
+			, m_enableSSAO(true) // Initial state of the "Enable SSAO" checkbox
+			, m_enableTexturing(true) // Initial state of the "Enable Texturing & Lighting" checkbox
+			, m_framebufferGutter(true) // Initial state of the "Framebuffer Gutter" checkbox
+		{
+		}
+
+		void init(int32_t _argc, const char* const* _argv, uint32_t _width, uint32_t _height) override // Initializes bgfx and creates every GPU resource, mesh, the camera and imgui used by the example
+		{
+			Args args(_argc, _argv);
+
+			m_width = _width;
+			m_height = _height;
+			m_debug = BGFX_DEBUG_NONE;
+			m_reset = BGFX_RESET_VSYNC;
+
+			bgfx::Init init;
+			init.type = args.m_type;
+
+			init.vendorId = args.m_pciId;
+			init.resolution.width = m_width;
+			init.resolution.height = m_height;
+			init.resolution.reset = m_reset;
+			bgfx::init(init);
+
+			// Apply the debug flags (m_debug is BGFX_DEBUG_NONE at this point).
+			bgfx::setDebug(m_debug);
+
+			// Labeling for renderdoc captures, etc
+			bgfx::setViewName(RENDER_PASS_GBUFFER, "gbuffer");
+			bgfx::setViewName(RENDER_PASS_COMBINE, "post combine");
+
+			// Set up screen clears (color and depth) for the gbuffer view
+			bgfx::setViewClear(RENDER_PASS_GBUFFER
+				, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
+				, 0
+				, 1.0f
+				, 0
+			);
+
+			// Create uniforms
+			u_combineParams = bgfx::createUniform("u_combineParams", bgfx::UniformType::Vec4, 2);
+			u_rect = bgfx::createUniform("u_rect", bgfx::UniformType::Vec4);  // viewport/scissor rect for compute
+			m_uniforms.init();
+
+			// Create texture sampler uniforms (used when we bind textures)
+			s_normal = bgfx::createUniform("s_normal", bgfx::UniformType::Int1);  // Normal gbuffer
+			s_depth = bgfx::createUniform("s_depth", bgfx::UniformType::Int1);  // Depth gbuffer
+			s_color = bgfx::createUniform("s_color", bgfx::UniformType::Int1);  // Color (albedo) gbuffer
+
+			s_albedo = bgfx::createUniform("s_albedo", bgfx::UniformType::Int1);
+
+			s_ao = bgfx::createUniform("s_ao", bgfx::UniformType::Int1);
+			s_blurInput = bgfx::createUniform("s_blurInput", bgfx::UniformType::Int1);
+			s_finalSSAO = bgfx::createUniform("s_finalSSAO", bgfx::UniformType::Int1);
+			s_depthSource = bgfx::createUniform("s_depthSource", bgfx::UniformType::Int1);
+			s_viewspaceDepthSource = bgfx::createUniform("s_viewspaceDepthSource", bgfx::UniformType::Int1);
+			s_viewspaceDepthSourceMirror = bgfx::createUniform("s_viewspaceDepthSourceMirror", bgfx::UniformType::Int1);
+			s_importanceMap = bgfx::createUniform("s_importanceMap", bgfx::UniformType::Int1);
+
+			// Create program from shaders.
+			m_gbufferProgram = loadProgram("vs_assao_gbuffer", "fs_assao_gbuffer");  // Gbuffer
+			m_combineProgram = loadProgram("vs_assao", "fs_assao_deferred_combine");
+
+			m_prepareDepthsProgram = loadProgram("cs_assao_prepare_depths", NULL);
+			m_prepareDepthsAndNormalsProgram = loadProgram("cs_assao_prepare_depths_and_normals", NULL);
+			m_prepareDepthsHalfProgram = loadProgram("cs_assao_prepare_depths_half", NULL);
+			m_prepareDepthsAndNormalsHalfProgram = loadProgram("cs_assao_prepare_depths_and_normals_half", NULL);
+			m_prepareDepthMipProgram = loadProgram("cs_assao_prepare_depth_mip", NULL);
+			m_generateQ0Program = loadProgram("cs_assao_generate_q0", NULL);
+			m_generateQ1Program = loadProgram("cs_assao_generate_q1", NULL);
+			m_generateQ2Program = loadProgram("cs_assao_generate_q2", NULL);
+			m_generateQ3Program = loadProgram("cs_assao_generate_q3", NULL);
+			m_generateQ3BaseProgram = loadProgram("cs_assao_generate_q3base", NULL);
+			m_smartBlurProgram = loadProgram("cs_assao_smart_blur", NULL);
+			m_smartBlurWideProgram = loadProgram("cs_assao_smart_blur_wide", NULL);
+			m_nonSmartBlurProgram = loadProgram("cs_assao_non_smart_blur", NULL);
+			m_applyProgram = loadProgram("cs_assao_apply", NULL);
+			m_nonSmartApplyProgram = loadProgram("cs_assao_non_smart_apply", NULL);
+			m_nonSmartHalfApplyProgram = loadProgram("cs_assao_non_smart_half_apply", NULL);
+			m_generateImportanceMapProgram = loadProgram("cs_assao_generate_importance_map", NULL);
+			m_postprocessImportanceMapAProgram = loadProgram("cs_assao_postprocess_importance_map_a", NULL);
+			m_postprocessImportanceMapBProgram = loadProgram("cs_assao_postprocess_importance_map_b", NULL);
+			m_loadCounterClearProgram = loadProgram("cs_assao_load_counter_clear", NULL);
+
+			 // Load some meshes (one per entry of s_meshPaths, stored at the same index)
+			for (uint32_t ii = 0; ii < BX_COUNTOF(s_meshPaths); ++ii)
+			{
+				m_meshes[ii] = meshLoad(s_meshPaths[ii]);
+			}
+
+			// Randomly create some models
+			bx::RngMwc mwc;  // Random number generator
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_models); ++ii)
+			{
+				Model& model = m_models[ii];
+
+				model.mesh = 1 + mwc.gen() % (BX_COUNTOF(s_meshPaths) - 1); // Yields indices 1..count-1, so mesh 0 (the cube) is never picked
+				model.position[0] = (((mwc.gen() % 256)) - 128.0f) / 20.0f; // X in [-6.4, 6.35]
+				model.position[1] = 0;
+				model.position[2] = (((mwc.gen() % 256)) - 128.0f) / 20.0f; // Z in [-6.4, 6.35]
+			}
+
+			// Load ground.  We'll just use the cube since I don't have a ground model right now
+			m_ground = meshLoad("meshes/cube.bin");
+
+			m_groundTexture = loadTexture("textures/fieldstone-rgba.dds");
+			const bgfx::Memory* mem = bgfx::alloc(4);
+			bx::memSet(mem->data, 0xc0, 4); // All four bytes of the single RGBA8 texel are 0xc0
+			m_modelTexture = bgfx::createTexture2D(1,1, false, 1, bgfx::TextureFormat::RGBA8, 0,  mem);
+
+			m_recreateFrameBuffers = false;
+			createFramebuffers();
+
+			m_loadCounter = bgfx::createTexture2D(1, 1, false, 1, bgfx::TextureFormat::R32U, BGFX_TEXTURE_COMPUTE_WRITE); // 1x1 R32U texture bound as a compute image in update()
+
+			// Vertex decl
+			PosTexCoord0Vertex::init();
+
+			// Init camera
+			cameraCreate();
+			float camPos[] = { 0.0f, 1.5f, 0.0f };
+			cameraSetPosition(camPos);
+			cameraSetVerticalAngle(-0.3f);
+			m_fovY = 60.0f;
+
+			// Get renderer capabilities info: only Direct3D9 gets a half-texel offset.
+			const bgfx::RendererType::Enum renderer = bgfx::getRendererType();
+			m_texelHalf = bgfx::RendererType::Direct3D9 == renderer ? 0.5f : 0.0f;
+
+			imguiCreate();
+		}
+
+		// Releases everything init() created (meshes, textures, programs, uniforms,
+		// the load counter, frame buffers, camera and imgui) and then shuts bgfx down.
+		// Always returns 0.
+		int shutdown() override
+		{
+			for (uint32_t ii = 0; ii < BX_COUNTOF(s_meshPaths); ++ii)
+			{
+				meshUnload(m_meshes[ii]);
+			}
+
+			meshUnload(m_ground);
+			bgfx::destroy(m_groundTexture);
+			bgfx::destroy(m_modelTexture);
+
+			// Cleanup. Every program handle is destroyed exactly once; m_combineProgram
+			// used to be destroyed a second time at the end of this list.
+			bgfx::destroy(m_gbufferProgram);
+			bgfx::destroy(m_combineProgram);
+
+			bgfx::destroy(m_prepareDepthsProgram);
+			bgfx::destroy(m_prepareDepthsAndNormalsProgram);
+			bgfx::destroy(m_prepareDepthsHalfProgram);
+			bgfx::destroy(m_prepareDepthsAndNormalsHalfProgram);
+			bgfx::destroy(m_prepareDepthMipProgram);
+			bgfx::destroy(m_generateQ0Program);
+			bgfx::destroy(m_generateQ1Program);
+			bgfx::destroy(m_generateQ2Program);
+			bgfx::destroy(m_generateQ3Program);
+			bgfx::destroy(m_generateQ3BaseProgram);
+			bgfx::destroy(m_smartBlurProgram);
+			bgfx::destroy(m_smartBlurWideProgram);
+			bgfx::destroy(m_nonSmartBlurProgram);
+			bgfx::destroy(m_applyProgram);
+			bgfx::destroy(m_nonSmartApplyProgram);
+			bgfx::destroy(m_nonSmartHalfApplyProgram);
+			bgfx::destroy(m_generateImportanceMapProgram);
+			bgfx::destroy(m_postprocessImportanceMapAProgram);
+			bgfx::destroy(m_postprocessImportanceMapBProgram);
+			bgfx::destroy(m_loadCounterClearProgram);
+
+			m_uniforms.destroy();
+
+			bgfx::destroy(u_combineParams);
+			bgfx::destroy(u_rect);
+
+			// Sampler uniforms.
+			bgfx::destroy(s_normal);
+			bgfx::destroy(s_depth);
+			bgfx::destroy(s_color);
+			bgfx::destroy(s_albedo);
+			bgfx::destroy(s_ao);
+			bgfx::destroy(s_blurInput);
+			bgfx::destroy(s_finalSSAO);
+			bgfx::destroy(s_depthSource);
+			bgfx::destroy(s_viewspaceDepthSource);
+			bgfx::destroy(s_viewspaceDepthSourceMirror);
+			bgfx::destroy(s_importanceMap);
+
+			bgfx::destroy(m_loadCounter);
+			destroyFramebuffers();
+
+			cameraDestroy();
+
+			imguiDestroy();
+
+			// Shutdown bgfx.
+			bgfx::shutdown();
+
+			return 0;
+		}
+
+		bool update() override // Submits one frame (gbuffer, ASSAO compute passes, combine, UI); returns true after a frame was submitted, false when entry::processEvents() returns true
+		{
+			if (!entry::processEvents(m_width, m_height, m_debug, m_reset, &m_mouseState))
+			{
+				// Update frame timer
+				int64_t now = bx::getHPCounter();
+				static int64_t last = now; // Initialized on the first call only, so the first frame time is 0
+				const int64_t frameTime = now - last;
+				last = now;
+				const double freq = double(bx::getHPFrequency());
+				const float deltaTime = float(frameTime / freq);
+				const bgfx::Caps* caps = bgfx::getCaps();
+
+				if (m_size[0] != (int)m_width+2*m_border || m_size[1] != (int)m_height + 2 * m_border || m_recreateFrameBuffers) // Render size no longer matches window size plus border, or the UI requested a rebuild
+				{
+					destroyFramebuffers();
+					createFramebuffers();
+					m_recreateFrameBuffers = false;
+				}
+
+				// Update camera
+				cameraUpdate(deltaTime*0.15f, m_mouseState);
+
+				// Set up matrices for gbuffer
+				cameraGetViewMtx(m_view);
+
+				bx::mtxProj(m_proj, m_fovY, float(m_size[0]) / float(m_size[1]), 0.1f, 100.0f, bgfx::getCaps()->homogeneousDepth);
+				bx::mtxProj(m_proj2, m_fovY, float(m_size[0]) / float(m_size[1]), 0.1f, 100.0f, false); // Same projection but always with homogeneousDepth == false
+
+				bgfx::setViewRect(RENDER_PASS_GBUFFER, 0, 0, uint16_t(m_size[0]), uint16_t(m_size[1]));
+				bgfx::setViewTransform(RENDER_PASS_GBUFFER, m_view, m_proj);
+				// Make sure when we draw it goes into gbuffer and not backbuffer
+				bgfx::setViewFrameBuffer(RENDER_PASS_GBUFFER, m_gbuffer);
+				// Draw everything into g-buffer
+				drawAllModels(RENDER_PASS_GBUFFER, m_gbufferProgram);
+
+				// Set up transform matrix for fullscreen quad. NOTE(review): USE_ASSAO is not defined in the visible part of this file; an undefined macro evaluates to 0 in #if, so this block would be compiled out -- confirm.
+#if USE_ASSAO == 1
+				float orthoProj[16];
+				bx::mtxOrtho(orthoProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, caps->homogeneousDepth);
+				bgfx::setViewTransform(RENDER_PASS_COMBINE, NULL, orthoProj);
+				bgfx::setViewRect(RENDER_PASS_COMBINE, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				// Bind vertex buffer and draw quad
+				screenSpaceQuad((float)m_width, (float)m_height, m_texelHalf, caps->originBottomLeft);
+				//bgfx::submit(RENDER_PASS_COMBINE, m_combineProgram);
+				bgfx::touch(RENDER_PASS_COMBINE);
+
+				BX_UNUSED(orthoProj, caps)
+#endif
+
+				// ASSAO passes (this branch is also taken when USE_ASSAO is undefined)
+#if USE_ASSAO == 0
+				updateUniforms(0);
+
+				bgfx::ViewId view = 2; // Views 0 and 1 are RENDER_PASS_GBUFFER and RENDER_PASS_COMBINE
+				bgfx::setViewName(view, "ASSAO");
+
+				{
+					bgfx::setTexture(0, s_depthSource, bgfx::getTexture(m_gbuffer, GBUFFER_RT_DEPTH), SAMPLER_POINT_CLAMP);
+					m_uniforms.submit();
+
+					if (m_settings.m_generateNormals)
+					{
+						bgfx::setImage(5, m_normals, 0, bgfx::Access::Write, bgfx::TextureFormat::RGBA8);
+					}
+
+					if (m_settings.m_qualityLevel < 0)
+					{
+						for (int j = 0; j<2; ++j)
+							bgfx::setImage((uint8_t)(j + 1), m_halfDepths[j == 0 ? 0 : 3], 0, bgfx::Access::Write, bgfx::TextureFormat::R16F); // Lowest quality only writes half depth textures 0 and 3
+						bgfx::dispatch(view, m_settings.m_generateNormals ? m_prepareDepthsAndNormalsHalfProgram : m_prepareDepthsHalfProgram, (m_halfSize[0] + 7) / 8, (m_halfSize[1] + 7) / 8);
+					}
+					else
+					{
+						for(int j=0;j<4;++j)
+							bgfx::setImage((uint8_t)(j+1), m_halfDepths[j], 0, bgfx::Access::Write, bgfx::TextureFormat::R16F);
+						bgfx::dispatch(view, m_settings.m_generateNormals ? m_prepareDepthsAndNormalsProgram : m_prepareDepthsProgram, (m_halfSize[0] + 7) / 8, (m_halfSize[1] + 7) / 8);
+
+					}
+				}
+
+				// only do mipmaps for higher quality levels (not beneficial on quality level 1, and detrimental on quality level 0)
+				if (m_settings.m_qualityLevel > 1)
+				{
+					uint16_t mipWidth = (uint16_t)m_halfSize[0];
+					uint16_t mipHeight = (uint16_t)m_halfSize[1];
+
+					for (uint8_t i = 1; i < SSAO_DEPTH_MIP_LEVELS; i++)
+					{
+						mipWidth = (uint16_t)bx::max(1, mipWidth >> 1);
+						mipHeight = (uint16_t)bx::max(1, mipHeight >> 1);
+
+						for (uint8_t j = 0; j < 4; ++j)
+						{
+							bgfx::setImage(j, m_halfDepths[j], i-1, bgfx::Access::Read, bgfx::TextureFormat::R16F); // Source: previous mip level
+							bgfx::setImage(j + 4, m_halfDepths[j], i, bgfx::Access::Write, bgfx::TextureFormat::R16F); // Destination: current mip level
+						}
+
+						m_uniforms.submit();
+						float rect[4] = { 0.0f, 0.0f, (float)mipWidth, (float)mipHeight };
+						bgfx::setUniform(u_rect, rect);
+						
+						bgfx::dispatch(view, m_prepareDepthMipProgram, (mipWidth + 7) / 8, (mipHeight + 7) / 8);
+					}
+				}
+
+				// for adaptive quality, importance map pass
+				for (int ssaoPass = 0; ssaoPass < 2; ++ssaoPass)
+				{
+					if (ssaoPass == 0 && m_settings.m_qualityLevel < 3) // The first (base) iteration only runs for quality level 3
+						continue;
+
+					bool adaptiveBasePass = (ssaoPass == 0);
+
+					BX_UNUSED(adaptiveBasePass);
+
+					int passCount = 4;
+
+					int halfResNumX = (m_halfResOutScissorRect[2] - m_halfResOutScissorRect[0] + 7) / 8;
+					int halfResNumY = (m_halfResOutScissorRect[3] - m_halfResOutScissorRect[1] + 7) / 8;
+					float halfResRect[4] = { (float)m_halfResOutScissorRect[0], (float)m_halfResOutScissorRect[1], (float)m_halfResOutScissorRect[2], (float)m_halfResOutScissorRect[3] };
+
+					for (int pass = 0; pass < passCount; pass++)
+					{
+						if ((m_settings.m_qualityLevel < 0) && ((pass == 1) || (pass == 2))) // Lowest quality only processes passes 0 and 3
+							continue;
+
+						int blurPasses = m_settings.m_blurPassCount;
+						blurPasses = bx::min(blurPasses, cMaxBlurPassCount);
+
+						if (m_settings.m_qualityLevel == 3)
+						{
+							// if adaptive, at least one blur pass needed as the first pass needs to read the final texture results - kind of awkward
+							if (adaptiveBasePass)
+								blurPasses = 0;
+							else
+								blurPasses = bx::max(1, blurPasses);
+						}
+						else
+							if (m_settings.m_qualityLevel <= 0)
+							{
+								// just one blur pass allowed for minimum quality
+								blurPasses = bx::min(1, m_settings.m_blurPassCount);
+							}
+
+						updateUniforms(pass);
+
+						bgfx::TextureHandle pPingRT = m_pingPongHalfResultA;
+						bgfx::TextureHandle pPongRT = m_pingPongHalfResultB;
+
+						// Generate
+						{
+							bgfx::setImage(6, blurPasses == 0 ? m_finalResults : pPingRT, 0, bgfx::Access::Write, bgfx::TextureFormat::RG8); // Without blur the generate pass writes straight into m_finalResults
+
+							bgfx::setUniform(u_rect, halfResRect);
+
+							bgfx::setTexture(0, s_viewspaceDepthSource, m_halfDepths[pass], SAMPLER_POINT_CLAMP);
+							bgfx::setTexture(1, s_viewspaceDepthSourceMirror, m_halfDepths[pass], SAMPLER_POINT_MIRROR);
+							if (m_settings.m_generateNormals)
+								bgfx::setImage(2, m_normals,0, bgfx::Access::Read, bgfx::TextureFormat::RGBA8);
+							else
+								bgfx::setImage(2, bgfx::getTexture(m_gbuffer, GBUFFER_RT_NORMAL), 0, bgfx::Access::Read, bgfx::TextureFormat::RGBA8);
+
+							if (!adaptiveBasePass && (m_settings.m_qualityLevel == 3))
+							{
+								bgfx::setImage(3, m_loadCounter, 0, bgfx::Access::Read, bgfx::TextureFormat::R32U);
+								bgfx::setTexture(4, s_importanceMap, m_importanceMap, SAMPLER_LINEAR_CLAMP);
+								bgfx::setImage(5, m_finalResults, 0, bgfx::Access::Read, bgfx::TextureFormat::RG8);
+							}
+
+							bgfx::ProgramHandle programs[5] = { m_generateQ0Program, m_generateQ1Program , m_generateQ2Program , m_generateQ3Program , m_generateQ3BaseProgram };
+							int programIndex = bx::max(0, (!adaptiveBasePass) ? (m_settings.m_qualityLevel) : (4)); // Base pass uses index 4 (q3base); quality -1 is clamped to index 0
+
+							m_uniforms.m_layer = blurPasses == 0 ? (float)pass : 0.0f;
+							m_uniforms.submit();
+							bgfx::dispatch(view, programs[programIndex], halfResNumX, halfResNumY);
+						}
+
+						// Blur
+						if (blurPasses > 0)
+						{
+							int wideBlursRemaining = bx::max(0, blurPasses - 2); // All but the last two blur passes use the wide blur program (quality > 0 only)
+
+							for (int i = 0; i < blurPasses; i++)
+							{
+								bgfx::setViewFrameBuffer(view, BGFX_INVALID_HANDLE);
+								bgfx::touch(view);
+
+								m_uniforms.m_layer = ((i == (blurPasses - 1)) ? (float)pass : 0.0f);
+								m_uniforms.submit();
+
+								bgfx::setUniform(u_rect, halfResRect);
+
+								bgfx::setImage(0, i == (blurPasses - 1) ? m_finalResults : pPongRT, 0, bgfx::Access::Write, bgfx::TextureFormat::RG8); // The last blur pass writes into m_finalResults
+								bgfx::setTexture(1, s_blurInput, pPingRT, m_settings.m_qualityLevel > 0 ? SAMPLER_POINT_MIRROR : SAMPLER_LINEAR_CLAMP);
+
+								if (m_settings.m_qualityLevel > 0)
+								{
+									if (wideBlursRemaining > 0)
+									{
+										bgfx::dispatch(view, m_smartBlurWideProgram, halfResNumX, halfResNumY);
+										wideBlursRemaining--;
+									}
+									else
+									{
+										bgfx::dispatch(view, m_smartBlurProgram, halfResNumX, halfResNumY);
+									}
+								}
+								else
+								{
+									bgfx::dispatch(view, m_nonSmartBlurProgram, halfResNumX, halfResNumY); // just for quality level 0 (and -1)
+								}
+
+								bgfx::TextureHandle temp = pPingRT; // Swap ping and pong so the next blur pass reads what this one wrote
+								pPingRT = pPongRT;
+								pPongRT = temp;
+							}
+						}
+					}
+
+					if (ssaoPass == 0 && m_settings.m_qualityLevel == 3)
+					{	// Generate importance map
+						m_uniforms.submit();
+						bgfx::setImage(0, m_importanceMap, 0, bgfx::Access::Write, bgfx::TextureFormat::R8);
+						bgfx::setTexture(1, s_finalSSAO, m_finalResults, SAMPLER_POINT_CLAMP);
+						bgfx::dispatch(view, m_generateImportanceMapProgram, (m_quarterSize[0] + 7) / 8, (m_quarterSize[1] + 7) / 8);
+
+						m_uniforms.submit();
+						bgfx::setImage(0, m_importanceMapPong, 0, bgfx::Access::Write, bgfx::TextureFormat::R8);
+						bgfx::setTexture(1, s_importanceMap, m_importanceMap);
+						bgfx::dispatch(view, m_postprocessImportanceMapAProgram, (m_quarterSize[0] + 7) / 8, (m_quarterSize[1] + 7) / 8);
+
+						bgfx::setImage(0, m_loadCounter, 0, bgfx::Access::ReadWrite, bgfx::TextureFormat::R32U);
+						bgfx::dispatch(view, m_loadCounterClearProgram, 1,1);
+
+						m_uniforms.submit();
+						bgfx::setImage(0, m_importanceMap, 0, bgfx::Access::Write, bgfx::TextureFormat::R8);
+						bgfx::setTexture(1, s_importanceMap, m_importanceMapPong);
+						bgfx::setImage(2, m_loadCounter, 0, bgfx::Access::ReadWrite, bgfx::TextureFormat::R32U);
+						bgfx::dispatch(view, m_postprocessImportanceMapBProgram, (m_quarterSize[0]+7) / 8, (m_quarterSize[1]+7) / 8);
+						++view; // The second (non-base) iteration is submitted to the next view
+					}
+				}
+
+				// Apply: writes m_aoMap from m_finalResults
+				{
+					// select 4 deinterleaved AO textures (texture array)
+					bgfx::setImage(0, m_aoMap, 0, bgfx::Access::Write, bgfx::TextureFormat::R8);
+					bgfx::setTexture(1, s_finalSSAO, m_finalResults);
+
+					m_uniforms.submit();
+
+					float rect[4] = {(float)m_fullResOutScissorRect[0], (float)m_fullResOutScissorRect[1], (float)m_fullResOutScissorRect[2], (float)m_fullResOutScissorRect[3] };
+					bgfx::setUniform(u_rect, rect);
+
+					bgfx::ProgramHandle program;
+					if (m_settings.m_qualityLevel < 0)
+						program = m_nonSmartHalfApplyProgram;
+					else if (m_settings.m_qualityLevel == 0)
+						program = m_nonSmartApplyProgram;
+					else
+						program = m_applyProgram;
+					bgfx::dispatch(view, program, (m_fullResOutScissorRect[2]- m_fullResOutScissorRect[0] + 7) / 8,
+												(m_fullResOutScissorRect[3] - m_fullResOutScissorRect[1] + 7) / 8);
+
+
+					++view;
+				}
+
+				{	// combine: draws gbuffer color, normal and m_aoMap to the back buffer with m_combineProgram
+					bgfx::setViewFrameBuffer(view, BGFX_INVALID_HANDLE);
+					bgfx::setViewName(view, "Combine");
+					bgfx::setViewRect(view, 0, 0, (uint16_t)m_width, (uint16_t)m_height);
+					float orthoProj[16];
+					bx::mtxOrtho(orthoProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, caps->homogeneousDepth);
+					bgfx::setViewTransform(view, NULL, orthoProj);
+
+					bgfx::setTexture(0, s_color, bgfx::getTexture(m_gbuffer, GBUFFER_RT_COLOR), SAMPLER_POINT_CLAMP);
+					bgfx::setTexture(1, s_normal, bgfx::getTexture(m_gbuffer, GBUFFER_RT_NORMAL), SAMPLER_POINT_CLAMP);
+
+					bgfx::setTexture(2, s_ao, m_aoMap, SAMPLER_POINT_CLAMP);
+
+					m_uniforms.submit();
+					float combineParams[8] = { m_enableTexturing ? 1.0f : 0.0f, m_enableSSAO ? 1.0f : 0.0f, 0.0f,0.0f,
+						(float)(m_size[0]-2*m_border) / (float)m_size[0], (float)(m_size[1] - 2 * m_border) / (float)m_size[1],
+						(float)m_border / (float)m_size[0], (float)m_border / (float)m_size[1] }; // Second vec4: fraction of the render target without the border (xy) and the border as a fraction of it (zw)
+					bgfx::setUniform(u_combineParams, combineParams, 2);
+					screenSpaceQuad((float)m_width, (float)m_height, m_texelHalf, caps->originBottomLeft);
+					bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_A | BGFX_STATE_DEPTH_TEST_ALWAYS);
+					bgfx::submit(view, m_combineProgram);
+					++view;
+				}
+#endif
+
+				// Draw UI
+				imguiBeginFrame(m_mouseState.m_mx
+					, m_mouseState.m_my
+					, (m_mouseState.m_buttons[entry::MouseButton::Left] ? IMGUI_MBUT_LEFT : 0)
+					| (m_mouseState.m_buttons[entry::MouseButton::Right] ? IMGUI_MBUT_RIGHT : 0)
+					| (m_mouseState.m_buttons[entry::MouseButton::Middle] ? IMGUI_MBUT_MIDDLE : 0)
+					, m_mouseState.m_mz
+					, uint16_t(m_width)
+					, uint16_t(m_height)
+				);
+
+				showExampleDialog(this);
+
+				ImGui::SetNextWindowPos(
+					ImVec2(m_width - m_width / 4.0f - 10.0f, 10.0f)
+					, ImGuiCond_FirstUseEver
+				);
+				ImGui::SetNextWindowSize(
+					ImVec2(m_width / 4.0f, m_height / 2.0f)
+					, ImGuiCond_FirstUseEver
+				);
+				ImGui::Begin("Settings"
+					, NULL
+					, 0
+				);
+
+				ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.5f);
+				ImGui::Checkbox("Enable SSAO", &m_enableSSAO);
+				ImGui::Checkbox("Enable Texturing & Lighting", &m_enableTexturing);
+				ImGui::Separator();
+
+				int quality = m_settings.m_qualityLevel + 1; // Combo index is 0-based while quality levels start at -1
+				if (ImGui::Combo("Quality Level", &quality, "lowest(half res)\0low\0medium\0high\0very high / adaptive\0\0"))
+					m_settings.m_qualityLevel = quality - 1;
+				ImGui::Checkbox("Generate Normals", &m_settings.m_generateNormals);
+				if (ImGui::Checkbox("Framebuffer Gutter", &m_framebufferGutter))
+					m_recreateFrameBuffers = true; // Picked up by the frame buffer check at the top of the next update()
+				ImGui::SliderFloat("Effect Radius", &m_settings.m_radius, 0.0f, 4.0f);
+				ImGui::SliderFloat("Effect Strength", &m_settings.m_shadowMultiplier, 0.0f, 5.0f);
+				ImGui::SliderFloat("Effect Power", &m_settings.m_shadowPower, 0.5f, 4.0f);
+				ImGui::SliderFloat("Effect Max Limit", &m_settings.m_shadowClamp, 0.0f, 1.0f);
+				ImGui::SliderFloat("Horizon Angle Threshold", &m_settings.m_horizonAngleThreshold, 0.0f, 0.2f);
+				ImGui::SliderFloat("Fade Out From", &m_settings.m_fadeOutFrom, 0.0f, 100.0f);
+				ImGui::SliderFloat("Fade Out To", &m_settings.m_fadeOutTo, 0.0f, 300.0f);
+				if ( m_settings.m_qualityLevel == 3)
+					ImGui::SliderFloat("Adaptive Quality Limit", &m_settings.m_adaptiveQualityLimit, 0.0f, 1.0f);
+				ImGui::SliderInt("Blur Pass Count", &m_settings.m_blurPassCount, 0, 6);
+				ImGui::SliderFloat("Sharpness", &m_settings.m_sharpness, 0.0f, 1.0f);
+				ImGui::SliderFloat("Temporal Supersampling Angle Offset", &m_settings.m_temporalSupersamplingAngleOffset, 0.0f, bx::kPi);
+				ImGui::SliderFloat("Temporal Supersampling Radius Offset", &m_settings.m_temporalSupersamplingRadiusOffset, 0.0f, 2.0f);
+				ImGui::SliderFloat("Detail Shadow Strength", &m_settings.m_detailShadowStrength, 0.0f, 4.0f);
+
+				ImGui::End();
+
+				imguiEndFrame();
+
+				// Advance to next frame. Rendering thread will be kicked to
+				// process submitted rendering primitives.
+				m_currFrame = bgfx::frame();
+
+				return true;
+			}
+
+			return false;
+		}
+
+		// Submits every entry of m_models, followed by the ground mesh, to view `_pass`
+		// using `_program`. The albedo sampler is bound again before each submit.
+		void drawAllModels(uint8_t _pass, bgfx::ProgramHandle _program)
+		{
+			for (const Model& model : m_models)
+			{
+				// Uniform scale taken from the per-mesh table, zero rotation,
+				// translation to the model's position.
+				const float meshScale = s_meshScale[model.mesh];
+				float modelMtx[16];
+				bx::mtxSRT(modelMtx
+					, meshScale, meshScale, meshScale
+					, 0.0f, 0.0f, 0.0f
+					, model.position[0], model.position[1], model.position[2]
+				);
+
+				bgfx::setTexture(0, s_albedo, m_modelTexture);
+				meshSubmit(m_meshes[model.mesh], _pass, _program, modelMtx);
+			}
+
+			// Ground transform: a uniform scale of 10 multiplied by a translation of -10 on Y.
+			const float groundScale = 10.0f;
+			float groundScaleMtx[16];
+			bx::mtxScale(groundScaleMtx, groundScale, groundScale, groundScale);
+
+			float groundOffsetMtx[16];
+			bx::mtxTranslate(groundOffsetMtx, 0.0f, -10.0f, 0.0f);
+
+			float groundMtx[16];
+			bx::mtxMul(groundMtx, groundScaleMtx, groundOffsetMtx);
+
+			bgfx::setTexture(0, s_albedo, m_groundTexture);
+			meshSubmit(m_ground, _pass, _program, groundMtx);
+		}
+
+		// Recomputes the render resolution, gutter border, field of view and scissor
+		// rectangles from m_width / m_height / m_framebufferGutter, then creates the
+		// g-buffer and all ASSAO working textures. Counterpart of destroyFramebuffers().
+		void createFramebuffers()
+		{
+			// update resolution and camera FOV if there's border expansion
+			const int drawResolutionBorderExpansionFactor = 12; // border is min(width, height) / expansionFactor
+			const float fovY = 60.0f;
+
+			m_border = 0;
+
+			if (m_framebufferGutter)
+			{
+				// "/ 2 * 2" rounds the border down to an even number of pixels.
+				m_border = (bx::min(m_width, m_height) / drawResolutionBorderExpansionFactor) / 2 * 2;
+				int expandedSceneResolutionY = m_height + m_border * 2;
+				float yScaleDueToBorder = (expandedSceneResolutionY * 0.5f) / (float)(m_height * 0.5f);
+
+				// Scale the tangent of the half angle by the vertical resolution growth.
+				float nonExpandedTan = bx::tan(bx::toRad(fovY / 2.0f));
+				m_fovY = bx::toDeg(bx::atan(nonExpandedTan * yScaleDueToBorder) * 2.0f);
+			}
+			else
+			{
+				m_fovY = fovY;
+			}
+
+			// Full, half and quarter resolutions; the halvings round up.
+			m_size[0] = m_width + 2 * m_border;
+			m_size[1] = m_height + 2 * m_border;
+			m_halfSize[0] = (m_size[0] + 1) / 2;
+			m_halfSize[1] = (m_size[1] + 1) / 2;
+			m_quarterSize[0] = (m_halfSize[0] + 1) / 2;
+			m_quarterSize[1] = (m_halfSize[1] + 1) / 2;
+
+			// Rectangles are stored as (minX, minY, maxX, maxY).
+			vec4iSet(m_fullResOutScissorRect, m_border, m_border, m_width + m_border, m_height + m_border);
+			vec4iSet(m_halfResOutScissorRect, m_fullResOutScissorRect[0] / 2, m_fullResOutScissorRect[1] / 2, (m_fullResOutScissorRect[2] + 1) / 2, (m_fullResOutScissorRect[3] + 1) / 2);
+
+			// Grow the half-res rectangle by the blur margin, clamped to the half-res texture.
+			int blurEnlarge = cMaxBlurPassCount + bx::max(0, cMaxBlurPassCount - 2);  // +1 for max normal blurs, +2 for wide blurs
+			vec4iSet(m_halfResOutScissorRect, bx::max(0, m_halfResOutScissorRect[0] - blurEnlarge), bx::max(0, m_halfResOutScissorRect[1] - blurEnlarge),
+						bx::min(m_halfSize[0], m_halfResOutScissorRect[2] + blurEnlarge), bx::min(m_halfSize[1], m_halfResOutScissorRect[3] + blurEnlarge));
+
+			// Make gbuffer and related textures
+			const uint64_t tsFlags = 0
+				| BGFX_TEXTURE_RT
+				| BGFX_SAMPLER_MIN_POINT
+				| BGFX_SAMPLER_MAG_POINT
+				| BGFX_SAMPLER_MIP_POINT
+				| BGFX_SAMPLER_U_CLAMP
+				| BGFX_SAMPLER_V_CLAMP
+				;
+
+			bgfx::TextureHandle gbufferTex[3];
+			gbufferTex[GBUFFER_RT_NORMAL] = bgfx::createTexture2D(uint16_t(m_size[0]), uint16_t(m_size[1]), false, 1, bgfx::TextureFormat::BGRA8, tsFlags);
+			gbufferTex[GBUFFER_RT_COLOR] = bgfx::createTexture2D(uint16_t(m_size[0]), uint16_t(m_size[1]), false, 1, bgfx::TextureFormat::BGRA8, tsFlags);
+			gbufferTex[GBUFFER_RT_DEPTH] = bgfx::createTexture2D(uint16_t(m_size[0]), uint16_t(m_size[1]), false, 1, bgfx::TextureFormat::D24, tsFlags);
+			m_gbuffer = bgfx::createFrameBuffer(BX_COUNTOF(gbufferTex), gbufferTex, true);
+
+			// Loop bound derived from the array itself, matching destroyFramebuffers().
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_halfDepths); ++ii)
+			{
+				m_halfDepths[ii] = bgfx::createTexture2D(uint16_t(m_halfSize[0]), uint16_t(m_halfSize[1]), true, 1, bgfx::TextureFormat::R16F, BGFX_TEXTURE_COMPUTE_WRITE | SAMPLER_POINT_CLAMP);
+			}
+
+			m_pingPongHalfResultA = bgfx::createTexture2D(uint16_t(m_halfSize[0]), uint16_t(m_halfSize[1]), false, 2, bgfx::TextureFormat::RG8, BGFX_TEXTURE_COMPUTE_WRITE);
+			m_pingPongHalfResultB = bgfx::createTexture2D(uint16_t(m_halfSize[0]), uint16_t(m_halfSize[1]),  false, 2, bgfx::TextureFormat::RG8, BGFX_TEXTURE_COMPUTE_WRITE);
+
+			m_finalResults = bgfx::createTexture2D(uint16_t(m_halfSize[0]), uint16_t(m_halfSize[1]),  false, 4, bgfx::TextureFormat::RG8, BGFX_TEXTURE_COMPUTE_WRITE | SAMPLER_LINEAR_CLAMP);
+
+			m_normals = bgfx::createTexture2D(uint16_t(m_size[0]), uint16_t(m_size[1]),  false, 1, bgfx::TextureFormat::RGBA8, BGFX_TEXTURE_COMPUTE_WRITE);
+
+			m_importanceMap = bgfx::createTexture2D(uint16_t(m_quarterSize[0]), uint16_t(m_quarterSize[1]), false, 1, bgfx::TextureFormat::R8, BGFX_TEXTURE_COMPUTE_WRITE | SAMPLER_LINEAR_CLAMP);
+			m_importanceMapPong = bgfx::createTexture2D(uint16_t(m_quarterSize[0]), uint16_t(m_quarterSize[1]), false, 1, bgfx::TextureFormat::R8, BGFX_TEXTURE_COMPUTE_WRITE | SAMPLER_LINEAR_CLAMP);
+
+			m_aoMap = bgfx::createTexture2D(uint16_t(m_size[0]), uint16_t(m_size[1]), false, 1, bgfx::TextureFormat::R8, BGFX_TEXTURE_COMPUTE_WRITE | SAMPLER_POINT_CLAMP);
+		}
+
+		// Releases the g-buffer frame buffer and the working textures created by
+		// createFramebuffers(). m_loadCounter is not touched here.
+		void destroyFramebuffers()
+		{
+			bgfx::destroy(m_gbuffer);
+
+			for (bgfx::TextureHandle halfDepth : m_halfDepths)
+			{
+				bgfx::destroy(halfDepth);
+			}
+
+			bgfx::destroy(m_pingPongHalfResultA);
+			bgfx::destroy(m_pingPongHalfResultB);
+			bgfx::destroy(m_finalResults);
+			bgfx::destroy(m_normals);
+			bgfx::destroy(m_aoMap);
+
+			bgfx::destroy(m_importanceMap);
+			bgfx::destroy(m_importanceMapPong);
+		}
+
+		// Fills m_uniforms for ASSAO pass `_pass` from the current settings (m_settings),
+		// the texture sizes computed by createFramebuffers() and the m_proj2 / m_view matrices.
+		// `_pass` selects the per-pass full-res offset (x = _pass % 2, y = _pass / 2) and the
+		// per-pass sampling pattern rotation. Nothing is uploaded to the GPU here.
+		void updateUniforms(int _pass)
+		{
+			vec2Set(m_uniforms.m_viewportPixelSize, 1.0f / (float)m_size[0], 1.0f / (float)m_size[1]);
+			vec2Set(m_uniforms.m_halfViewportPixelSize, 1.0f / (float)m_halfSize[0], 1.0f / (float)m_halfSize[1]);
+
+			vec2Set(m_uniforms.m_viewport2xPixelSize, m_uniforms.m_viewportPixelSize[0] * 2.0f, m_uniforms.m_viewportPixelSize[1] * 2.0f);
+			vec2Set(m_uniforms.m_viewport2xPixelSize_x_025, m_uniforms.m_viewport2xPixelSize[0] * 0.25f, m_uniforms.m_viewport2xPixelSize[1] * 0.25f);
+
+			float depthLinearizeMul = -m_proj2[3*4+2];           // float depthLinearizeMul = ( clipFar * clipNear ) / ( clipFar - clipNear );
+			float depthLinearizeAdd = m_proj2[2*4+2];           // float depthLinearizeAdd = clipFar / ( clipFar - clipNear );
+			// correct the handedness issue. need to make sure this below is correct, but I think it is.
+			if (depthLinearizeMul * depthLinearizeAdd < 0)
+				depthLinearizeAdd = -depthLinearizeAdd;
+			vec2Set(m_uniforms.m_depthUnpackConsts, depthLinearizeMul, depthLinearizeAdd);
+
+			float tanHalfFOVY = 1.0f / m_proj2[1*4+1];    // = tanf( drawContext.Camera.GetYFOV( ) * 0.5f );
+			float tanHalfFOVX = 1.0F / m_proj2[0];    // = tanHalfFOVY * drawContext.Camera.GetAspect( );
+
+			// The Y mapping is mirrored for every renderer other than OpenGL.
+			if (bgfx::getRendererType() == bgfx::RendererType::OpenGL)
+			{
+				vec2Set(m_uniforms.m_ndcToViewMul, tanHalfFOVX * 2.0f, tanHalfFOVY * 2.0f);
+				vec2Set(m_uniforms.m_ndcToViewAdd, tanHalfFOVX * -1.0f, tanHalfFOVY * -1.0f);
+			}
+			else
+			{
+				vec2Set(m_uniforms.m_ndcToViewMul, tanHalfFOVX * 2.0f, tanHalfFOVY * -2.0f);
+				vec2Set(m_uniforms.m_ndcToViewAdd, tanHalfFOVX * -1.0f, tanHalfFOVY * 1.0f);
+			}
+
+			// NOTE(review): the radius is clamped to [0, 100000], so a radius of exactly 0 still
+			// produces infinite reciprocals further down (m_negRecEffectRadius and
+			// m_effectSamplingRadiusNearLimitRec).
+			m_uniforms.m_effectRadius = bx::clamp(m_settings.m_radius, 0.0f, 100000.0f);
+			m_uniforms.m_effectShadowStrength = bx::clamp(m_settings.m_shadowMultiplier * 4.3f, 0.0f, 10.0f);
+			m_uniforms.m_effectShadowPow = bx::clamp(m_settings.m_shadowPower, 0.0f, 10.0f);
+			m_uniforms.m_effectShadowClamp = bx::clamp(m_settings.m_shadowClamp, 0.0f, 1.0f);
+
+			// The fade-out sliders can be set to the same value; a zero-width range would divide
+			// by zero and hand inf/NaN to the shaders. Replace a (near) zero range with a tiny
+			// positive one, which behaves like a hard cut-off at m_fadeOutFrom. Any other range,
+			// including a negative one, is used unchanged.
+			const float kMinFadeOutRange = 1.0e-3f;
+			float fadeOutRange = m_settings.m_fadeOutTo - m_settings.m_fadeOutFrom;
+			if (fadeOutRange > -kMinFadeOutRange && fadeOutRange < kMinFadeOutRange)
+			{
+				fadeOutRange = kMinFadeOutRange;
+			}
+			m_uniforms.m_effectFadeOutMul = -1.0f / fadeOutRange;
+			m_uniforms.m_effectFadeOutAdd = m_settings.m_fadeOutFrom / fadeOutRange + 1.0f;
+			m_uniforms.m_effectHorizonAngleThreshold = bx::clamp(m_settings.m_horizonAngleThreshold, 0.0f, 1.0f);
+
+			// 1.2 seems to be around the best trade off - 1.0 means on-screen radius will stop/slow growing when the camera is at 1.0 distance, so, depending on FOV, basically filling up most of the screen
+			// This setting is viewspace-dependent and not screen size dependent intentionally, so that when you change FOV the effect stays (relatively) similar.
+			float effectSamplingRadiusNearLimit = (m_settings.m_radius * 1.2f);
+
+			// if the depth precision is switched to 32bit float, this can be set to something closer to 1 (0.9999 is fine)
+			m_uniforms.m_depthPrecisionOffsetMod = 0.9992f;
+
+			// used to get average load per pixel; 9.0 is there to compensate for only doing every 9th InterlockedAdd in PSPostprocessImportanceMapB for performance reasons
+			m_uniforms.m_loadCounterAvgDiv = 9.0f / (float)(m_quarterSize[0] * m_quarterSize[1] * 255.0);
+
+			// Special settings for lowest quality level - just nerf the effect a tiny bit
+			if (m_settings.m_qualityLevel <= 0)
+			{
+				effectSamplingRadiusNearLimit *= 1.50f;
+
+				if (m_settings.m_qualityLevel < 0)
+					m_uniforms.m_effectRadius *= 0.8f;
+			}
+			effectSamplingRadiusNearLimit /= tanHalfFOVY; // to keep the effect same regardless of FOV
+
+			m_uniforms.m_effectSamplingRadiusNearLimitRec = 1.0f / effectSamplingRadiusNearLimit;
+
+			m_uniforms.m_adaptiveSampleCountLimit = m_settings.m_adaptiveQualityLimit;
+
+			m_uniforms.m_negRecEffectRadius = -1.0f / m_uniforms.m_effectRadius;
+
+			// Per-pass offset inside each 2x2 full-res quad; the Y component is flipped
+			// when the backend reports a bottom-left origin.
+			if (bgfx::getCaps()->originBottomLeft)
+			{
+				vec2Set(m_uniforms.m_perPassFullResCoordOffset, (float)(_pass % 2), 1.0f-(float)(_pass / 2));
+				vec2Set(m_uniforms.m_perPassFullResUVOffset, ((_pass % 2) - 0.0f) / m_size[0], (1.0f-((_pass / 2) - 0.0f)) / m_size[1]);
+			}
+			else
+			{
+				vec2Set(m_uniforms.m_perPassFullResCoordOffset, (float)(_pass % 2), (float)(_pass / 2));
+				vec2Set(m_uniforms.m_perPassFullResUVOffset, ((_pass % 2) - 0.0f) / m_size[0], ((_pass / 2) - 0.0f) / m_size[1]);
+			}
+
+			m_uniforms.m_invSharpness = bx::clamp(1.0f - m_settings.m_sharpness, 0.0f, 1.0f);
+			m_uniforms.m_passIndex = (float)_pass;
+			vec2Set(m_uniforms.m_quarterResPixelSize, 1.0f / (float)m_quarterSize[0], 1.0f / (float)m_quarterSize[1]);
+
+			float additionalAngleOffset = m_settings.m_temporalSupersamplingAngleOffset;  // if using temporal supersampling approach (like "Progressive Rendering Using Multi-frame Sampling" from GPU Pro 7, etc.)
+			float additionalRadiusScale = m_settings.m_temporalSupersamplingRadiusOffset; // if using temporal supersampling approach (like "Progressive Rendering Using Multi-frame Sampling" from GPU Pro 7, etc.)
+			const int subPassCount = 5;
+			// Order in which the sub-pass slots are assigned their rotation step.
+			const int subPassOrder[subPassCount] = { 0, 1, 4, 3, 2 };
+			for (int subPass = 0; subPass < subPassCount; subPass++)
+			{
+				const int a = _pass;
+				const int b = subPassOrder[subPass];
+
+				// Each pass covers a quarter turn, subdivided by the sub-pass step.
+				float angle0 = ((float)a + (float)b / (float)subPassCount) * bx::kPi * 0.5f;
+				angle0 += additionalAngleOffset;
+
+				const float ca = bx::cos(angle0);
+				const float sa = bx::sin(angle0);
+
+				float scale = 1.0f + (a - 1.5f + (b - (subPassCount - 1.0f) * 0.5f) / (float)subPassCount) * 0.07f;
+				scale *= additionalRadiusScale;
+
+				vec4Set(m_uniforms.m_patternRotScaleMatrices[subPass], scale * ca, scale * -sa, -scale * sa, -scale * ca);
+			}
+
+			// Unpack constants mapping [0, 1] to [-1, 1].
+			m_uniforms.m_normalsUnpackMul = 2.0f;
+			m_uniforms.m_normalsUnpackAdd = -1.0f;
+
+			m_uniforms.m_detailAOStrength = m_settings.m_detailShadowStrength;
+
+			// Identity when normals are generated, otherwise the transposed view matrix.
+			if (m_settings.m_generateNormals)
+			{
+				bx::mtxIdentity(m_uniforms.m_normalsWorldToViewspaceMatrix);
+			}
+			else
+			{
+				bx::mtxTranspose(m_uniforms.m_normalsWorldToViewspaceMatrix, m_view);
+			}
+		}
+
+
+		// Backbuffer size in pixels; createFramebuffers() derives every other size from these.
+		uint32_t m_width;
+		uint32_t m_height;
+		uint32_t m_debug;
+		uint32_t m_reset;
+
+		// Mouse position/buttons/wheel forwarded to imguiBeginFrame().
+		entry::MouseState m_mouseState;
+
+		// CPU-side copy of the ASSAO shader constants, filled by updateUniforms().
+		Uniforms m_uniforms;
+
+		// Resource handles
+		bgfx::ProgramHandle m_gbufferProgram;
+		bgfx::ProgramHandle m_combineProgram;
+
+		bgfx::ProgramHandle m_prepareDepthsProgram;
+		bgfx::ProgramHandle m_prepareDepthsAndNormalsProgram;
+		bgfx::ProgramHandle m_prepareDepthsHalfProgram;
+		bgfx::ProgramHandle m_prepareDepthsAndNormalsHalfProgram;
+		bgfx::ProgramHandle m_prepareDepthMipProgram;
+		bgfx::ProgramHandle m_generateQ0Program;
+		bgfx::ProgramHandle m_generateQ1Program;
+		bgfx::ProgramHandle m_generateQ2Program;
+		bgfx::ProgramHandle m_generateQ3Program;
+		bgfx::ProgramHandle m_generateQ3BaseProgram;
+		bgfx::ProgramHandle m_smartBlurProgram;
+		bgfx::ProgramHandle m_smartBlurWideProgram;
+		bgfx::ProgramHandle m_nonSmartBlurProgram;
+		bgfx::ProgramHandle m_applyProgram;
+		bgfx::ProgramHandle m_nonSmartApplyProgram;
+		bgfx::ProgramHandle m_nonSmartHalfApplyProgram;
+		bgfx::ProgramHandle m_generateImportanceMapProgram;
+		bgfx::ProgramHandle m_postprocessImportanceMapAProgram;
+		bgfx::ProgramHandle m_postprocessImportanceMapBProgram;
+		bgfx::ProgramHandle m_loadCounterClearProgram;
+
+		// Normal / color / depth attachments; created in createFramebuffers().
+		bgfx::FrameBufferHandle m_gbuffer;
+
+		// Shader uniforms
+		bgfx::UniformHandle u_rect;
+		bgfx::UniformHandle u_combineParams;
+
+		// Uniforms to identify texture samples
+		bgfx::UniformHandle s_normal;
+		bgfx::UniformHandle s_depth;
+		bgfx::UniformHandle s_color;
+		bgfx::UniformHandle s_albedo;
+		bgfx::UniformHandle s_ao;
+		bgfx::UniformHandle s_blurInput;
+		bgfx::UniformHandle s_finalSSAO;
+		bgfx::UniformHandle s_depthSource;
+		bgfx::UniformHandle s_viewspaceDepthSource;
+		bgfx::UniformHandle s_viewspaceDepthSourceMirror;
+		bgfx::UniformHandle s_importanceMap;
+
+		// Various render targets
+		bgfx::TextureHandle m_halfDepths[4];         // half-res R16F, created with mips
+		bgfx::TextureHandle m_pingPongHalfResultA;   // half-res RG8, 2 layers
+		bgfx::TextureHandle m_pingPongHalfResultB;   // half-res RG8, 2 layers
+		bgfx::TextureHandle m_finalResults;          // half-res RG8, 4 layers
+		bgfx::TextureHandle m_aoMap;                 // full-res R8
+		bgfx::TextureHandle m_normals;               // full-res RGBA8
+
+		// Only needed for quality level 3 (adaptive quality)
+		bgfx::TextureHandle m_importanceMap;         // quarter-res R8
+		bgfx::TextureHandle m_importanceMapPong;     // quarter-res R8
+		// Not created or destroyed by createFramebuffers()/destroyFramebuffers();
+		// presumably managed alongside the programs -- verify in init/shutdown.
+		bgfx::TextureHandle m_loadCounter; 
+
+		// One placed instance of a mesh in the scene.
+		struct Model
+		{
+			uint32_t mesh; // Index of mesh in m_meshes
+			float position[3]; // Translation passed to bx::mtxSRT in drawAllModels()
+		};
+
+		Model m_models[MODEL_COUNT];
+		Mesh* m_meshes[BX_COUNTOF(s_meshPaths)];
+		Mesh* m_ground;
+
+		bgfx::TextureHandle m_groundTexture;
+		bgfx::TextureHandle m_modelTexture;
+
+		// Value returned by the most recent bgfx::frame() call.
+		uint32_t m_currFrame;
+
+		// UI
+		Settings m_settings;
+		bool	m_enableSSAO;
+		bool	m_enableTexturing;
+
+		float m_texelHalf;
+		// Vertical field of view in degrees; widened by createFramebuffers() when the gutter is on.
+		float m_fovY;
+
+		// Gutter toggle from the UI; changing it sets m_recreateFrameBuffers.
+		bool m_framebufferGutter;
+		bool m_recreateFrameBuffers;
+
+		float m_view[16];
+		float m_proj[16];
+		// Projection matrix that updateUniforms() reads for depth linearization and FOV tangents.
+		float m_proj2[16];
+		int   m_size[2];                 // backbuffer size plus 2 * m_border
+		int   m_halfSize[2];             // (m_size + 1) / 2
+		int   m_quarterSize[2];          // (m_halfSize + 1) / 2
+		int   m_fullResOutScissorRect[4]; // (minX, minY, maxX, maxY) of the un-bordered area
+		int   m_halfResOutScissorRect[4]; // half-res version, grown by the blur margin and clamped
+		int   m_border;                  // gutter width in pixels; 0 when the gutter is disabled
+	};
+
+} // namespace
+
+ENTRY_IMPLEMENT_MAIN(ExampleASSAO, "39-assao", "Adaptive Screen Space Ambient Occlusion.");
+
+

+ 103 - 0
examples/39-assao/cs_assao_apply.sc

@@ -0,0 +1,103 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2DARRAY(s_finalSSAO,  1); 
+
+// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
+
+// Decodes four 2-bit edge values (left, right, top, bottom) from a normalized 8-bit
+// value. Each edge becomes 0, 1/3, 2/3 or 1; u_invSharpness is then added and the
+// result clamped to [0, 1].
+vec4 UnpackEdges( float _packedVal )
+{
+    uint bits = uint(_packedVal * 255.5);
+
+    // Left sits in the two most significant bits, bottom in the two least significant.
+    // The masks are not strictly needed for an 8-bit input but are kept for safety.
+    float left   = float((bits >> 6) & 0x03) / 3.0;
+    float right  = float((bits >> 4) & 0x03) / 3.0;
+    float top    = float((bits >> 2) & 0x03) / 3.0;
+    float bottom = float((bits >> 0) & 0x03) / 3.0;
+
+    return saturate( vec4( left, right, top, bottom ) + u_invSharpness );
+}
+
+// Final ASSAO apply pass: one thread per full-res pixel inside u_rect.
+// Reads the pixel's own value from the four-layer s_finalSSAO array, optionally blends it
+// with the three neighbouring layers using edge-aware weights, applies a 1/2.2 power and
+// stores the result in s_target.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	// u_rect.xy offsets the dispatch; u_rect.zw is the exclusive upper bound.
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(dtID.xy, u_rect.zw) ) )
+	{
+		float ao;
+		uvec2 pixPos     = uvec2(dtID.xy);
+		uvec2 pixPosHalf = pixPos / uvec2(2, 2);
+
+		// calculate the layer index into the four-layer deinterleaved source array texture
+		int mx = (int(pixPos.x) % 2);
+#if BGFX_SHADER_LANGUAGE_GLSL
+		// GLSL path: take the row parity from the vertically flipped coordinate.
+		int dimy = imageSize(s_target).y; 
+		int my = (int(dimy-1-pixPos.y) % 2);
+#else
+		int my = (int(pixPos.y) % 2);
+#endif
+		int ic = mx + my * 2;       // center index
+		int ih = (1-mx) + my * 2;   // neighbouring, horizontal
+		int iv = mx + (1-my) * 2;   // neighbouring, vertical
+		int id = (1-mx) + (1-my)*2; // diagonal
+
+		// .x is the AO term, .y the packed edges (see UnpackEdges).
+		vec2 centerVal = texelFetch(s_finalSSAO, ivec3(pixPosHalf, ic), 0 ).xy;
+    
+		ao = centerVal.x;
+
+	#if 1   // change to 0 if you want to disable last pass high-res blur (for debugging purposes, etc.)
+		vec4 edgesLRTB = UnpackEdges( centerVal.y );
+
+		// return 1.0 - vec4( edgesLRTB.x, edgesLRTB.y * 0.5 + edgesLRTB.w * 0.5, edgesLRTB.z, 0.0 ); // debug show edges
+
+		// convert index shifts to sampling offsets
+		float fmx   = float(mx);
+		float fmy   = float(my);
+    
+		// in case of an edge, push sampling offsets away from the edge (towards pixel center)
+		float fmxe  = (edgesLRTB.y - edgesLRTB.x);
+		float fmye  = (edgesLRTB.w - edgesLRTB.z);
+
+		// calculate final sampling offsets and sample using bilinear filter
+		// (each GLSL variant below uses "1.0 - y" for the vertical offset)
+#if BGFX_SHADER_LANGUAGE_GLSL
+		vec2  uvH = (dtID.xy + vec2( fmx + fmxe - 0.5, 1.0 - (0.5 - fmy) ) ) * 0.5 * u_halfViewportPixelSize;
+#else
+		vec2  uvH = (dtID.xy + vec2( fmx + fmxe - 0.5, 0.5 - fmy ) ) * 0.5 * u_halfViewportPixelSize;
+#endif
+		float   aoH = texture2DArrayLod(s_finalSSAO, vec3( uvH, ih ), 0 ).x;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		vec2  uvV = (dtID.xy + vec2( 0.5 - fmx, 1.0 - (fmy - 0.5 + fmye) ) ) * 0.5 * u_halfViewportPixelSize;
+#else
+		vec2  uvV = (dtID.xy + vec2( 0.5 - fmx, fmy - 0.5 + fmye ) ) * 0.5 * u_halfViewportPixelSize;
+#endif
+		float   aoV = texture2DArrayLod(s_finalSSAO, vec3( uvV, iv ), 0 ).x;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		vec2  uvD = (dtID.xy + vec2( fmx - 0.5 + fmxe, 1.0 - (fmy - 0.5 + fmye) ) ) * 0.5 * u_halfViewportPixelSize;
+#else
+		vec2  uvD = (dtID.xy + vec2( fmx - 0.5 + fmxe, fmy - 0.5 + fmye ) ) * 0.5 * u_halfViewportPixelSize;
+#endif
+		float   aoD = texture2DArrayLod(s_finalSSAO, vec3( uvD, id ), 0 ).x;
+
+		// reduce weight for samples near edge - if the edge is on both sides, weight goes to 0
+		vec4 blendWeights;
+		blendWeights.x = 1.0;
+		blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5;
+		blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5;
+		blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5;
+
+		// calculate weighted average (the centre weight of 1.0 keeps the sum non-zero)
+		float blendWeightsSum   = dot( blendWeights, vec4( 1.0, 1.0, 1.0, 1.0 ) );
+		ao = dot( vec4( ao, aoH, aoV, aoD ), blendWeights ) / blendWeightsSum;
+	#endif
+
+		// 1/2.2 power curve applied to the AO term before it is stored
+		ao = pow(ao,1.0/2.2);
+
+		imageStore(s_target, ivec2(dtID.xy), ao.xxxx);
+	}
+}
+

+ 50 - 0
examples/39-assao/cs_assao_generate_importance_map.sc

@@ -0,0 +1,50 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2DARRAY(s_finalSSAO,  1); 
+
+// Generates the importance map: one thread per texel of s_target.
+// For each texel it gathers a 2x2 footprint from every one of the four s_finalSSAO layers,
+// applies the same strength/power shaping as the main shader and stores a value derived
+// from the difference between the largest and smallest shaped sample.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
+
+	uvec2 dim = imageSize(s_target).xy;
+	if (all(lessThan(dtID.xy, dim) ) )
+	{
+		uvec2 basePos = uvec2(dtID.xy) * 2;
+
+		// The +1.0 offset puts the gather coordinate on the corner shared by the 2x2 footprint.
+		vec2 gatherUV = (vec2(basePos) + vec2( 1.0, 1.0 ) ) * u_halfViewportPixelSize;
+
+		float minV = 1.0;
+		float maxV = 0.0;
+		UNROLL
+		for( int i = 0; i < 4; i++ )
+		{
+			vec4 vals = textureGather(s_finalSSAO, vec3( gatherUV, i ) );
+
+			// apply the same modifications that would have been applied in the main shader
+			vals = u_effectShadowStrength * vals;
+
+			vals = 1.0 - vals;
+
+			vals = pow( saturate( vals ), u_effectShadowPow.xxxx );
+
+			maxV = max( maxV, max( max( vals.x, vals.y ), max( vals.z, vals.w ) ) );
+			minV = min( minV, min( min( vals.x, vals.y ), min( vals.z, vals.w ) ) );
+		}
+
+		float minMaxDiff = maxV - minV;
+
+		imageStore(s_target, ivec2(dtID.xy), pow( saturate( minMaxDiff * 2.0 ), 0.8 ).xxxx);
+	}
+}

+ 520 - 0
examples/39-assao/cs_assao_generate_q.sh

@@ -0,0 +1,520 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+// progressive poisson-like pattern; x, y are in [-1, 1] range; .z is consumed by SSAOTap as a per-sample weight multiplier and .w as the precalculated log2 sample-length term used for MIP selection
+#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32)
+CONST(vec4 g_samplePatternMain[INTELSSAO_MAIN_DISK_SAMPLE_COUNT]) =
+{
+   { 0.78488064,  0.56661671,  1.500000, -0.126083},    { 0.26022232, -0.29575172,  1.500000, -1.064030},    { 0.10459357,  0.08372527,  1.110000, -2.730563},    {-0.68286800,  0.04963045,  1.090000, -0.498827},
+   {-0.13570161, -0.64190155,  1.250000, -0.532765},    {-0.26193795, -0.08205118,  0.670000, -1.783245},    {-0.61177456,  0.66664219,  0.710000, -0.044234},    { 0.43675563,  0.25119025,  0.610000, -1.167283},
+   { 0.07884444,  0.86618668,  0.640000, -0.459002},    {-0.12790935, -0.29869005,  0.600000, -1.729424},    {-0.04031125,  0.02413622,  0.600000, -4.792042},    { 0.16201244, -0.52851415,  0.790000, -1.067055},
+   {-0.70991218,  0.47301072,  0.640000, -0.335236},    { 0.03277707, -0.22349690,  0.600000, -1.982384},    { 0.68921727,  0.36800742,  0.630000, -0.266718},    { 0.29251814,  0.37775412,  0.610000, -1.422520},
+   {-0.12224089,  0.96582592,  0.600000, -0.426142},    { 0.11071457, -0.16131058,  0.600000, -2.165947},    { 0.46562141, -0.59747696,  0.600000, -0.189760},    {-0.51548797,  0.11804193,  0.600000, -1.246800},
+   { 0.89141309, -0.42090443,  0.600000,  0.028192},    {-0.32402530, -0.01591529,  0.600000, -1.543018},    { 0.60771245,  0.41635221,  0.600000, -0.605411},    { 0.02379565, -0.08239821,  0.600000, -3.809046},
+   { 0.48951152, -0.23657045,  0.600000, -1.189011},    {-0.17611565, -0.81696892,  0.600000, -0.513724},    {-0.33930185, -0.20732205,  0.600000, -1.698047},    {-0.91974425,  0.05403209,  0.600000,  0.062246},
+   {-0.15064627, -0.14949332,  0.600000, -1.896062},    { 0.53180975, -0.35210401,  0.600000, -0.758838},    { 0.41487166,  0.81442589,  0.600000, -0.505648},    {-0.24106961, -0.32721516,  0.600000, -1.665244}
+};
+
+// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors
+// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples)
+CONST(uint g_numTaps[5]) = { 3, 5, 12, 0, 0 };
+
+// an example of higher quality low/medium/high settings
+// CONST(uint g_numTaps[5])  = { 4, 9, 16, 0, 0 };
+
+// ** WARNING ** if changing anything here, please remember to update the corresponding C++ code!
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Optional parts that can be enabled for a required quality preset level and above (0 == Low, 1 == Medium, 2 == High, 3 == Highest/Adaptive, 4 == reference/unused )
+// Each has its own cost. To disable just set to 5 or above.
+//
+// (experimental) tilts the disk (although only half of the samples!) towards surface normal; this helps with effect uniformity between objects but reduces effect distance and has other side-effects
+#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET                      (99)        // to disable simply set to 99 or similar
+#define SSAO_TILT_SAMPLES_AMOUNT                                        (0.4)
+//
+#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET                 (1)         // to disable simply set to 99 or similar
+#define SSAO_HALOING_REDUCTION_AMOUNT                                   (0.6)       // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable)
+//
+#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET                (2)         // to disable simply set to 99 or similar
+#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD                           (0.5)       // use 0-0.1 for super-sharp normal-based edges
+//
+#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET                         (1)         // whether to use DetailAOStrength; to disable simply set to 99 or similar
+//
+#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET                        (2)         // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too
+#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET                                   (-4.3)      // best noise/quality/performance tradeoff, found empirically
+//
+// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for 
+// testing purposes, it will not yield performance gains (or correct results)
+#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET                 (1)     
+//
+#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET  (99)        // 99 means disabled; only helpful if artifacts at the edges caused by lack of out of screen depth data are not acceptable with the depth sampler in either clamp or mirror modes
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+SAMPLER2D(s_viewspaceDepthSource,  0); 
+SAMPLER2D(s_viewspaceDepthSourceMirror,  1); 
+IMAGE2D_RO(s_normalmapSource, rgba8, 2);
+UIMAGE2D_RO(s_loadCounter, r32ui, 3); 
+SAMPLER2D(s_importanceMap,  4); 
+IMAGE2D_ARRAY_RO(s_baseSSAO, rg8, 5);
+IMAGE2D_ARRAY_WR(s_target, rg8, 6);
+
+// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
+// Packs four edge values (left, right, top, bottom) into a single normalized value using
+// 2 bits per edge: each input is clamped to [0, 1], quantized to 0..3 and weighted by
+// 64, 16, 4 and 1 (over 255) respectively.
+float PackEdges( vec4 edgesLRTB )
+{
+    // Reference integer form, kept for comparison:
+    //    ivec4 edgesLRTBi = ivec4( saturate( edgesLRTB ) * 3.0 + 0.5 );
+    //    return ( (edgesLRTBi.x << 6) + (edgesLRTBi.y << 4) + (edgesLRTBi.z << 2) + (edgesLRTBi.w << 0) ) / 255.0;
+
+    // optimized float form, should give the same result as the integer form
+    vec4 quantized = round( saturate( edgesLRTB ) * 3.05 );
+
+    return dot( quantized, vec4( 64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0 ) ) ;
+}
+
+// Reconstructs a viewspace position from a screen position and its viewspace depth:
+// xy = (u_ndcToViewMul * pos + u_ndcToViewAdd) * depth, z = depth.
+vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
+{
+    vec2 viewXY = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
+
+    return vec3( viewXY, viewspaceDepth );
+}
+
+// calculate effect radius and fit our screen sampling pattern inside it
+// Derives the per-pixel radius parameters.
+//   pixCenterLength                   - distance term used to shrink the radius when too close
+//   pixelDirRBViewspaceSizeAtCenterZ  - viewspace size of a pixel at the centre depth (only .x is used)
+//   pixLookupRadiusMod (out)          - effect radius expressed in pixels, scaled by 0.85
+//   effectRadius (out)                - u_effectRadius after the near-limit reduction
+//   falloffCalcMulSq (out)            - -1 / effectRadius^2, used for the sample falloff
+void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq )
+{
+    effectRadius = u_effectRadius;
+
+    // leaving this out for performance reasons: use something similar if radius needs to scale based on distance
+    //effectRadius *= pow( pixCenterLength, u_radiusDistanceScalingFunctionPow);
+
+    // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
+    const float tooCloseLimitMod = saturate( pixCenterLength * u_effectSamplingRadiusNearLimitRec ) * 0.8 + 0.2;
+
+    effectRadius *= tooCloseLimitMod;
+
+    // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
+    pixLookupRadiusMod = (0.85 * effectRadius) / pixelDirRBViewspaceSizeAtCenterZ.x;
+
+    // used to calculate falloff (both for AO samples and per-sample weights)
+    falloffCalcMulSq = -1.0 / (effectRadius*effectRadius);
+}
+
+// Slope-sensitive, depth-based edge detection. Returns one value per neighbour
+// (left, right, top, bottom) in [0, 1]; larger depth differences give smaller values.
+vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
+{
+    // Signed depth difference of each neighbour to the centre.
+    vec4 depthDelta = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
+
+    // Adding the opposite neighbour's difference cancels a constant slope across the pixel.
+    vec4 slopeAdjustedDelta = depthDelta + depthDelta.yxwz;
+
+    vec4 edgeDistance = min( abs( depthDelta ), abs( slopeAdjustedDelta ) );
+
+    // The threshold scales with the centre depth.
+    return saturate( ( 1.3 - edgeDistance / (centerZ * 0.040) ) );
+
+    // A cheaper variant without the slope adjustment exists but has artifacts:
+    // edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ );
+    // return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
+}
+
+// Unpacks a stored normal with u_normalsUnpackMul / u_normalsUnpackAdd and, when
+// SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION is set, rotates it with the three rows of
+// u_normalsWorldToViewspaceMatrix. The result is intentionally not re-normalized.
+vec3 DecodeNormal( vec3 encodedNormal )
+{
+    vec3 decoded = encodedNormal * u_normalsUnpackMul.xxx + u_normalsUnpackAdd.xxx;
+
+#if SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION
+    float viewX = dot(decoded, u_normalsWorldToViewspaceMatrix0.xyz);
+    float viewY = dot(decoded, u_normalsWorldToViewspaceMatrix1.xyz);
+    float viewZ = dot(decoded, u_normalsWorldToViewspaceMatrix2.xyz);
+    decoded = vec3( viewX, viewY, viewZ );
+#endif
+
+    // decoded = normalize( decoded );  // normalize adds around 2.5% cost on High settings but makes little (PSNR 66.7) visual difference when normals are as in the sample (stored in R8G8B8A8_UNORM,
+    //                                  // decoded in the shader), however it will likely be required if using different encoding/decoding or the inputs are not normalized, etc.
+
+    return decoded;
+}
+
+// Loads and decodes the normal stored at texel `pos` of s_normalmapSource.
+vec3 LoadNormal( ivec2 pos )
+{
+    return DecodeNormal( imageLoad(s_normalmapSource, pos).xyz );
+}
+
+// Same as above, for the texel at `pos + offset`.
+vec3 LoadNormal( ivec2 pos, ivec2 offset )
+{
+    return DecodeNormal( imageLoad(s_normalmapSource, pos + offset ).xyz );
+}
+
+// all vectors in viewspace
+// Obscurance contributed by one sample; all vectors are in viewspace.
+//   pixelNormal      - normal at the shaded pixel
+//   hitDelta         - vector from the shaded pixel to the sample position
+//   falloffCalcMulSq - -1 / effectRadius^2 (see CalculateRadiusParameters)
+// Returns the cosine between normal and sample direction, reduced by
+// u_effectHorizonAngleThreshold, clamped at zero and scaled by the distance falloff.
+// NOTE(review): a zero-length hitDelta divides by zero here -- confirm callers never pass one.
+float CalculatePixelObscurance( vec3 pixelNormal, vec3 hitDelta, float falloffCalcMulSq )
+{
+  float lengthSq = dot( hitDelta, hitDelta );
+  float NdotD = dot(pixelNormal, hitDelta) / sqrt(lengthSq);
+
+  // Falls linearly with squared distance, reaching 0 at the effect radius.
+  float falloffMult = max( 0.0, lengthSq * falloffCalcMulSq + 1.0 );
+
+  // Float literal so both max() arguments have the same type.
+  return max( 0.0, NdotD - u_effectHorizonAngleThreshold ) * falloffMult;
+}
+
+// Evaluates a single depth sample at `samplingUV` (MIP `mipLevel`) and accumulates its
+// weighted obscurance into `obscuranceSum` and its weight into `weightSum`.
+// From SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET upwards, the weight of samples
+// closer to the camera than the shaded pixel is reduced (the weight is a function of
+// -hitDelta.z). `negViewspaceDir` and `dbgTapIndex` are not used by the current body.
+void SSAOTapInner( const int qualityLevel, inout float obscuranceSum, inout float weightSum, const vec2 samplingUV, const float mipLevel, const vec3 pixCenterPos, const vec3 negViewspaceDir,vec3 pixelNormal, const float falloffCalcMulSq, const float weightMod, const int dbgTapIndex)
+{
+    // get depth at sample
+    float viewspaceSampleZ = texture2DLod(s_viewspaceDepthSource, samplingUV.xy, mipLevel ).x;
+
+    // convert to viewspace
+    vec3 hitPos = NDCToViewspace( samplingUV.xy, viewspaceSampleZ ).xyz;
+    vec3 hitDelta = hitPos - pixCenterPos;
+
+    float obscurance = CalculatePixelObscurance( pixelNormal, hitDelta, falloffCalcMulSq );
+    float weight = 1.0;
+
+    if( qualityLevel >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET )
+    {
+        //float reduct = max( 0.0, dot( hitDelta, negViewspaceDir ) );
+        // Float literal so both max() arguments have the same type.
+        float reduct = max( 0.0, -hitDelta.z ); // cheaper, less correct version
+        reduct = saturate( reduct * u_negRecEffectRadius + 2.0 ); // saturate( 2.0 - reduct / u_effectRadius );
+        weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT);
+    }
+    weight *= weightMod;
+    obscuranceSum += obscurance * weight;
+    weightSum += weight;
+}
+
+// Evaluates the sample pair for pattern entry 'tapIndex': the rotated/scaled offset and its mirrored counterpart.
+//   qualityLevel        - quality preset; gates depth mips and the tilting of the mirrored sample
+//   obscuranceSum       - (inout) running sum of obscurance * weight
+//   weightSum           - (inout) running sum of weights
+//   tapIndex            - index into g_samplePatternMain
+//   rotScale            - rotation/scale matrix applied to the pattern offset
+//   pixCenterPos        - viewspace position of the shaded pixel
+//   negViewspaceDir     - forwarded to SSAOTapInner
+//   pixelNormal         - viewspace normal of the shaded pixel
+//   normalizedScreenPos - UV of the shaded pixel; sample offsets are added to it
+//   mipOffset           - added to the per-sample mip level when depth mips are enabled
+//   falloffCalcMulSq    - falloff multiplier forwarded to SSAOTapInner
+//   weightMod           - weight multiplier for both samples (further scaled by the pattern's z component)
+//   normXY/normXYLength - direction and amount used to tilt the mirrored sample
+void SSAOTap( const int qualityLevel, inout float obscuranceSum, inout float weightSum, const int tapIndex, const mat2 rotScale, const vec3 pixCenterPos, const vec3 negViewspaceDir, vec3 pixelNormal, const vec2 normalizedScreenPos, const float mipOffset, const float falloffCalcMulSq, float weightMod, vec2 normXY, float normXYLength)
+{
+    vec2  sampleOffset;
+    float   samplePow2Len;
+
+    // patterns
+    {
+        vec4 newSample = g_samplePatternMain[tapIndex];
+        sampleOffset    = mul( rotScale, newSample.xy );
+        samplePow2Len   = newSample.w;                      // precalculated, same as: samplePow2Len = log2( length( newSample.xy ) );
+        weightMod *= newSample.z;
+    }
+
+    // snap to pixel center (more correct obscurance math, avoids artifacts)
+    sampleOffset                    = round(sampleOffset);
+
+    // calculate MIP based on the sample distance from the centre, similar to as described
+    // in http://graphics.cs.williams.edu/papers/SAOHPG12/.
+    // (0.0 rather than 0 so both branches of the ternary are float without relying on implicit conversion)
+    float mipLevel = ( qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET )?(0.0):(samplePow2Len + mipOffset);
+
+#if BGFX_SHADER_LANGUAGE_GLSL
+    sampleOffset.y = -sampleOffset.y;
+#endif
+    vec2 samplingUV = sampleOffset * u_viewport2xPixelSize + normalizedScreenPos;
+
+    SSAOTapInner( qualityLevel, obscuranceSum, weightSum, samplingUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2);
+
+    // for the second tap, just use the mirrored offset
+    vec2 sampleOffsetMirroredUV    = -sampleOffset;
+
+    // tilt the second set of samples so that the disk is effectively rotated by the normal
+    // effective at removing one set of artifacts, but too expensive for lower quality settings
+    if( qualityLevel >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET )
+    {
+        float dotNorm = dot( sampleOffsetMirroredUV, normXY );
+        sampleOffsetMirroredUV -= dotNorm * normXYLength * normXY;
+
+        // the tilted offset is no longer integral: snap to pixel center again (more correct obscurance math, avoids artifacts)
+        sampleOffsetMirroredUV = round(sampleOffsetMirroredUV);
+    }
+
+    vec2 samplingMirroredUV = sampleOffsetMirroredUV * u_viewport2xPixelSize + normalizedScreenPos;
+
+    SSAOTapInner( qualityLevel, obscuranceSum, weightSum, samplingMirroredUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2 + 1);
+}
+
+// Computes the SSAO term for one pixel of one half-resolution pass.
+// this function is designed to only work with half/half depth at the moment - there's a couple of hardcoded paths that expect pixel/texel size, so it will not work for full res
+//   outShadowTerm - adaptive base pass: the raw weighted-average obscurance; otherwise the final occlusion (0 means fully occluded, 1 means fully lit)
+//   outEdges      - adaptive base pass: zero; otherwise the left/right/top/bottom edge mask (1 means no edge, 0 means edge)
+//   outWeight     - accumulated sample weight
+//   SVPos         - pixel coordinate handled by this invocation (truncated to an integer position)
+//   qualityLevel  - quality preset; features are enabled by comparing it against the SSAO_*_ENABLE_AT_QUALITY_PRESET constants
+//   adaptiveBase  - true for the base pass of the adaptive approach (fixed SSAO_ADAPTIVE_TAP_BASE_COUNT taps; edges, detail AO and border radius reduction are skipped)
+void GenerateSSAOShadowsInternal( out float outShadowTerm, out vec4 outEdges, out float outWeight, 
+    const vec2 SVPos, const int qualityLevel, bool adaptiveBase)
+{
+    vec2 SVPosRounded = trunc( SVPos );
+    uvec2 SVPosui = uvec2( SVPosRounded ); //same as uvec2( SVPos )
+
+    const uint numberOfTaps = (adaptiveBase)?(SSAO_ADAPTIVE_TAP_BASE_COUNT) : ( g_numTaps[qualityLevel] );
+    float pixZ, pixLZ, pixTZ, pixRZ, pixBZ;
+
+#if BGFX_SHADER_LANGUAGE_GLSL  
+    vec4 valuesUL     = textureGather(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize + vec2(0.0,u_halfViewportPixelSize.y)).wzyx;
+    vec4 valuesBR     = textureGatherOffset(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize + vec2(0.0,u_halfViewportPixelSize.y), ivec2( 1, -1 ) ).wzyx;
+#else
+    vec4 valuesUL     = textureGather(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize );
+    vec4 valuesBR     = textureGatherOffset(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize, ivec2( 1, 1 ) );
+#endif
+
+    // get this pixel's viewspace depth
+    pixZ = valuesUL.y; 
+
+    // get left right top bottom neighbouring pixels for edge detection (gets compiled out on qualityLevel == 0)
+    pixLZ   = valuesUL.x;
+    pixTZ   = valuesUL.z;
+    pixRZ   = valuesBR.z;
+    pixBZ   = valuesBR.x;
+
+    vec2 normalizedScreenPos = SVPosRounded * u_viewport2xPixelSize + u_viewport2xPixelSize_x_025;
+    vec3 pixCenterPos = NDCToViewspace( normalizedScreenPos, pixZ );
+
+    // Load this pixel's viewspace normal
+    uvec2 fullResCoord = uvec2(SVPosui * 2 + u_perPassFullResCoordOffset.xy);
+    vec3 pixelNormal = LoadNormal( ivec2(fullResCoord) );
+
+    const vec2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * u_ndcToViewMul * u_viewport2xPixelSize;  // optimized approximation of:  vec2 pixelDirRBViewspaceSizeAtCenterZ = NDCToViewspace( normalizedScreenPos.xy + u_viewportPixelSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy;
+
+    float pixLookupRadiusMod;
+    float falloffCalcMulSq;
+
+    // calculate effect radius and fit our screen sampling pattern inside it
+    float effectViewspaceRadius;
+    CalculateRadiusParameters( length( pixCenterPos ), pixelDirRBViewspaceSizeAtCenterZ, pixLookupRadiusMod, effectViewspaceRadius, falloffCalcMulSq );
+
+    // calculate samples rotation/scaling
+    mat2 rotScale;
+    {
+        // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead
+        if( !adaptiveBase && (qualityLevel >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET) )
+        {
+            float nearScreenBorder = min( min( normalizedScreenPos.x, 1.0 - normalizedScreenPos.x ), min( normalizedScreenPos.y, 1.0 - normalizedScreenPos.y ) );
+            nearScreenBorder = saturate( 10.0 * nearScreenBorder + 0.6 );
+            pixLookupRadiusMod *= nearScreenBorder;
+        }
+
+        // load & update pseudo-random rotation matrix
+#if BGFX_SHADER_LANGUAGE_GLSL
+        uint pseudoRandomIndex = uint( (imageSize(s_target).y-1.0-SVPosRounded.y) * 2 + SVPosRounded.x ) % 5;
+#else
+        uint pseudoRandomIndex = uint( SVPosRounded.y * 2 + SVPosRounded.x ) % 5;
+#endif
+        vec4 rs = u_patternRotScaleMatrices( pseudoRandomIndex );
+        rotScale = mat2( rs.x * pixLookupRadiusMod, rs.y * pixLookupRadiusMod, rs.z * pixLookupRadiusMod, rs.w * pixLookupRadiusMod );
+    }
+
+    // the main obscurance & sample weight storage
+    float obscuranceSum = 0.0;
+    float weightSum = 0.0;
+
+    // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge)
+    vec4 edgesLRTB = vec4( 1.0, 1.0, 1.0, 1.0 );
+
+    // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats
+    pixCenterPos *= u_depthPrecisionOffsetMod;
+
+    if( !adaptiveBase && (qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET) )
+    {
+        edgesLRTB = CalculateEdges( pixZ, pixLZ, pixRZ, pixTZ, pixBZ );
+    }
+
+    // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection)
+    if( !adaptiveBase && (qualityLevel >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET) )
+    {
+        // disable in case of quality level 4 (reference)
+        if( qualityLevel != 4 )
+        {
+            //approximate neighbouring pixels positions (actually just deltas or "positions - pixCenterPos" )
+            vec3 viewspaceDirZNormalized = vec3( pixCenterPos.xy / pixCenterPos.zz, 1.0 );
+            vec3 pixLDelta  = vec3( -pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0 ) + viewspaceDirZNormalized * (pixLZ - pixCenterPos.z); // very close approximation of: vec3 pixLPos  = NDCToViewspace( normalizedScreenPos + vec2( -u_halfViewportPixelSize.x, 0.0 ), pixLZ ).xyz - pixCenterPos.xyz;
+            vec3 pixRDelta  = vec3( +pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0 ) + viewspaceDirZNormalized * (pixRZ - pixCenterPos.z); // very close approximation of: vec3 pixRPos  = NDCToViewspace( normalizedScreenPos + vec2( +u_halfViewportPixelSize.x, 0.0 ), pixRZ ).xyz - pixCenterPos.xyz;
+#if BGFX_SHADER_LANGUAGE_GLSL
+            vec3 pixTDelta  = vec3( 0.0, +pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixTZ - pixCenterPos.z); // very close approximation of: vec3 pixTPos  = NDCToViewspace( normalizedScreenPos + vec2( 0.0, -u_halfViewportPixelSize.y ), pixTZ ).xyz - pixCenterPos.xyz;
+            vec3 pixBDelta  = vec3( 0.0, -pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixBZ - pixCenterPos.z); // very close approximation of: vec3 pixBPos  = NDCToViewspace( normalizedScreenPos + vec2( 0.0, +u_halfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz;
+#else
+            vec3 pixTDelta  = vec3( 0.0, -pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixTZ - pixCenterPos.z); // very close approximation of: vec3 pixTPos  = NDCToViewspace( normalizedScreenPos + vec2( 0.0, -u_halfViewportPixelSize.y ), pixTZ ).xyz - pixCenterPos.xyz;
+            vec3 pixBDelta  = vec3( 0.0, +pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixBZ - pixCenterPos.z); // very close approximation of: vec3 pixBPos  = NDCToViewspace( normalizedScreenPos + vec2( 0.0, +u_halfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz;
+#endif
+
+            const float rangeReductionConst         = 4.0f;                         // this is to avoid various artifacts
+            const float modifiedFalloffCalcMulSq    = rangeReductionConst * falloffCalcMulSq;
+
+            vec4 additionalObscurance;
+            additionalObscurance.x = CalculatePixelObscurance( pixelNormal, pixLDelta, modifiedFalloffCalcMulSq );
+            additionalObscurance.y = CalculatePixelObscurance( pixelNormal, pixRDelta, modifiedFalloffCalcMulSq );
+            additionalObscurance.z = CalculatePixelObscurance( pixelNormal, pixTDelta, modifiedFalloffCalcMulSq );
+            additionalObscurance.w = CalculatePixelObscurance( pixelNormal, pixBDelta, modifiedFalloffCalcMulSq );
+
+            obscuranceSum += u_detailAOStrength * dot( additionalObscurance, edgesLRTB );
+        }
+    }
+
+    // Sharp normals also create edges - but this adds to the cost as well
+    if( !adaptiveBase && (qualityLevel >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET ) )
+    {
+        vec3 neighbourNormalL  = LoadNormal( ivec2(fullResCoord), ivec2( -2,  0 ) );
+        vec3 neighbourNormalR  = LoadNormal( ivec2(fullResCoord), ivec2(  2,  0 ) );
+#if BGFX_SHADER_LANGUAGE_GLSL
+        vec3 neighbourNormalT  = LoadNormal( ivec2(fullResCoord), ivec2(  0,  2 ) );
+        vec3 neighbourNormalB  = LoadNormal( ivec2(fullResCoord), ivec2(  0, -2 ) );
+#else
+        vec3 neighbourNormalT  = LoadNormal( ivec2(fullResCoord), ivec2(  0, -2 ) );
+        vec3 neighbourNormalB  = LoadNormal( ivec2(fullResCoord), ivec2(  0,  2 ) );
+#endif
+
+        const float dotThreshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD;
+
+        vec4 normalEdgesLRTB;
+        normalEdgesLRTB.x = saturate( (dot( pixelNormal, neighbourNormalL ) + dotThreshold ) );
+        normalEdgesLRTB.y = saturate( (dot( pixelNormal, neighbourNormalR ) + dotThreshold ) );
+        normalEdgesLRTB.z = saturate( (dot( pixelNormal, neighbourNormalT ) + dotThreshold ) );
+        normalEdgesLRTB.w = saturate( (dot( pixelNormal, neighbourNormalB ) + dotThreshold ) );
+
+//#define SSAO_SMOOTHEN_NORMALS // fixes some aliasing artifacts but kills a lot of high detail and adds to the cost - not worth it probably but feel free to play with it
+#ifdef SSAO_SMOOTHEN_NORMALS
+        //neighbourNormalL  = LoadNormal( fullResCoord, ivec2( -1,  0 ) );
+        //neighbourNormalR  = LoadNormal( fullResCoord, ivec2(  1,  0 ) );
+        //neighbourNormalT  = LoadNormal( fullResCoord, ivec2(  0, -1 ) );
+        //neighbourNormalB  = LoadNormal( fullResCoord, ivec2(  0,  1 ) );
+        pixelNormal += neighbourNormalL * edgesLRTB.x + neighbourNormalR * edgesLRTB.y + neighbourNormalT * edgesLRTB.z + neighbourNormalB * edgesLRTB.w;
+        pixelNormal = normalize( pixelNormal );
+#endif
+
+        edgesLRTB *= normalEdgesLRTB;
+    }
+
+    const float globalMipOffset     = SSAO_DEPTH_MIPS_GLOBAL_OFFSET;
+    // 0.0 rather than 0 so both branches of the ternary are float without relying on implicit conversion
+    float mipOffset = ( qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET ) ? ( 0.0 ) : ( log2( pixLookupRadiusMod ) + globalMipOffset );
+
+    // Used to tilt the second set of samples so that the disk is effectively rotated by the normal
+    // effective at removing one set of artifacts, but too expensive for lower quality settings
+    vec2 normXY = vec2( pixelNormal.x, pixelNormal.y );
+    float normXYLength = length( normXY );
+    // a normal pointing straight at the camera has a zero-length XY part; clamp the divisor so normXY stays finite
+    // (the tilt itself is scaled by normXYLength, so it still vanishes for such normals)
+    float normXYDivisor = max( normXYLength, 1e-6 );
+    normXY /= vec2( normXYDivisor, -normXYDivisor );
+    normXYLength *= SSAO_TILT_SAMPLES_AMOUNT;
+
+    const vec3 negViewspaceDir = -normalize( pixCenterPos );
+
+    // standard, non-adaptive approach
+    if( (qualityLevel != 3) || adaptiveBase )
+    {
+        // [unroll] // <- doesn't seem to help on any platform, although the compilers seem to unroll anyway if const number of tap used!
+        for( uint i = 0; i < numberOfTaps; i++ )
+        {
+            SSAOTap( qualityLevel, obscuranceSum, weightSum, int(i), rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, mipOffset, falloffCalcMulSq, 1.0, normXY, normXYLength);
+        }
+    }
+    else // if( qualityLevel == 3 ) adaptive approach
+    {
+        // add new ones if needed
+        vec2 fullResUV = normalizedScreenPos + u_perPassFullResUVOffset.xy;
+        float importance = texture2DLod(s_importanceMap, fullResUV, 0.0 ).x;
+
+        // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance
+        obscuranceSum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS));
+
+        // load existing base values
+        vec2 baseValues = imageLoad(s_baseSSAO, ivec3( SVPosui, u_passIndex ) ).xy;
+        weightSum += baseValues.y * (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0);
+        obscuranceSum += (baseValues.x) * weightSum;
+
+        // (disabled) increase importance around edges
+        //float edgeCount = dot( 1.0-edgesLRTB, vec4( 1.0, 1.0, 1.0, 1.0 ) );
+        //importance += edgeCount / float(SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT);
+
+        float avgTotalImportance = float(imageLoad(s_loadCounter,ivec2(0,0)).x) * u_loadCounterAvgDiv;
+
+        float importanceLimiter = saturate( u_adaptiveSampleCountLimit / avgTotalImportance );
+        importance *= importanceLimiter;
+
+        float additionalSampleCountFlt = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance;
+
+        const float blendRange = 3.0; // use 1 to just blend the last one; use larger number to blend over more for a more smooth transition
+        const float blendRangeInv = 1.0 / blendRange;
+
+        additionalSampleCountFlt += 0.5;
+        uint additionalSamples   = uint( additionalSampleCountFlt );
+        uint additionalSamplesTo = min( SSAO_MAX_TAPS, additionalSamples + SSAO_ADAPTIVE_TAP_BASE_COUNT );
+
+        // additional manual unroll doesn't help unfortunately
+        LOOP
+        for( uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additionalSamplesTo; i++ )
+        {
+            additionalSampleCountFlt -= 1.0f;
+            float weightMod = saturate(additionalSampleCountFlt * blendRangeInv); // slowly blend in the last few samples
+            SSAOTap( qualityLevel, obscuranceSum, weightSum, int(i), rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, mipOffset, falloffCalcMulSq, weightMod, normXY, normXYLength);
+        }
+    }
+
+    // calculate weighted average
+    float obscurance = obscuranceSum / weightSum;
+
+    // early out for adaptive base - just output the raw average and the weight (used for the next pass)
+    if( adaptiveBase )
+    {
+        outShadowTerm   = obscurance;
+        outEdges        = vec4(0,0,0,0);
+        outWeight       = weightSum;
+        return;
+    }
+
+    // calculate fadeout (1 close, gradient, 0 far)
+    float fadeOut = saturate( pixCenterPos.z * u_effectFadeOutMul + u_effectFadeOutAdd );
+
+    // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one)
+    // (adaptiveBase is always false here because of the early out above)
+    if( qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET )
+    {
+        // float edgeCount = dot( 1.0-edgesLRTB, vec4( 1.0, 1.0, 1.0, 1.0 ) );
+
+        // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts
+        float edgeFadeoutFactor = saturate( (1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35) + saturate( (1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35 );
+
+        // (experimental) if you want to reduce the effect next to any edge
+        // edgeFadeoutFactor += 0.1 * saturate( dot( 1 - edgesLRTB, vec4( 1, 1, 1, 1 ) ) );
+
+        fadeOut *= saturate( 1.0 - edgeFadeoutFactor );
+    }
+
+    // same as above, but a lot more conservative version
+    // fadeOut *= saturate( dot( edgesLRTB, vec4( 0.9, 0.9, 0.9, 0.9 ) ) - 2.6 );
+
+    // strength
+    obscurance = u_effectShadowStrength * obscurance;
+
+    // clamp
+    obscurance = min( obscurance, u_effectShadowClamp );
+
+    // fadeout
+    obscurance *= fadeOut;
+
+    // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility),
+    // to be in line with what is more commonly used.
+    float occlusion = 1.0 - obscurance;
+
+    // modify the gradient
+    // note: this cannot be moved to a later pass because of loss of precision after storing in the render target
+    occlusion = pow( saturate( occlusion ), u_effectShadowPow );
+
+    // outputs!
+    outShadowTerm   = occlusion;    // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit)
+    outEdges        = edgesLRTB;    // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc.
+    outWeight       = weightSum;
+}
+
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	// pixel handled by this invocation: invocation id offset by u_rect.xy; only pixels below u_rect.zw are processed
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		float shadowTerm;
+		vec4  edges;
+		float weight;
+		GenerateSSAOShadowsInternal( shadowTerm, edges, weight, vec2(pixel.xy), ASSAO_QUALITY, ASSAO_ADAPTIVE_BASE);
+
+		// second channel: normalized sample weight for the adaptive base pass, packed edges otherwise
+		float secondChannel;
+		if ( ASSAO_ADAPTIVE_BASE )
+		{
+			secondChannel = weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0);
+		}
+		else if (ASSAO_QUALITY == 0)
+		{
+			secondChannel = PackEdges( vec4( 1, 1, 1, 1 ) ); // no edges in low quality
+		}
+		else
+		{
+			secondChannel = PackEdges( edges );
+		}
+
+		vec2 result = vec2( shadowTerm, secondChannel );
+		imageStore(s_target, ivec3(pixel.xy, u_layer), result.xyyy);
+	}
+}

+ 9 - 0
examples/39-assao/cs_assao_generate_q0.sc

@@ -0,0 +1,9 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#define ASSAO_QUALITY 0 // passed as qualityLevel to GenerateSSAOShadowsInternal by the included generator; with 0 its main() stores "no edges"
+#define ASSAO_ADAPTIVE_BASE false // passed as adaptiveBase; false makes main() store packed edges in the second channel
+
+#include "cs_assao_generate_q.sh" 

+ 9 - 0
examples/39-assao/cs_assao_generate_q1.sc

@@ -0,0 +1,9 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#define ASSAO_QUALITY 1 // passed as qualityLevel to GenerateSSAOShadowsInternal by the included generator
+#define ASSAO_ADAPTIVE_BASE false // passed as adaptiveBase; false makes main() store packed edges in the second channel
+
+#include "cs_assao_generate_q.sh" 

+ 9 - 0
examples/39-assao/cs_assao_generate_q2.sc

@@ -0,0 +1,9 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#define ASSAO_QUALITY 2 // passed as qualityLevel to GenerateSSAOShadowsInternal by the included generator
+#define ASSAO_ADAPTIVE_BASE false // passed as adaptiveBase; false makes main() store packed edges in the second channel
+
+#include "cs_assao_generate_q.sh" 

+ 9 - 0
examples/39-assao/cs_assao_generate_q3.sc

@@ -0,0 +1,9 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#define ASSAO_QUALITY 3 // passed as qualityLevel; 3 with adaptiveBase == false selects the adaptive branch of GenerateSSAOShadowsInternal
+#define ASSAO_ADAPTIVE_BASE false // passed as adaptiveBase; false makes main() store packed edges in the second channel
+
+#include "cs_assao_generate_q.sh" 

+ 9 - 0
examples/39-assao/cs_assao_generate_q3base.sc

@@ -0,0 +1,9 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#define ASSAO_QUALITY 3 // passed as qualityLevel to GenerateSSAOShadowsInternal by the included generator
+#define ASSAO_ADAPTIVE_BASE true // passed as adaptiveBase; true makes main() store the normalized sample weight instead of packed edges
+
+#include "cs_assao_generate_q.sh" 

+ 15 - 0
examples/39-assao/cs_assao_load_counter_clear.sc

@@ -0,0 +1,15 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+UIMAGE2D_WR(s_loadCounterOutputUAV, r32ui, 0);
+
+// Single invocation that writes zero to texel (0, 0) of s_loadCounterOutputUAV.
+NUM_THREADS(1, 1, 1)
+void main() 
+{
+	const ivec2 counterTexel = ivec2(0, 0);
+	const uvec4 cleared = uvec4(0,0,0,0);
+	imageStore(s_loadCounterOutputUAV, counterTexel, cleared);
+}

+ 29 - 0
examples/39-assao/cs_assao_non_smart_apply.sc

@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2DARRAY(s_finalSSAO,	1);
+
+// edge-ignorant blur & apply (for the lowest quality level 0):
+// averages layers 0-3 of s_finalSSAO at the pixel centre, applies pow( x, 1/2.2 ) and stores the result
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		vec2 uv = (pixel.xy+vec2(0.5,0.5)) * u_viewportPixelSize;
+
+		// accumulate the four layers in order
+		float sum = texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 0 ), 0.0 ).x;
+		sum += texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 1 ), 0.0 ).x;
+		sum += texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 2 ), 0.0 ).x;
+		sum += texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 3 ), 0.0 ).x;
+
+		float shade = pow(sum * 0.25,1.0/2.2);
+		imageStore(s_target, ivec2(pixel.xy), vec4(shade, shade, shade, shade));
+	}
+}
+

+ 37 - 0
examples/39-assao/cs_assao_non_smart_blur.sc

@@ -0,0 +1,37 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+SAMPLER2DARRAY(s_blurInput,  1); 
+
+// edge-ignorant blur in x and y directions, 9 pixels touched (for the lowest quality level 0):
+// x = equally weighted mix of the centre and four off-centre taps, y = centre.y passed through unchanged
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		vec2 uv = (pixel.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
+		vec2 nearStep = u_halfViewportPixelSize * 0.5f;
+
+#if BGFX_SHADER_LANGUAGE_GLSL
+		nearStep.y = -nearStep.y;
+#endif
+
+		// taps sit half a pixel off along one axis and one and a half pixels along the other
+		vec2 farStep = nearStep * 3.0;
+
+		vec2 centre = texture2DArrayLod(s_blurInput, vec3(uv, 0.0), 0.0 ).xy;
+
+		vec4 taps = vec4(
+			  texture2DArrayLod(s_blurInput, vec3(uv + vec2( -farStep.x,  -nearStep.y ),0.0) , 0.0 ).x
+			, texture2DArrayLod(s_blurInput, vec3(uv + vec2( +nearStep.x, -farStep.y  ),0.0) , 0.0 ).x
+			, texture2DArrayLod(s_blurInput, vec3(uv + vec2( -nearStep.x, +farStep.y  ),0.0) , 0.0 ).x
+			, texture2DArrayLod(s_blurInput, vec3(uv + vec2( +farStep.x,  +nearStep.y ),0.0) , 0.0 ).x
+			);
+
+		float blurred = dot( taps, vec4( 0.2, 0.2, 0.2, 0.2 ) ) + centre.x * 0.2;
+		imageStore(s_target, ivec3(pixel.xy,u_layer), vec4(blurred, centre.y, 0.0, 0.0));
+	}
+}
+

+ 26 - 0
examples/39-assao/cs_assao_non_smart_half_apply.sc

@@ -0,0 +1,26 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2DARRAY(s_finalSSAO, 1);
+
+// edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and Settings::SkipHalfPixelsOnLowQualityLevel == true ):
+// averages only layers 0 and 3 of s_finalSSAO, applies pow( x, 1/2.2 ) and stores the result
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		vec2 uv = (pixel.xy+vec2(0.5,0.5)) * u_viewportPixelSize;
+
+		float sum = texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 0 ), 0.0 ).x;
+		sum += texture2DArrayLod(s_finalSSAO, vec3( uv.xy, 3 ), 0.0 ).x;
+
+		float shade = pow(sum * 0.5,1.0/2.2);
+		imageStore(s_target, ivec2(pixel.xy), vec4(shade, shade, shade, shade));
+	}
+}

+ 47 - 0
examples/39-assao/cs_assao_postprocess_importance_map_a.sc

@@ -0,0 +1,47 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2D(s_importanceMap, 1);
+
+// Shaders below only needed for adaptive quality level
+
+// blend factor between the neighbourhood maximum (0.0) and the average of the four taps (1.0)
+CONST(float cSmoothenImportance) = 1.0;
+
+// First importance-map post-process pass: for every texel of s_target, samples s_importanceMap at the
+// centre and at four off-centre taps, then stores mix( max, average, cSmoothenImportance ).
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
+
+	uvec2 dim = imageSize(s_target).xy;
+	if (all(lessThan(dtID.xy, dim) ) )
+	{
+		vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_quarterResPixelSize;
+
+		float centre = texture2DLod(s_importanceMap, inUV, 0.0 ).x;
+
+		vec2 halfPixel = u_quarterResPixelSize * 0.5f;
+
+#if BGFX_SHADER_LANGUAGE_GLSL
+		halfPixel.y = -halfPixel.y;
+#endif 
+		// taps sit half a pixel off along one axis and one and a half pixels along the other
+		vec4 vals;
+		vals.x = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x * 3, -halfPixel.y ), 0.0 ).x;
+		vals.y = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x, -halfPixel.y * 3 ), 0.0 ).x;
+		vals.z = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x * 3, +halfPixel.y ), 0.0 ).x;
+		vals.w = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x, +halfPixel.y * 3 ), 0.0 ).x;
+
+		// the average uses the four taps only; the maximum also includes the centre
+		float avgVal = dot( vals, vec4( 0.25, 0.25, 0.25, 0.25 ) );
+		vals.xy = max( vals.xy, vals.zw );
+		float maxVal = max( centre, max( vals.x, vals.y ) );
+
+		imageStore(s_target, ivec2(dtID.xy), mix( maxVal, avgVal, cSmoothenImportance ).xxxx);
+	}
+}

+ 55 - 0
examples/39-assao/cs_assao_postprocess_importance_map_b.sc

@@ -0,0 +1,55 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_WR(s_target, r8, 0);
+SAMPLER2D(s_importanceMap, 1);
+UIMAGE2D_RW(s_loadCounterOutputUAV, r32ui, 2);
+
+CONST(float cSmoothenImportance) = 1.0;
+
+// Shaders below only needed for adaptive quality level
+// Second importance-map post-process pass: same max/average filter as pass A with a different tap layout;
+// additionally accumulates the quantized result of every 9th texel into s_loadCounterOutputUAV.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 texel = uvec2(gl_GlobalInvocationID.xy);
+
+	uvec2 dim = imageSize(s_target).xy;
+	if (all(lessThan(texel.xy, dim) ) )
+	{
+		vec2 uv = (texel.xy+vec2(0.5,0.5)) * u_quarterResPixelSize;
+
+		float centre = texture2DLod(s_importanceMap, uv, 0.0 ).x;
+
+		vec2 nearStep = u_quarterResPixelSize * 0.5f;
+		vec2 farStep = nearStep * 3.0;
+
+		vec4 taps = vec4(
+			  texture2DLod(s_importanceMap, uv + vec2( -nearStep.x, -farStep.y  ), 0.0 ).x
+			, texture2DLod(s_importanceMap, uv + vec2( +farStep.x,  -nearStep.y ), 0.0 ).x
+			, texture2DLod(s_importanceMap, uv + vec2( +nearStep.x, +farStep.y  ), 0.0 ).x
+			, texture2DLod(s_importanceMap, uv + vec2( -farStep.x,  +nearStep.y ), 0.0 ).x
+			);
+
+		// the average uses the four taps only; the maximum also includes the centre
+		float average = dot( taps, vec4( 0.25, 0.25, 0.25, 0.25 ) );
+		vec2 pairMax = max( taps.xy, taps.zw );
+		float maximum = max( centre, max( pairMax.x, pairMax.y ) );
+
+		float importance = mix( maximum, average, cSmoothenImportance );
+
+		// sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel 
+		uint quantized = uint(saturate(importance) * 255.0 + 0.5);
+
+		// save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying LoadCounterAvgDiv by 9
+#if BGFX_SHADER_LANGUAGE_GLSL 
+		uint row = dim.y-1-texel.y;
+#else
+		uint row = texel.y;
+#endif
+		if( ((texel.x % 3) + (row % 3)) == 0  )
+		{
+			imageAtomicAdd(s_loadCounterOutputUAV, ivec2(0, 0), quantized );
+		}
+
+		imageStore(s_target, ivec2(texel.xy), vec4(importance, importance, importance, importance));
+	}
+}

+ 103 - 0
examples/39-assao/cs_assao_prepare_depth_mip.sc

@@ -0,0 +1,103 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_RO(s_viewspaceDepthSource0, r16f, 0); // inputs 0-3: main() loads a 2x2 texel quad from each of these
+IMAGE2D_RO(s_viewspaceDepthSource1, r16f, 1);
+IMAGE2D_RO(s_viewspaceDepthSource2, r16f, 2);
+IMAGE2D_RO(s_viewspaceDepthSource3, r16f, 3);
+
+IMAGE2D_WR(s_target0, r16f, 4); // outputs 0-3: s_targetN receives one weighted-average depth per quad of s_viewspaceDepthSourceN
+IMAGE2D_WR(s_target1, r16f, 5);
+IMAGE2D_WR(s_target2, r16f, 6);
+IMAGE2D_WR(s_target3, r16f, 7);
+
+// Derives the per-pixel radius parameters from u_effectRadius.
+//   pixCenterLength                  - distance used for the near-limit radius reduction
+//   pixelDirRBViewspaceSizeAtCenterZ - only .x is read; the lookup radius is divided by it
+//   pixLookupRadiusMod (out)         - 0.85 * effectRadius / pixelDirRBViewspaceSizeAtCenterZ.x
+//   effectRadius (out)               - u_effectRadius scaled by the near-limit factor (0.2 .. 1.0)
+//   falloffCalcMulSq (out)           - -1 / effectRadius^2, used to calculate falloff (both for AO samples and per-sample weights)
+void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq )
+{
+    // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
+    const float nearLimitScale = saturate( pixCenterLength * u_effectSamplingRadiusNearLimitRec ) * 0.8 + 0.2;
+
+    // leaving this out for performance reasons: use something similar if radius needs to scale based on distance
+    //effectRadius *= pow( pixCenterLength, u_radiusDistanceScalingFunctionPow);
+    effectRadius = u_effectRadius * nearLimitScale;
+
+    // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
+    float reducedRadius = 0.85 * effectRadius;
+    pixLookupRadiusMod = reducedRadius / pixelDirRBViewspaceSizeAtCenterZ.x;
+
+    float radiusSq = effectRadius*effectRadius;
+    falloffCalcMulSq = -1.0f / radiusSq;
+}
+
+// Downsamples the four depth sources by 2x2: every output texel is a weighted average of its source quad,
+// where each sample's weight falls off with its squared distance from the closest depth in the quad.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
+
+	uvec2 dim = uvec2(u_rect.zw);
+	if (all(lessThan(dtID.xy, dim) ) )
+	{ 
+		ivec2 baseCoords = ivec2(dtID.xy) * 2;
+
+		vec4 depthsArr[4];
+		float depthsOutArr[4];
+
+		// how to Gather a specific mip level?
+		depthsArr[0].x = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 0, 0 )).x ;
+		depthsArr[0].y = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 1, 0 )).x ;
+		depthsArr[0].z = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 0, 1 )).x ;
+		depthsArr[0].w = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 1, 1 )).x ;
+		depthsArr[1].x = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 0, 0 )).x;
+		depthsArr[1].y = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 1, 0 )).x;
+		depthsArr[1].z = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 0, 1 )).x;
+		depthsArr[1].w = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 1, 1 )).x;
+		depthsArr[2].x = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 0, 0 )).x;
+		depthsArr[2].y = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 1, 0 )).x;
+		depthsArr[2].z = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 0, 1 )).x;
+		depthsArr[2].w = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 1, 1 )).x;
+		depthsArr[3].x = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 0, 0 )).x;
+		depthsArr[3].y = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 1, 0 )).x;
+		depthsArr[3].z = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 0, 1 )).x;
+		depthsArr[3].w = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 1, 1 )).x;
+
+		// only falloffCalcMulSq is needed from CalculateRadiusParameters; the other two outputs are discarded
+		float dummyUnused1;
+		float dummyUnused2;
+		float falloffCalcMulSq;
+
+		UNROLL
+		for( int i = 0; i < 4; i++ )
+		{
+			vec4 depths = depthsArr[i];
+			float closest = min( min( depths.x, depths.y ), min( depths.z, depths.w ) );
+
+			CalculateRadiusParameters( abs( closest ), vec2(1.0,1.0), dummyUnused1, dummyUnused2, falloffCalcMulSq );
+
+			vec4 dists = depths - closest.xxxx;
+
+			// the closest sample has distance 0 and therefore weight 1, so the divisor below is never zero
+			vec4 weights = saturate( dists * dists * falloffCalcMulSq + 1.0 );
+
+			float smartAvg = dot( weights, depths ) / dot( weights, vec4( 1.0, 1.0, 1.0, 1.0 ) );
+
+			// alternatives that are not in use: output 'closest', or one pseudo-randomly picked sample of the quad
+			depthsOutArr[i] = smartAvg;
+		}
+
+		imageStore(s_target0, ivec2(dtID.xy), depthsOutArr[0].xxxx);
+		imageStore(s_target1, ivec2(dtID.xy), depthsOutArr[1].xxxx);
+		imageStore(s_target2, ivec2(dtID.xy), depthsOutArr[2].xxxx);
+		imageStore(s_target3, ivec2(dtID.xy), depthsOutArr[3].xxxx);
+	}
+}

+ 58 - 0
examples/39-assao/cs_assao_prepare_depths.sc

@@ -0,0 +1,58 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+SAMPLER2D(s_depthSource, 0);
+
+IMAGE2D_WR(s_target0, r16f, 1); 
+IMAGE2D_WR(s_target1, r16f, 2);
+IMAGE2D_WR(s_target2, r16f, 3);
+IMAGE2D_WR(s_target3, r16f, 4);
+
+// Converts a depth-buffer sample into view-space depth.
+//
+// Optimised version of
+//   "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+// The two constants are expected to be supplied by the application as:
+//   u_depthUnpackConsts.x (mul) = ( cameraClipFar * cameraClipNear ) / ( cameraClipFar - cameraClipNear )
+//   u_depthUnpackConsts.y (add) = cameraClipFar / ( cameraClipFar - cameraClipNear )
+float ScreenSpaceToViewSpaceDepth( float screenDepth )
+{
+    vec2 linearizeMulAdd = u_depthUnpackConsts;
+
+    return linearizeMulAdd.x / ( linearizeMulAdd.y - screenDepth );
+}
+
+
+// Splits the depth buffer into four de-interleaved view-space depth targets:
+// every thread reads one 2x2 quad of s_depthSource and writes one texel to each
+// of s_target0..3.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy);
+	uvec2 targetSize = imageSize(s_target0).xy;
+
+	// Only threads that land inside the output targets do any work.
+	if (all(lessThan(pixel.xy, targetSize) ) )
+	{
+		ivec2 outCoord = ivec2(pixel.xy);
+		ivec2 quadOrigin = outCoord * 2;
+
+		// The GLSL path reads the two rows of the quad in swapped order.
+#if BGFX_SHADER_LANGUAGE_GLSL 
+		float raw0 = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 1 ), 0).x;
+		float raw1 = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 1 ), 0).x;
+		float raw2 = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 0 ), 0).x;
+		float raw3 = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 0 ), 0).x;
+#else
+		float raw0 = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 0 ), 0).x;
+		float raw1 = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 0 ), 0).x;
+		float raw2 = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 1 ), 0).x;
+		float raw3 = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 1 ), 0).x;
+#endif
+
+		float view0 = ScreenSpaceToViewSpaceDepth( raw0 );
+		float view1 = ScreenSpaceToViewSpaceDepth( raw1 );
+		float view2 = ScreenSpaceToViewSpaceDepth( raw2 );
+		float view3 = ScreenSpaceToViewSpaceDepth( raw3 );
+
+		imageStore(s_target0, outCoord, view0.xxxx);
+		imageStore(s_target1, outCoord, view1.xxxx);
+		imageStore(s_target2, outCoord, view2.xxxx);
+		imageStore(s_target3, outCoord, view3.xxxx);
+	}
+}
+

+ 192 - 0
examples/39-assao/cs_assao_prepare_depths_and_normals.sc

@@ -0,0 +1,192 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+SAMPLER2D(s_depthSource, 0);
+
+IMAGE2D_WR(s_target0, r16f, 1);
+IMAGE2D_WR(s_target1, r16f, 2);
+IMAGE2D_WR(s_target2, r16f, 3);
+IMAGE2D_WR(s_target3, r16f, 4);
+IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
+
+// Linearizes a depth-buffer value into view-space depth.
+//
+// Optimised version of
+//   "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+// Set u_depthUnpackConsts to:
+//   .x = ( cameraClipFar * cameraClipNear ) / ( cameraClipFar - cameraClipNear )
+//   .y = cameraClipFar / ( cameraClipFar - cameraClipNear )
+float ScreenSpaceToViewSpaceDepth( float screenDepth )
+{
+    float mulConst = u_depthUnpackConsts.x;
+    float denominator = u_depthUnpackConsts.y - screenDepth;
+
+    return mulConst / denominator;
+}
+
+// Builds a view-space position from a 2D screen position and a view-space depth:
+// xy is the position remapped by u_ndcToViewMul / u_ndcToViewAdd and scaled by the
+// depth, z is the depth itself.
+vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
+{
+    vec2 viewXY = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
+
+    return vec3( viewXY, viewspaceDepth );
+}
+
+// Slope-sensitive, depth-based edge detection.
+// Returns one weight per neighbour (left, right, top, bottom) in [0,1]; the weight
+// drops towards 0 as the depth difference to that neighbour grows relative to centerZ.
+//
+// A cheaper version exists but has artifacts:
+//   edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ; );
+//   return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
+vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
+{
+    vec4 deltaLRTB = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
+
+    // Adding the opposite neighbour's delta cancels out a constant slope across the centre.
+    vec4 deltaSlopeAdjusted = deltaLRTB + deltaLRTB.yxwz;
+    vec4 edgeDistance = min( abs( deltaLRTB ), abs( deltaSlopeAdjusted ) );
+
+    return saturate( ( 1.3 - edgeDistance / (centerZ * 0.040) ) );
+}
+
+// Computes this pixel's view-space normal from its position and the positions of its
+// left/right/top/bottom neighbours. Each pair of adjacent neighbours contributes a
+// cross product, weighted by the product of the two corresponding edge weights.
+vec3 CalculateNormal( const vec4 edgesLRTB, vec3 pixCenterPos, vec3 pixLPos, vec3 pixRPos, vec3 pixTPos, vec3 pixBPos )
+{
+    // Unit directions from the centre towards each neighbour.
+    vec3 dirL = normalize(pixLPos - pixCenterPos);
+    vec3 dirR = normalize(pixRPos - pixCenterPos);
+    vec3 dirT = normalize(pixTPos - pixCenterPos);
+    vec3 dirB = normalize(pixBPos - pixCenterPos);
+
+    // Acceptance weight of each neighbour pair.
+    float weightLT = edgesLRTB.x*edgesLRTB.z;
+    float weightTR = edgesLRTB.z*edgesLRTB.y;
+    float weightRB = edgesLRTB.y*edgesLRTB.w;
+    float weightBL = edgesLRTB.w*edgesLRTB.x;
+
+    // The small z bias keeps the sum non-zero when every pair is rejected.
+    vec3 normalSum = vec3( 0, 0, -0.0005 );
+    normalSum += ( weightLT ) * cross( dirL, dirT );
+    normalSum += ( weightTR ) * cross( dirT, dirR );
+    normalSum += ( weightRB ) * cross( dirR, dirB );
+    normalSum += ( weightBL ) * cross( dirB, dirL );
+
+    return normalize( normalSum );
+}
+
+// Depth/normal preparation pass. Each thread handles one 2x2 quad of the depth buffer:
+// it stores the four linearized depths de-interleaved into s_target0..3 and writes a
+// normal, reconstructed from the neighbouring depths, for each of the four texels.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
+	uvec2 halfResSize = imageSize(s_target0).xy;
+
+	if (all(lessThan(dtID.xy, halfResSize) ) )
+	{
+		ivec2 halfResCoord = ivec2(dtID.xy);
+		ivec2 quadOrigin = halfResCoord * 2;
+
+		// View-space depth of the quad and of the eight texels bordering it, indexed
+		// [column][row]. The four corners of the 4x4 grid are never needed and stay unset.
+		float depthGrid[4][4];
+
+		// The GLSL path uses a different UV offset and mirrored row offsets.
+#if BGFX_SHADER_LANGUAGE_GLSL
+		vec2 quadUV = (vec2(dtID.xy) + vec2(0.25,0.75)) * u_viewport2xPixelSize;
+
+		// middle 4
+		depthGrid[1][1] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 0, 1 ) ).x );
+		depthGrid[2][1] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 1, 1 ) ).x );
+		depthGrid[1][2] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 0, 0 ) ).x );
+		depthGrid[2][2] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 1, 0 ) ).x );
+
+		// left 2
+		depthGrid[0][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2( -1, 0 ) ).x );
+		depthGrid[0][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2( -1, -1 ) ).x );
+
+		// right 2
+		depthGrid[3][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  2, 0 ) ).x );
+		depthGrid[3][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  2, -1 ) ).x );
+
+		// top 2
+		depthGrid[1][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  0, 1 ) ).x );
+		depthGrid[2][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  1, 1 ) ).x );
+
+		// bottom 2
+		depthGrid[1][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  0,  -2 ) ).x );
+		depthGrid[2][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  1,  -2 ) ).x );
+#else
+		vec2 quadUV = (vec2(dtID.xy) + vec2(0.25,0.25)) * u_viewport2xPixelSize;
+
+		// middle 4
+		depthGrid[1][1] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 0, 0 ) ).x );
+		depthGrid[2][1] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 1, 0 ) ).x );
+		depthGrid[1][2] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 0, 1 ) ).x );
+		depthGrid[2][2] = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, quadOrigin, 0, ivec2( 1, 1 ) ).x );
+
+		// left 2
+		depthGrid[0][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2( -1, 0 ) ).x );
+		depthGrid[0][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2( -1, 1 ) ).x );
+
+		// right 2
+		depthGrid[3][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  2, 0 ) ).x );
+		depthGrid[3][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  2, 1 ) ).x );
+
+		// top 2
+		depthGrid[1][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  0, -1 ) ).x );
+		depthGrid[2][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  1, -1 ) ).x );
+
+		// bottom 2
+		depthGrid[1][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  0,  2 ) ).x );
+		depthGrid[2][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, quadUV, 0.0, ivec2(  1,  2 ) ).x );
+#endif
+
+		// De-interleaved depth output: one texel of the quad per target.
+		imageStore(s_target0, halfResCoord, depthGrid[1][1].xxxx );
+		imageStore(s_target1, halfResCoord, depthGrid[2][1].xxxx );
+		imageStore(s_target2, halfResCoord, depthGrid[1][2].xxxx );
+		imageStore(s_target3, halfResCoord, depthGrid[2][2].xxxx );
+
+		// Edge weights (left, right, top, bottom) for each texel of the quad.
+		vec4 edgeWeights0 = CalculateEdges( depthGrid[1][1], depthGrid[0][1], depthGrid[2][1], depthGrid[1][0], depthGrid[1][2] );
+		vec4 edgeWeights1 = CalculateEdges( depthGrid[2][1], depthGrid[1][1], depthGrid[3][1], depthGrid[2][0], depthGrid[2][2] );
+		vec4 edgeWeights2 = CalculateEdges( depthGrid[1][2], depthGrid[0][2], depthGrid[2][2], depthGrid[1][1], depthGrid[1][3] );
+		vec4 edgeWeights3 = CalculateEdges( depthGrid[2][2], depthGrid[1][2], depthGrid[3][2], depthGrid[2][1], depthGrid[2][3] );
+
+		// Step between neighbouring texels; the vertical direction is negated on the GLSL path.
+		vec2 texelStep = u_viewportPixelSize;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		texelStep.y = -texelStep.y;
+#endif
+
+		// View-space positions, same [column][row] layout as depthGrid.
+		vec3 posGrid[4][4];
+
+		// middle 4
+		posGrid[1][1] = NDCToViewspace( quadUV + texelStep * vec2( 0.0,  0.0 ), depthGrid[1][1] );
+		posGrid[2][1] = NDCToViewspace( quadUV + texelStep * vec2( 1.0,  0.0 ), depthGrid[2][1] );
+		posGrid[1][2] = NDCToViewspace( quadUV + texelStep * vec2( 0.0,  1.0 ), depthGrid[1][2] );
+		posGrid[2][2] = NDCToViewspace( quadUV + texelStep * vec2( 1.0,  1.0 ), depthGrid[2][2] );
+
+		// left 2
+		posGrid[0][1] = NDCToViewspace( quadUV + texelStep * vec2( -1.0,  0.0), depthGrid[0][1] );
+		posGrid[0][2] = NDCToViewspace( quadUV + texelStep * vec2( -1.0,  1.0), depthGrid[0][2] );
+
+		// right 2
+		posGrid[3][1] = NDCToViewspace( quadUV + texelStep * vec2(  2.0,  0.0), depthGrid[3][1] );
+		posGrid[3][2] = NDCToViewspace( quadUV + texelStep * vec2(  2.0,  1.0), depthGrid[3][2] );
+
+		// top 2
+		posGrid[1][0] = NDCToViewspace( quadUV + texelStep * vec2( 0.0, -1.0 ), depthGrid[1][0] );
+		posGrid[2][0] = NDCToViewspace( quadUV + texelStep * vec2( 1.0, -1.0 ), depthGrid[2][0] );
+
+		// bottom 2
+		posGrid[1][3] = NDCToViewspace( quadUV + texelStep * vec2( 0.0,  2.0 ), depthGrid[1][3] );
+		posGrid[2][3] = NDCToViewspace( quadUV + texelStep * vec2( 1.0,  2.0 ), depthGrid[2][3] );
+
+		vec3 normal0 = CalculateNormal( edgeWeights0, posGrid[1][1], posGrid[0][1], posGrid[2][1], posGrid[1][0], posGrid[1][2] );
+		vec3 normal1 = CalculateNormal( edgeWeights1, posGrid[2][1], posGrid[1][1], posGrid[3][1], posGrid[2][0], posGrid[2][2] );
+		vec3 normal2 = CalculateNormal( edgeWeights2, posGrid[1][2], posGrid[0][2], posGrid[2][2], posGrid[1][1], posGrid[1][3] );
+		vec3 normal3 = CalculateNormal( edgeWeights3, posGrid[2][2], posGrid[1][2], posGrid[3][2], posGrid[2][1], posGrid[2][3] );
+
+		// Normals are remapped from [-1,1] to [0,1] before being stored at full resolution.
+#if BGFX_SHADER_LANGUAGE_GLSL
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 0, 1 ), vec4( normal0 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 1, 1 ), vec4( normal1 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 0, 0 ), vec4( normal2 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 1, 0 ), vec4( normal3 * 0.5 + 0.5, 0.0 ));
+#else
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 0, 0 ), vec4( normal0 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 1, 0 ), vec4( normal1 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 0, 1 ), vec4( normal2 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, quadOrigin + ivec2( 1, 1 ), vec4( normal3 * 0.5 + 0.5, 0.0 ));
+#endif
+	}
+}

+ 188 - 0
examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc

@@ -0,0 +1,188 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+SAMPLER2D(s_depthSource, 0);
+
+IMAGE2D_WR(s_target0, r16f, 1);
+IMAGE2D_WR(s_target1, r16f, 2);
+IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
+
+// Turns a depth-buffer sample into view-space depth.
+//
+// Optimised version of
+//   "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+// u_depthUnpackConsts is expected to hold:
+//   .x = ( cameraClipFar * cameraClipNear ) / ( cameraClipFar - cameraClipNear )
+//   .y = cameraClipFar / ( cameraClipFar - cameraClipNear )
+float ScreenSpaceToViewSpaceDepth( float screenDepth )
+{
+    return u_depthUnpackConsts.x / ( u_depthUnpackConsts.y - screenDepth );
+}
+
+// Reconstructs a view-space position: the 2D position is remapped with
+// u_ndcToViewMul / u_ndcToViewAdd and scaled by the depth; the depth becomes z.
+vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
+{
+    vec2 scaledXY = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
+
+    return vec3( scaledXY.x, scaledXY.y, viewspaceDepth );
+}
+
+// Slope-sensitive depth-based edge detection. Produces a [0,1] weight for the left,
+// right, top and bottom neighbour of a pixel; larger relative depth differences give
+// smaller weights.
+//
+// Cheaper version, but it has artifacts:
+//   edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ; );
+//   return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
+vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
+{
+    vec4 diffLRTB = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
+
+    // Pair each difference with the one from the opposite side so that a steady
+    // slope through the centre is not mistaken for an edge.
+    vec4 diffWithOpposite = diffLRTB + diffLRTB.yxwz;
+    vec4 smallestDiff = min( abs( diffLRTB ), abs( diffWithOpposite ) );
+
+    return saturate( ( 1.3 - smallestDiff / (centerZ * 0.040) ) );
+}
+
+
+// Estimates the view-space normal of a pixel from its own position and those of its
+// four direct neighbours. Cross products of adjacent neighbour directions are summed,
+// each scaled by the product of the two matching edge weights, then normalized.
+vec3 CalculateNormal( const vec4 edgesLRTB, vec3 pixCenterPos, vec3 pixLPos, vec3 pixRPos, vec3 pixTPos, vec3 pixBPos )
+{
+    // Per-pair weights in the order (left,top), (top,right), (right,bottom), (bottom,left).
+    vec4 pairWeights = vec4( edgesLRTB.x*edgesLRTB.z, edgesLRTB.z*edgesLRTB.y, edgesLRTB.y*edgesLRTB.w, edgesLRTB.w*edgesLRTB.x );
+
+    vec3 toLeft   = normalize(pixLPos - pixCenterPos);
+    vec3 toRight  = normalize(pixRPos - pixCenterPos);
+    vec3 toTop    = normalize(pixTPos - pixCenterPos);
+    vec3 toBottom = normalize(pixBPos - pixCenterPos);
+
+    // Start from a tiny -z vector so the result is still defined if all weights are zero.
+    vec3 weightedSum = vec3( 0, 0, -0.0005 );
+    weightedSum += ( pairWeights.x ) * cross( toLeft, toTop );
+    weightedSum += ( pairWeights.y ) * cross( toTop, toRight );
+    weightedSum += ( pairWeights.z ) * cross( toRight, toBottom );
+    weightedSum += ( pairWeights.w ) * cross( toBottom, toLeft );
+
+    return normalize( weightedSum );
+}
+
+// Half-resolution variant of the depth/normal preparation pass. For each 2x2 quad of
+// the depth buffer it stores two of the four linearized depths (z0 -> s_target0,
+// z3 -> s_target1) and writes reconstructed normals for those same two texels.
+//
+// Only the neighbours that feed those two texels are fetched; the edge weights and
+// depth samples that would only be needed for the other two texels are not computed.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
+
+	uvec2 dim = imageSize(s_target0).xy;
+	if (all(lessThan(dtID.xy, dim) ) )
+	{
+		ivec2 baseCoords = ivec2(dtID.xy) * 2;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.75)) * u_viewport2xPixelSize;
+#else
+		vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.25)) * u_viewport2xPixelSize;
+#endif
+
+		// The four depths of this quad; the GLSL path reads the two rows in swapped order.
+#if BGFX_SHADER_LANGUAGE_GLSL
+		float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 0, 1 ) ).x );
+		float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 1, 1 ) ).x );
+		float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 0, 0 ) ).x );
+		float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 1, 0 ) ).x );
+#else
+		float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 0, 0 ) ).x );
+		float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 1, 0 ) ).x );
+		float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 0, 1 ) ).x );
+		float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoords, 0, ivec2( 1, 1 ) ).x );
+#endif
+
+		imageStore(s_target0, ivec2(dtID.xy), z0.xxxx );
+		imageStore(s_target1, ivec2(dtID.xy), z3.xxxx );
+
+		// Depth grid indexed [column][row]. Only the entries read below are filled in.
+		float pixZs[4][4];
+
+		// middle 4
+		pixZs[1][1] = z0;
+		pixZs[2][1] = z1;
+		pixZs[1][2] = z2;
+		pixZs[2][2] = z3;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		// left / top neighbour of texel 0
+		pixZs[0][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
+		pixZs[1][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  0, 1 ) ).x );
+
+		// right / bottom neighbour of texel 3
+		pixZs[3][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  2, -1 ) ).x );
+		pixZs[2][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  1,  -2 ) ).x );
+#else
+		// left / top neighbour of texel 0
+		pixZs[0][1] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
+		pixZs[1][0] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  0, -1 ) ).x );
+
+		// right / bottom neighbour of texel 3
+		pixZs[3][2] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  2, 1 ) ).x );
+		pixZs[2][3] = ScreenSpaceToViewSpaceDepth(  texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2(  1,  2 ) ).x );
+#endif
+
+		// Edge weights are only needed for the two texels that get a normal.
+		vec4 edges0 = CalculateEdges( pixZs[1][1], pixZs[0][1], pixZs[2][1], pixZs[1][0], pixZs[1][2] );
+		vec4 edges3 = CalculateEdges( pixZs[2][2], pixZs[1][2], pixZs[3][2], pixZs[2][1], pixZs[2][3] );
+
+		// Step between neighbouring texels; the vertical direction is negated on the GLSL path.
+		vec2 viewportPixelSize = u_viewportPixelSize;
+#if BGFX_SHADER_LANGUAGE_GLSL
+		viewportPixelSize.y = -viewportPixelSize.y;
+#endif
+
+		vec3 pixPos[4][4];
+		// there is probably a way to optimize the math below; however no approximation will work, has to be precise.
+
+		// middle 4
+		pixPos[1][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0,  0.0 ), pixZs[1][1] );
+		pixPos[2][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0,  0.0 ), pixZs[2][1] );
+		pixPos[1][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0,  1.0 ), pixZs[1][2] );
+		pixPos[2][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0,  1.0 ), pixZs[2][2] );
+
+		// left / top neighbour of texel 0
+		pixPos[0][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( -1.0,  0.0), pixZs[0][1] );
+		pixPos[1][0] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, -1.0 ), pixZs[1][0] );
+
+		// right / bottom neighbour of texel 3
+		pixPos[3][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2(  2.0,  1.0), pixZs[3][2] );
+		pixPos[2][3] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0,  2.0 ), pixZs[2][3] );
+
+		vec3 norm0 = CalculateNormal( edges0, pixPos[1][1], pixPos[0][1], pixPos[2][1], pixPos[1][0], pixPos[1][2] );
+		vec3 norm3 = CalculateNormal( edges3, pixPos[2][2], pixPos[1][2], pixPos[3][2], pixPos[2][1], pixPos[2][3] );
+
+		// Normals are remapped from [-1,1] to [0,1] before being stored.
+#if BGFX_SHADER_LANGUAGE_GLSL
+		imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 1 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 0 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
+#else
+		imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 0 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
+		imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 1 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
+#endif
+	}
+}

+ 48 - 0
examples/39-assao/cs_assao_prepare_depths_half.sc

@@ -0,0 +1,48 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+SAMPLER2D(s_depthSource, 0);
+IMAGE2D_WR(s_target0, r16f, 1);
+IMAGE2D_WR(s_target1, r16f, 2);
+
+// Maps a depth-buffer sample to view-space depth.
+//
+// Optimised version of
+//   "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+// The application is expected to set u_depthUnpackConsts to:
+//   .x = ( cameraClipFar * cameraClipNear ) / ( cameraClipFar - cameraClipNear )
+//   .y = cameraClipFar / ( cameraClipFar - cameraClipNear )
+float ScreenSpaceToViewSpaceDepth( float screenDepth )
+{
+    float numerator   = u_depthUnpackConsts.x;
+    float denominator = u_depthUnpackConsts.y - screenDepth;
+
+    return numerator / denominator;
+}
+
+// Half-resolution depth preparation: for every 2x2 quad of s_depthSource, two of the
+// four texels are linearized and written to s_target0 and s_target1.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy);
+	uvec2 targetSize = imageSize(s_target0).xy;
+
+	// Threads outside the output targets have nothing to write.
+	if (!all(lessThan(pixel.xy, targetSize) ) )
+	{
+		return;
+	}
+
+	ivec2 outCoord = ivec2(pixel.xy);
+	ivec2 quadOrigin = outCoord * 2;
+
+	// The GLSL path picks the texels from swapped rows.
+#if BGFX_SHADER_LANGUAGE_GLSL 
+	float rawFirst  = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 1 ), 0).x;
+	float rawSecond = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 0 ), 0).x;
+#else
+	float rawFirst  = texelFetch(s_depthSource, quadOrigin + ivec2( 0, 0 ), 0).x;
+	float rawSecond = texelFetch(s_depthSource, quadOrigin + ivec2( 1, 1 ), 0).x;
+#endif
+
+	imageStore(s_target0, outCoord, ScreenSpaceToViewSpaceDepth( rawFirst ).xxxx);
+	imageStore(s_target1, outCoord, ScreenSpaceToViewSpaceDepth( rawSecond ).xxxx);
+}
+

+ 82 - 0
examples/39-assao/cs_assao_smart_blur.sc

@@ -0,0 +1,82 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+SAMPLER2DARRAY(s_blurInput, 1);
+
+// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
+vec4 UnpackEdges( float _packedVal )
+{
+    // The packed value is a normalized 8-bit quantity holding four 2-bit fields,
+    // from the highest bits down: left, right, top, bottom.
+    uint bits = uint(_packedVal * 255.5);
+
+    // The masks are not strictly needed for an 8-bit input, but are kept so that
+    // wider inputs cannot cause trouble in the future.
+    vec4 edgesLRTB = vec4(
+          float((bits >> 6) & 0x03)
+        , float((bits >> 4) & 0x03)
+        , float((bits >> 2) & 0x03)
+        , float((bits >> 0) & 0x03)
+        ) / 3.0;
+
+    // u_invSharpness raises every weight, softening the edges.
+    return saturate( edgesLRTB + u_invSharpness );
+}
+
+// ********************************************************************************************************
+// Compute shader that does smart (edge-aware) blurring (to avoid bleeding)
+
+// Accumulates one neighbour into a weighted average: adds the sample scaled by its
+// edge weight to `sum`, and the weight itself to `sumWeight`.
+void AddSample( float ssaoValue, float edgeValue, inout float sum, inout float sumWeight )
+{
+    sum       += (edgeValue * ssaoValue);
+    sumWeight += edgeValue;
+}
+
+// Edge-aware blur of one pixel of layer 0 of s_blurInput.
+//   inPos - integer texel position, used to fetch the packed edges (y channel)
+//   coord - UV of the same texel, used for the two gathers
+// Returns vec2( blurred SSAO value, unchanged packed edges ).
+vec2 SampleBlurred( ivec2 inPos, vec2 coord )
+{
+    float packedEdges = texelFetch(s_blurInput, ivec3(inPos.xy,0.0), 0 ).y;
+    vec4 edgeWeights  = UnpackEdges( packedEdges );
+
+    // Two gathers, offset by half a texel towards the upper-left and the lower-right,
+    // together cover the centre and its four direct neighbours. The GLSL path shifts
+    // the gather positions vertically and reorders the gathered components.
+#if BGFX_SHADER_LANGUAGE_GLSL
+    vec4 gatherUL = textureGather(s_blurInput, vec3(coord - u_halfViewportPixelSize * 0.5 + vec2(0.0,u_halfViewportPixelSize.y), 0.0)).wzyx;
+    vec4 gatherBR = textureGather(s_blurInput, vec3(coord + u_halfViewportPixelSize * 0.5 + vec2(0.0,-u_halfViewportPixelSize.y), 0.0)).wzyx;
+#else
+    vec4 gatherUL = textureGather(s_blurInput, vec3(coord - u_halfViewportPixelSize * 0.5, 0.0));
+    vec4 gatherBR = textureGather(s_blurInput, vec3(coord + u_halfViewportPixelSize * 0.5, 0.0));
+#endif
+
+    float centerValue = gatherUL.y;
+    float leftValue   = gatherUL.x;
+    float topValue    = gatherUL.z;
+    float rightValue  = gatherBR.z;
+    float bottomValue = gatherBR.x;
+
+    // The centre sample always takes part, with a fixed weight of 0.5.
+    float totalWeight = 0.5;
+    float total = centerValue * totalWeight;
+
+    AddSample( leftValue,   edgeWeights.x, total, totalWeight );
+    AddSample( rightValue,  edgeWeights.y, total, totalWeight );
+    AddSample( topValue,    edgeWeights.z, total, totalWeight );
+    AddSample( bottomValue, edgeWeights.w, total, totalWeight );
+
+    // Alternative once considered: min( ssaoValue, ssaoAvg ) * 0.2 + ssaoAvg * 0.8
+    float blurredValue = total / totalWeight;
+
+    return vec2( blurredValue, packedEdges );
+}
+
+// edge-sensitive blur
+// Entry point: blurs every pixel of the rectangle u_rect (xy = origin, zw = exclusive
+// upper bound) and writes the result to layer u_layer of s_target.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		// UV of the texel centre.
+		vec2 centerUV = (pixel.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
+		vec2 blurred = SampleBlurred( ivec2(pixel.xy), centerUV );
+
+		imageStore(s_target, ivec3(pixel.xy, u_layer), blurred.xyyy);
+	}
+}
+

+ 83 - 0
examples/39-assao/cs_assao_smart_blur_wide.sc

@@ -0,0 +1,83 @@
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh" 
+#include "uniforms.sh"
+
+IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+SAMPLER2DARRAY(s_blurInput, 1);
+
+// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
+vec4 UnpackEdges( float _packedVal )
+{
+    // Recover the 8-bit integer that stores four 2-bit edge values
+    // (left, right, top, bottom, from the highest bits down).
+    uint packedBits = uint(_packedVal * 255.5);
+
+    // Masking is redundant for an 8-bit input but guards against wider inputs later on.
+    uint leftBits   = (packedBits >> 6) & 0x03;
+    uint rightBits  = (packedBits >> 4) & 0x03;
+    uint topBits    = (packedBits >> 2) & 0x03;
+    uint bottomBits = (packedBits >> 0) & 0x03;
+
+    vec4 edgesLRTB;
+    edgesLRTB.x = float(leftBits) / 3.0;
+    edgesLRTB.y = float(rightBits) / 3.0;
+    edgesLRTB.z = float(topBits) / 3.0;
+    edgesLRTB.w = float(bottomBits) / 3.0;
+
+    // Lower sharpness (higher u_invSharpness) pushes all weights towards 1.
+    return saturate( edgesLRTB + u_invSharpness );
+}
+
+// ********************************************************************************************************
+// Compute shader that does smart (edge-aware) blurring (to avoid bleeding)
+
+// Adds one weighted sample to a running weighted sum; the edge value is the weight.
+void AddSample( float ssaoValue, float edgeValue, inout float sum, inout float sumWeight )
+{
+    float weightedValue = (edgeValue * ssaoValue);
+
+    sum += weightedValue;
+    sumWeight += edgeValue;
+}
+
+// Edge-aware blur with a wider kernel: the centre texel plus the texels two steps to
+// the left, top, right and bottom of it.
+//   coord - UV (xy) and array layer (z) to sample s_blurInput at
+// Returns vec2( blurred SSAO value, packed edges of the centre texel ).
+vec2 SampleBlurredWide(vec3 coord)
+{
+	// x = SSAO value, y = packed edges.
+	vec2 center = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, 0)).xy;
+	vec2 left   = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(-2, 0)).xy;
+	vec2 top    = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, -2)).xy;
+	vec2 right  = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(2, 0)).xy;
+	vec2 bottom = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, 2)).xy;
+
+	float packedEdges = center.y;
+	vec4 weightsLRTB = UnpackEdges(packedEdges);
+
+	// Each weight is also scaled by the far sample's edge facing back towards the
+	// centre (e.g. the left sample's right edge).
+	weightsLRTB.x *= UnpackEdges(left.y).y;
+	weightsLRTB.y *= UnpackEdges(right.y).x;
+	weightsLRTB.z *= UnpackEdges(top.y).w;
+	weightsLRTB.w *= UnpackEdges(bottom.y).z;
+
+	// The centre sample always takes part, with a fixed weight of 0.8.
+	float totalWeight = 0.8;
+	float total = center.x * totalWeight;
+
+	AddSample(left.x,   weightsLRTB.x, total, totalWeight);
+	AddSample(right.x,  weightsLRTB.y, total, totalWeight);
+	AddSample(top.x,    weightsLRTB.z, total, totalWeight);
+	AddSample(bottom.x, weightsLRTB.w, total, totalWeight);
+
+	// Alternative once considered: min( ssaoValue, ssaoAvg ) * 0.2 + ssaoAvg * 0.8
+	float blurredValue = total / totalWeight;
+
+	return vec2(blurredValue, packedEdges);
+}
+
+// edge-sensitive blur (wider kernel)
+// Entry point: applies the wide blur to every pixel of the rectangle u_rect
+// (xy = origin, zw = exclusive upper bound) and writes to layer u_layer of s_target.
+NUM_THREADS(8, 8, 1)
+void main() 
+{
+	uvec2 pixel = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
+	if (all(lessThan(pixel.xy, u_rect.zw) ) )
+	{
+		// UV of the texel centre; the blur input is always read from layer 0.
+		vec2 centerUV = (pixel.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
+		vec2 blurred = SampleBlurredWide(vec3(centerUV,0.0));
+
+		imageStore(s_target, ivec3(pixel.xy,u_layer), blurred.xyyy);
+	}
+}
+

+ 43 - 0
examples/39-assao/fs_assao_deferred_combine.sc

@@ -0,0 +1,43 @@
+$input v_texcoord0
+
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "../common/common.sh"
+
+SAMPLER2D(s_color, 0);
+SAMPLER2D(s_normal, 1);
+SAMPLER2D(s_ao, 2);
+
+uniform vec4 u_combineParams[2];
+
+// Combines the g-buffer with the ambient occlusion result.
+//   u_combineParams[0].x > 0 : use the albedo texture and apply directional lighting
+//   u_combineParams[0].y > 0 : multiply by the ambient occlusion term
+//   u_combineParams[1]       : texture coordinate scale (xy) and offset (zw)
+// Anything that is disabled contributes a neutral factor of 1.0.
+void main()
+{
+	vec2 tc0 = v_texcoord0 * u_combineParams[1].xy + u_combineParams[1].zw;
+
+	vec3 albedoColor = vec3(1.0,1.0,1.0);
+	float light = 1.0;
+
+	// NOTE(review): albedo and lighting are both gated by the same flag (.x);
+	// confirm against the application code that no separate lighting flag was intended.
+	if (u_combineParams[0].x > 0.0)
+	{
+		albedoColor = texture2D(s_color, tc0).rgb;
+
+		vec3 n  = texture2D(s_normal, tc0).xyz;
+		// Expand out normal
+		n = n*2.0-1.0;
+		vec3 l = normalize(vec3(-0.8,0.75,-1.0));
+		light = max(0.0,dot(n,l)) * 1.2+ 0.3; 
+	}
+
+	float ao = 1.0;
+	if ( u_combineParams[0].y > 0.0)
+	{
+		ao = texture2D(s_ao, tc0).x;
+	}
+
+	// Plain 1.0 instead of 1.0f: the 'f' suffix is not accepted by every GLSL version.
+	gl_FragColor = vec4(albedoColor * light * ao, 1.0);
+} 
+ 

+ 22 - 0
examples/39-assao/fs_assao_gbuffer.sc

@@ -0,0 +1,22 @@
+$input v_normal,  v_texcoord0
+
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "../common/common.sh"
+
+SAMPLER2D(s_albedo, 0);
+
+// G-buffer fill: target 0 receives the interpolated normal, target 1 the albedo colour.
+void main()
+{
+	// Normal is already compressed to [0,1] so it can be stored as is; w is unused.
+	gl_FragData[0] = vec4(v_normal.xyz, 0.0);
+
+	// Colour comes straight from the albedo texture.
+	gl_FragData[1] = texture2D(s_albedo,  v_texcoord0);
+}

+ 10 - 0
examples/39-assao/makefile

@@ -0,0 +1,10 @@
+#
+# Copyright 2011-2018 Branimir Karadzic. All rights reserved.
+# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+#
+
+BGFX_DIR=../..
+RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
+BUILD_DIR=../../.build
+
+include $(BGFX_DIR)/scripts/shader.mk

+ 42 - 0
examples/39-assao/uniforms.sh

@@ -0,0 +1,42 @@
+// Uniform storage: one 19-element vec4 array (u_params) plus a separate u_rect vector.
+// The macros below give names to individual vec4 slots / components of u_params.
+uniform vec4 u_params[19];
+uniform vec4 u_rect;
+
+// Slots 0..8: named scalar and vec2 components.
+#define u_viewportPixelSize					u_params[0].xy 
+#define u_halfViewportPixelSize				u_params[0].zw 
+#define u_depthUnpackConsts					u_params[1].xy
+#define u_ndcToViewMul						u_params[2].xy
+#define u_ndcToViewAdd						u_params[2].zw
+#define u_perPassFullResCoordOffset			u_params[3].xy
+#define u_perPassFullResUVOffset			u_params[3].zw
+#define u_viewport2xPixelSize				u_params[4].xy
+#define u_viewport2xPixelSize_x_025			u_params[4].zw
+#define u_effectRadius						u_params[5].x
+#define u_effectShadowStrength				u_params[5].y
+#define u_effectShadowPow					u_params[5].z
+#define u_effectShadowClamp					u_params[5].w
+#define u_effectFadeOutMul					u_params[6].x
+#define u_effectFadeOutAdd					u_params[6].y
+#define u_effectHorizonAngleThreshold		u_params[6].z
+#define u_effectSamplingRadiusNearLimitRec	u_params[6].w
+#define u_depthPrecisionOffsetMod			u_params[7].x
+#define u_negRecEffectRadius				u_params[7].y
+#define u_loadCounterAvgDiv					u_params[7].z
+#define u_adaptiveSampleCountLimit			u_params[7].w
+#define u_invSharpness						u_params[8].x
+#define u_passIndex							u_params[8].y
+#define u_quarterResPixelSize				u_params[8].zw
+// Indexed accessor: entry i resolves to the whole vec4 at u_params[9+i].
+// The next named slot is u_params[14], so i is presumably limited to 0..4 - TODO confirm.
+#define u_patternRotScaleMatrices(i)		u_params[9+(i)]
+#define u_normalsUnpackMul					u_params[14].x
+#define u_normalsUnpackAdd					u_params[14].y
+#define u_detailAOStrength					u_params[14].z
+#define u_layer								u_params[14].w
+// Slots 15..18: four full vec4 entries named Matrix0..Matrix3.
+#define u_normalsWorldToViewspaceMatrix0	u_params[15]
+#define u_normalsWorldToViewspaceMatrix1	u_params[16]
+#define u_normalsWorldToViewspaceMatrix2	u_params[17]
+#define u_normalsWorldToViewspaceMatrix3	u_params[18]
+
+// Compile-time constants; the flexible tap count is derived as max taps minus the base count.
+#define SSAO_MAX_TAPS                               32
+#define SSAO_ADAPTIVE_TAP_BASE_COUNT                5
+#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT            (SSAO_MAX_TAPS-SSAO_ADAPTIVE_TAP_BASE_COUNT)
+#define SSAO_DEPTH_MIP_LEVELS                       4
+#define SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION 1

+ 7 - 0
examples/39-assao/varying.def.sc

@@ -0,0 +1,7 @@
+vec4 a_position  : POSITION;
+vec2 a_texcoord0 : TEXCOORD0;
+vec3 a_normal    : NORMAL;
+
+vec2  v_texcoord0 : TEXCOORD0;
+vec3 v_normal    : NORMAL    = vec3(0.0, 0.0, 1.0); 
+

+ 16 - 0
examples/39-assao/vs_assao.sc

@@ -0,0 +1,16 @@
+$input a_position, a_texcoord0
+$output v_texcoord0
+
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "../common/common.sh"
+
+// Pass-through vertex stage: forwards the texture coordinate and projects the position.
+void main()
+{
+	v_texcoord0 = a_texcoord0;
+
+	// Only xyz of the input position is used; w is forced to 1.
+	vec4 position = vec4(a_position.xyz, 1.0);
+	gl_Position = mul(u_modelViewProj, position);
+}
+

+ 27 - 0
examples/39-assao/vs_assao_gbuffer.sc

@@ -0,0 +1,27 @@
+$input a_position, a_normal, a_texcoord0
+$output v_normal, v_texcoord0
+
+/*
+ * Copyright 2018 Attila Kocsis. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+
+#include "../common/common.sh"
+
+// G-buffer vertex stage: projects the vertex, scales the texture coordinate and
+// emits a world-space normal re-packed into [0,1].
+void main()
+{
+	// Clip-space position from the object-space vertex.
+	gl_Position = mul(u_modelViewProj, vec4(a_position.xyz, 1.0) );
+
+	// Texture coordinate is scaled by a fixed factor of 16.
+	v_texcoord0 = a_texcoord0 * 16.0;
+
+	// Vertex normals are stored packed in [0,1]; unpack to [-1,1].
+	vec3 unpackedNormal = a_normal.xyz * 2.0 - 1.0;
+
+	// Transform with the model matrix (w = 0 drops translation), then normalize
+	// to remove scaling before packing back into [0,1].
+	vec3 worldNormal = normalize(mul(u_model[0], vec4(unpackedNormal, 0.0) ).xyz);
+	v_normal.xyz = worldNormal * 0.5 + 0.5;
+}