Sfoglia il codice sorgente

xx-sss - Screen Space Shadows (#2350)

* add denoise example

/*
* Implement SVGF style denoising as bgfx example. Goal is to explore various
* options and parameters, not produce an optimized, efficient denoiser.
*
* Starts with deferred rendering scene with very basic lighting. Lighting is
* masked out with a noise pattern to provide something to denoise. There are
* two options for the noise pattern. One is a fixed 2x2 dither pattern to
* stand-in for lighting at quarter resolution. The other is the common
* shadertoy random pattern as a stand-in for some fancier lighting without
* enough samples per pixel, like ray tracing.
*
* First a temporal denoising filter is applied. The temporal filter is only
* using normals to reject previous samples. The SVGF paper also describes using
* depth comparison to reject samples but that is not implemented here.
*
* Followed by some number of spatial filters. These are implemented like in the
* SVGF paper. As an alternative to the 5x5 Edge-Avoiding A-Trous filter, can
* select a 3x3 filter instead. The 3x3 filter takes fewer samples and covers a
* smaller area, but takes less time to compute. From a loosely eyeballed
* comparison, N 5x5 passes looks similar to N+1 3x3 passes. The wider spatial
* filters take a fair chunk of time to compute. I wonder if it would be a good
* idea to interleave the input texture before computing, after the first pass
* which skips zero pixels.
*
* I have not implemented the variance guided part.
*
* There's also an optional TXAA pass to be applied after. I am not happy with
* its implementation yet, so it defaults to off here.
*/

/*
* References:
* Spatiotemporal Variance-Guided Filtering: Real-Time Reconstruction for
*	Path-Traced Global Illumination. by Christoph Schied and more.
*	- SVGF denoising algorithm
*
* Streaming G-Buffer Compression for Multi-Sample Anti-Aliasing.
*	by E. Kerzner and M. Salvi.
*	- details about history comparison for temporal denoising filter
*
* Edge-Avoiding À-Trous Wavelet Transform for Fast Global Illumination
*	Filtering. by Holger Dammertz and more.
*	- details about a-trous algorithm for spatial denoising filter
*/

* screen space shadows sample

implement screen space shadows. requires deferred rendering or a depth prepass. convert rendered depth to linear depth to skip reconstructing multiple times when doing shadow test.

project light into screen space to find direction from each pixel to the light. walk through screen space texture towards light. sample depth to reconstruct position represented by this sample pixel and compare to position along interpolated ray from pixel to light. if position represented by depth is closer to the eye than the light ray, the initial pixel is in shadow.

specify distance of shadow ray via world units or pixels in screen space.

optionally offset the initial sample position by noise to reduce banding.

demonstrate other ways to reduce hard edge of screen space shadow.

* clean out denoise sample for pull request...

* rename folder to 44- add missing file
elvencache 5 anni fa
parent
commit
e5d6a5a22b

+ 111 - 0
examples/44-sss/fs_screen_space_shadows.sc

@@ -0,0 +1,111 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+SAMPLER2D(s_depth, 0);
+
+#define DEPTH_EPSILON	1e-4
+
+// Adapted from the assao sample (cs_assao_prepare_depths.sc).
+// Maps `pos` through the u_ndcToViewMul/u_ndcToViewAdd affine transform and
+// scales the result by the view-space depth to get view-space x/y; z is the
+// depth itself.
+vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
+{
+	vec2 unitDepthXY = u_ndcToViewMul * pos.xy + u_ndcToViewAdd;
+	return vec3(unitDepthXY * viewspaceDepth, viewspaceDepth);
+}
+
+// Widely used shadertoy-style hash: maps a 2D coordinate to a pseudo-random
+// value in [0, 1) by taking the fractional part of a scaled sine.
+float ShadertoyNoise (vec2 uv)
+{
+	float phase = dot(uv.xy, vec2(12.9898,78.233));
+	return fract(sin(phase) * 43758.5453123);
+}
+
+void main()
+{
+	// Marches from this pixel's view-space position toward the light, testing
+	// the linear depth buffer at every step, and writes a shadow factor
+	// (1.0 = unoccluded, 0.0 = shadowed) to all output channels.
+	vec2 texCoord = v_texcoord0;
+	float linearDepth = texture2D(s_depth, texCoord).x;
+	vec3 viewSpacePosition = NDCToViewspace(texCoord, linearDepth);
+
+	vec3 lightStep = normalize(u_lightPosition - viewSpacePosition);
+
+	// screen space radius not usable directly. convert value given in pixels,
+	// to world units. this is important later when comparing depth in world units
+	float radius = u_shadowRadius;
+	if (0.0 < u_useScreenSpaceRadius)
+	{
+		// is there a better way to do this calculation?
+		float radiusTexCoordX = u_shadowRadius * u_viewTexel.x + texCoord.x;
+		float radiusPositionX = u_ndcToViewMul.x * radiusTexCoordX + u_ndcToViewAdd.x;
+		radius = abs(radiusPositionX * linearDepth - viewSpacePosition.x);
+	}
+	// each step advances radius / u_shadowSteps along the light direction
+	lightStep *= (radius / u_shadowSteps);
+
+	// first sample is one step away, or 0.5..1.5 steps when noise is enabled
+	vec3 samplePosition = viewSpacePosition;
+	float random = ShadertoyNoise(gl_FragCoord.xy + vec2(314.0, 159.0)*u_frameIdx);
+	float initialOffset = (0.0 < u_useNoiseOffset) ? (0.5+random) : 1.0;
+	samplePosition += initialOffset * lightStep;
+
+	mat4 viewToProj = mat4(
+		u_viewToProj0,
+		u_viewToProj1,
+		u_viewToProj2,
+		u_viewToProj3
+	);
+
+	float occluded = 0.0;
+	float softOccluded = 0.0;
+	float firstHit = u_shadowSteps;
+	for (int i = 0; i < int(u_shadowSteps); ++i, samplePosition += lightStep)
+	{
+		// project the sample and do the perspective divide by w
+		vec3 psSamplePosition = instMul(viewToProj, vec4(samplePosition, 1.0)).xyw;
+		psSamplePosition.xy *= (1.0/psSamplePosition.z);
+
+		// [-1, 1] -> [0, 1], with y flipped for texture lookup
+		vec2 sampleCoord = psSamplePosition.xy * 0.5 + 0.5;
+		sampleCoord.y = 1.0 - sampleCoord.y;
+
+		// using texture2Dlod because dx9 compiler doesn't like
+		// gradient instructions within this loop.
+		// the lod is passed as a float literal; strict GLSL ES does not
+		// convert an int literal implicitly
+		float sampleDepth = texture2DLod(s_depth, sampleCoord, 0.0).x;
+
+		// positive delta: the depth buffer holds something nearer to the eye
+		// than the ray sample. differences beyond radius are ignored
+		float delta = (samplePosition.z - sampleDepth);
+		if (DEPTH_EPSILON < delta && delta < radius)
+		{
+			firstHit = min(firstHit, float(i));
+			// for hard, soft occlusion
+			occluded += 1.0;
+			// for very soft occlusion
+			softOccluded += saturate(radius - delta);
+		}
+	}
+
+	float shadow;
+	if (1.5 < u_contactShadowsMode)
+	{
+		// very soft occlusion, includes distance falloff above
+		shadow = softOccluded * (1.0 - (firstHit / u_shadowSteps));
+		shadow = 1.0 - saturate(shadow);
+		shadow = shadow*shadow;
+	}
+	else if (0.5 < u_contactShadowsMode)
+	{
+		// soft occlusion
+		shadow = occluded * (1.0 - (firstHit / u_shadowSteps));
+		shadow = 1.0 - saturate(shadow);
+		shadow = shadow*shadow;
+	}
+	else // == 0
+	{
+		// hard occlusion
+		shadow = 0.0 < occluded ? 0.0 : 1.0;
+	}
+
+	gl_FragColor = vec4_splat(shadow);
+}

+ 83 - 0
examples/44-sss/fs_sss_deferred_combine.sc

@@ -0,0 +1,83 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+SAMPLER2D(s_color, 0);
+SAMPLER2D(s_normal, 1);
+SAMPLER2D(s_depth, 2);
+SAMPLER2D(s_shadows, 3);
+
+// Adapted from the assao sample (cs_assao_prepare_depths.sc).
+// Maps `pos` through the u_ndcToViewMul/u_ndcToViewAdd affine transform and
+// scales the result by the view-space depth to get view-space x/y; z is the
+// depth itself.
+vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
+{
+	vec2 unitDepthXY = u_ndcToViewMul * pos.xy + u_ndcToViewAdd;
+	return vec3(unitDepthXY * viewspaceDepth, viewspaceDepth);
+}
+
+void main()
+{
+	// Combine pass: shades lit gbuffer pixels with a single point light
+	// modulated by the screen space shadow term; unlit pixels pass through.
+	vec2 texCoord = v_texcoord0;
+
+	// gbuffer color is stored gamma encoded; alpha carries the material id
+	// (the unlit gbuffer shader writes 0.0, the regular one writes 1.0)
+	vec4 colorId = texture2D(s_color, texCoord);
+	vec3 color = colorId.xyz;
+	float materialId = colorId.w;
+
+	if (0.0 < materialId)
+	{
+		vec4 normalRoughness = texture2D(s_normal, texCoord);
+		vec3 normal = NormalDecode(normalRoughness.xyz);
+		float roughness = normalRoughness.w;
+
+		// transform normal into view space
+		mat4 worldToView = mat4(
+			u_worldToView0,
+			u_worldToView1,
+			u_worldToView2,
+			u_worldToView3
+		);
+		vec3 vsNormal = instMul(worldToView, vec4(normal, 0.0)).xyz;
+
+		// read depth and recreate position
+		float linearDepth = texture2D(s_depth, texCoord).x;
+		vec3 viewSpacePosition = NDCToViewspace(texCoord, linearDepth);
+
+		float shadow = texture2D(s_shadows, texCoord).x;
+
+		// need to get a valid view vector for any microfacet stuff :(
+		float gloss = 1.0-roughness;
+		float specPower = 62.0 * gloss + 2.0;
+
+		vec3 light = (u_lightPosition - viewSpacePosition);
+		// small bias keeps the inverse-square falloff finite at the light
+		float lightDistSq = dot(light, light) + 1e-5;
+		light = normalize(light);
+		float NdotL = saturate(dot(vsNormal, light));
+		float diffuse = NdotL * (1.0/lightDistSq);
+		float specular = 5.0 * pow(NdotL, specPower);
+
+		float lightAmount = mix(diffuse, specular, 0.04) * shadow;
+
+		// light in linear space, then re-encode for display
+		color = toLinear(color) * lightAmount;
+		color = toGamma(color);
+
+		// debug display shadows only
+		if (0.0 < u_displayShadows)
+		{
+			color = vec3_splat(shadow);
+		}
+	}
+	// else, assume color is unlit: output it exactly as stored. Previously it
+	// was linearized above and never converted back to gamma.
+
+	gl_FragColor = vec4(color, 1.0);
+}

+ 62 - 0
examples/44-sss/fs_sss_gbuffer.sc

@@ -0,0 +1,62 @@
+$input v_normal, v_texcoord0, v_texcoord1
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+SAMPLER2D(s_albedo, 0);
+SAMPLER2D(s_normal, 1);
+
+// http://www.thetenthplanet.de/archives/1180
+// "followup: normal mapping without precomputed tangents"
+// Builds a tangent/bitangent/normal matrix from the screen-space derivatives
+// of position `p` and texture coordinate `uv`, around the normal `N`.
+mat3 cotangentFrame(vec3 N, vec3 p, vec2 uv)
+{
+	// per-pixel derivatives of position and texture coordinate
+	vec3 posDx = dFdx(p);
+	vec3 posDy = dFdy(p);
+	vec2 uvDx = dFdx(uv);
+	vec2 uvDy = dFdy(uv);
+
+	// vectors perpendicular to N and to one of the position derivatives
+	vec3 perpToDy = cross(posDy, N);
+	vec3 perpToDx = cross(N, posDx);
+
+	// solve the linear system for tangent and bitangent
+	vec3 tangent   = perpToDy * uvDx.x + perpToDx * uvDy.x;
+	vec3 bitangent = perpToDy * uvDx.y + perpToDx * uvDy.y;
+
+	// scale both by the inverse length of the longer one, which makes the
+	// frame scale-invariant
+	float longestSq = max(dot(tangent, tangent), dot(bitangent, bitangent));
+	float invMax = inversesqrt(longestSq);
+	return mat3(tangent * invMax, bitangent * invMax, N);
+}
+
+void main()
+{
+	// Gbuffer fill: writes gamma-encoded albedo (alpha 1.0) to target 0 and
+	// the encoded, normal-mapped normal plus a roughness proxy to target 1.
+	vec3 albedo = toLinear(texture2D(s_albedo, v_texcoord0).xyz);
+
+	// get vertex normal
+	vec3 normal = normalize(v_normal);
+
+	// get normal map normal, unpack, and calculate z
+	vec3 normalMap;
+	normalMap.xy = texture2D(s_normal, v_texcoord0).xy;
+	normalMap.xy = normalMap.xy * 2.0 - 1.0;
+	// filtering/compression can push |xy| slightly above 1, which would make
+	// the sqrt argument negative and yield NaN; clamp it at zero
+	normalMap.z = sqrt(max(0.0, 1.0 - dot(normalMap.xy, normalMap.xy)));
+
+	// swap x and y, because the brick texture looks flipped, don't copy this...
+	normalMap.xy = normalMap.yx;
+
+	// perturb geometry normal by normal map
+	vec3 pos = v_texcoord1.xyz; // contains world space pos
+	mat3 TBN = cotangentFrame(normal, pos, v_texcoord0);
+	vec3 bumpedNormal = normalize(instMul(TBN, normalMap));
+
+	// need some proxy for roughness value w/o roughness texture
+	// assume horizontal (blue) normal map is smooth, and then
+	// modulate with albedo for some higher frequency detail
+	float roughness = normalMap.z * mix(0.9, 1.0, albedo.y);
+	roughness = roughness * 0.6 + 0.2;
+
+	vec3 bufferNormal = NormalEncode(bumpedNormal);
+
+	gl_FragData[0] = vec4(toGamma(albedo), 1.0);
+	gl_FragData[1] = vec4(bufferNormal, roughness);
+}

+ 35 - 0
examples/44-sss/fs_sss_linear_depth.sc

@@ -0,0 +1,35 @@
+$input v_texcoord0
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+SAMPLER2D(s_depth, 0);
+
+// from assao sample, cs_assao_prepare_depths.sc
+// Converts a depth-buffer value into linear view-space depth.
+float ScreenSpaceToViewSpaceDepth( float screenDepth )
+{
+	// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+	//
+	// u_depthUnpackConsts is expected to be set to:
+	//   .x (mul) = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear )
+	//   .y (add) = cameraClipFar / ( cameraClipFar - cameraClipNear )
+	vec2 unpackConsts = u_depthUnpackConsts;
+	return unpackConsts.x / ( unpackConsts.y - screenDepth );
+}
+
+
+void main()
+{
+	// read the depth texture and output its linearized value in every channel
+	float deviceDepth = texture2D(s_depth, v_texcoord0).x;
+	gl_FragColor = vec4_splat(ScreenSpaceToViewSpaceDepth(deviceDepth));
+}

+ 21 - 0
examples/44-sss/fs_sss_unlit.sc

@@ -0,0 +1,21 @@
+$input v_normal, v_texcoord0, v_texcoord1
+
+#include "../common/common.sh"
+#include "parameters.sh"
+#include "normal_encoding.sh"
+
+void main()
+{
+	// unlit surface: constant white albedo, roughness 1.0, and the plain
+	// interpolated vertex normal (no normal map)
+	vec3 encodedNormal = NormalEncode(normalize(v_normal));
+
+	// an alpha of 0.0 in the color buffer flags these pixels for different
+	// handling while lighting/shading in the gbuffer combine pass
+	gl_FragData[0] = vec4(toGamma(vec3_splat(1.0)), 0.0);
+	gl_FragData[1] = vec4(encodedNormal, 1.0);
+}

+ 10 - 0
examples/44-sss/makefile

@@ -0,0 +1,10 @@
+#
+# Copyright 2011-2019 Branimir Karadzic. All rights reserved.
+# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+#
+
+BGFX_DIR=../..
+RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
+BUILD_DIR=../../.build
+
+include $(BGFX_DIR)/scripts/shader.mk

+ 92 - 0
examples/44-sss/normal_encoding.sh

@@ -0,0 +1,92 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef NORMAL_ENCODING_SH
+#define NORMAL_ENCODING_SH
+
+#define NE_USE_OCTAHEDRAL_REPRESENTATION   1
+
+// From "A Survey of Efficient Representations for Independent Unit Vectors"
+// http://jcgt.org/published/0003/02/01/paper.pdf
+
+// Pack an oct24 normal (two 12-bit values in [-1, 1]) into three 8-bit
+// channels suitable for an rgb8 texture.
+vec3 snorm12x2_to_unorm8x3 (vec2 f)
+{
+	// quantize each component to an integer in [0, 4094]; +0.5 then floor rounds
+	vec2 clamped = clamp(f, -1.0, 1.0);
+	vec2 u = floor(clamped * 2047.0 + 2047.5);
+
+	// upper 4 bits of y; they share the middle channel with the low bits of x
+	float yHigh = floor(u.y / 256.0);
+	float xOver16 = u.x / 16.0;
+
+	// "This code assumes that rounding will occur during storage."
+	// -- Not certain but this appears to mainly apply to the x channel.
+	//    From paper: x = u.x / 16.0 - 0.5
+	//    Instead round by +0.5 and floor.
+	vec3 bytes = vec3(floor(xOver16), fract(xOver16) * 256.0 + yHigh, u.y - yHigh * 256.0);
+	return bytes / 255.0;
+}
+
+// Recover an oct24 normal (two 12-bit values in [-1, 1]) from the three
+// 8-bit channels read back from an rgb8 texture.
+vec2 unorm8x3_to_snorm12x2 (vec3 u)
+{
+	vec3 bytes = u * 255.0;
+
+	// the middle channel is split: integer part of /16 goes to x, the
+	// fractional part (times 16*256) goes to y
+	float midOver16 = bytes.y * (1.0 / 16.0);
+	vec2 packed = vec2(bytes.x * 16.0 + floor(midOver16), fract(midOver16) * (16.0 * 256.0) + bytes.z);
+
+	vec2 s = packed * (1.0 / 2047.0) - 1.0;
+	return clamp(s, -1.0, 1.0);
+}
+
+// Per-component sign that never yields 0 (the built-in sign() can): zero and
+// positive inputs map to +1, negative inputs to -1.
+vec2 signNotZero (vec2 v)
+{
+	float sx = (v.x >= 0.0) ? 1.0 : -1.0;
+	float sy = (v.y >= 0.0) ? 1.0 : -1.0;
+	return vec2(sx, sy);
+}
+
+// Octahedral encode. Input is assumed to be normalized; each output
+// component lies in (-1, 1).
+vec2 float32x3_to_oct(vec3 v)
+{
+	// Project the sphere onto the octahedron (divide by the L1 norm), and
+	// then onto the xy plane
+	float l1Norm = abs(v.x) + abs(v.y) + abs(v.z);
+	vec2 projected = v.xy * (1.0 / l1Norm);
+
+	// Reflect the folds of the lower hemisphere over the diagonals
+	if (v.z <= 0.0)
+	{
+		projected = (1.0 - abs(projected.yx)) * signNotZero(projected);
+	}
+	return projected;
+}
+
+// Octahedral decode: rebuild a unit-length vec3 normal from its oct
+// representation.
+vec3 oct_to_float32x3 (vec2 e)
+{
+	float z = 1.0 - abs(e.x) - abs(e.y);
+	vec2 xy = e.xy;
+
+	// negative z means the point was folded over during encoding; unfold it
+	if (z < 0.0)
+	{
+		xy = (1.0 - abs(e.yx)) * signNotZero(e.xy);
+	}
+
+	return normalize(vec3(xy, z));
+}
+
+// Octahedral-encode a normal, then pack the result into three 8-bit channels.
+vec3 SignedNormalEncodeToOct (vec3 normal)
+{
+	vec2 oct = float32x3_to_oct(normal);
+	return snorm12x2_to_unorm8x3(oct);
+}
+
+// Unpack three 8-bit channels into an oct pair, then decode it to a normal.
+vec3 SignedNormalDecodeFromOct (vec3 normal)
+{
+	vec2 oct = unorm8x3_to_snorm12x2(normal);
+	return oct_to_float32x3(oct);
+}
+
+// Encode a normal for storage in the gbuffer. Uses the octahedral packing
+// when NE_USE_OCTAHEDRAL_REPRESENTATION is non-zero, otherwise a plain
+// [-1, 1] -> [0, 1] remap.
+vec3 NormalEncode (vec3 normal)
+{
+#if NE_USE_OCTAHEDRAL_REPRESENTATION
+	vec3 encoded = SignedNormalEncodeToOct(normal);
+#else
+	vec3 encoded = normal * 0.5 + 0.5;
+#endif
+	return encoded;
+}
+
+// Inverse of NormalEncode: octahedral unpacking when
+// NE_USE_OCTAHEDRAL_REPRESENTATION is non-zero, otherwise a plain
+// [0, 1] -> [-1, 1] remap.
+vec3 NormalDecode (vec3 normal)
+{
+#if NE_USE_OCTAHEDRAL_REPRESENTATION
+	vec3 decoded = SignedNormalDecodeFromOct(normal);
+#else
+	vec3 decoded = normal * 2.0 - 1.0;
+#endif
+	return decoded;
+}
+
+#endif // NORMAL_ENCODING_SH

+ 33 - 0
examples/44-sss/parameters.sh

@@ -0,0 +1,33 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#ifndef PARAMETERS_SH
+#define PARAMETERS_SH
+
+uniform vec4 u_params[12]; /* Every example parameter lives in this one vec4 array.
+ * The macros below give names to its components:
+ *   [0]      frame index, shadow radius, shadow step count, noise-offset toggle
+ *   [1]      depth unpack constants (xy), contact shadow mode, screen-space-radius toggle
+ *   [2]      ndc-to-view multiply (xy) and add (zw)
+ *   [3]      light position (xyz), display-shadows toggle
+ *   [4..7]   rows passed to mat4() as worldToView
+ *   [8..11]  rows passed to mat4() as viewToProj
+ * The array length and slot order must match the Uniforms struct in
+ * screen_space_shadows.cpp (NumVec4 = 12).
+ */
+
+#define u_frameIdx					(u_params[0].x)
+#define u_shadowRadius				(u_params[0].y)
+#define u_shadowSteps				(u_params[0].z)
+#define u_useNoiseOffset			(u_params[0].w)
+
+#define u_depthUnpackConsts			(u_params[1].xy)
+#define u_contactShadowsMode		(u_params[1].z)
+#define u_useScreenSpaceRadius		(u_params[1].w)
+#define u_ndcToViewMul				(u_params[2].xy)
+#define u_ndcToViewAdd				(u_params[2].zw)
+#define u_lightPosition				(u_params[3].xyz)
+#define u_displayShadows			(u_params[3].w)
+
+#define u_worldToView0				(u_params[4])
+#define u_worldToView1				(u_params[5])
+#define u_worldToView2				(u_params[6])
+#define u_worldToView3				(u_params[7])
+#define u_viewToProj0				(u_params[8])
+#define u_viewToProj1				(u_params[9])
+#define u_viewToProj2				(u_params[10])
+#define u_viewToProj3				(u_params[11])
+
+#endif // PARAMETERS_SH

+ 850 - 0
examples/44-sss/screen_space_shadows.cpp

@@ -0,0 +1,850 @@
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+/*
+* Implement screen space shadows as bgfx example. Goal is to explore various
+* options and parameters.
+*
+* radius
+* ======
+* Use radius/shadow distance defined in screen space pixels or world units.
+*
+* In world units, the screen distance will shrink as objects get farther away.
+* This can provide more natural looking shadows and fade out the effect at a
+* distance, leaving screen space shadows as an added detail effect near the
+* camera.
+*
+* Screen space units mean that objects will cast the same length of shadow
+* regardless of how far they are away from the camera. Pull back the camera
+* and objects' shadows will appear to grow. On the other hand, this can be
+* desired because it will allow objects at the horizon like hills and trees to
+* cast a shadow. Depending on your scene, such far objects may be outside of
+* the area affected by regular shadow maps. Even with multiple cascades, you
+* may not be able to afford shadow maps across the entire scene.
+*
+* This sample does not put effort into avoiding the initial pixel or avoiding
+* resampling the same value if the step size is relatively smaller than the
+* sampled distance in screen space. May want to set a minimum distance so each
+* sample covers a unique value or take care to select a neighboring pixel for
+* the first sample.
+*
+* soft contact shadows
+* ====================
+* If hard screen space shadows are added to a scene that already has soft
+* shadows via shadow maps, the hard edge can look out of place. Additionally,
+* it is common for screen space shadows to not quite line up with other
+* shadows. This is because the depth buffer does not specify thickness,
+* leaving some pixels incorrectly occluded. For example, you would not want
+* some thin feature like a pipe to cast a shadow as if you were seeing the
+* side of a metal wall.
+*
+* These soft contact shadows are an attempt to minimize the problems described
+* above. By adding a smoother falloff, they may blend into the scene better.
+* Inspired by screen space ambient occlusion, this sample takes into account
+* distance from shadowed pixel to its occluders.
+*
+* - hard	If there's any occluder found, mark the source pixel as shadowed.
+*
+* - soft	Modulate shadow by distance to the first occluder. Assuming a
+*			nearby pixel is closer and more likely to represent an accurate
+*			shadow, it is darker. If the first pixel to be an occluder is far
+*			away, it should likely cast a softer shadow.
+*
+* - very	In addition to the same modulation used by soft mode, also
+*	soft	reduce the occlusion contribution from pixels that are farther
+*			away. This sample compares the depth difference to the shadow
+*			radius, a 1D distance, instead of comparing the actual
+*			distance in 3D space.
+*/
+
+
+#include <common.h>
+#include <camera.h>
+#include <bgfx_utils.h>
+#include <imgui/imgui.h>
+#include <bx/rng.h>
+#include <bx/os.h>
+
+
+namespace {
+
+/* Gbuffer has multiple render targets.
+ * The GBUFFER_RT_* values are used as indices into the gbuffer texture array
+ * (e.g. m_gbufferTex[GBUFFER_RT_DEPTH]); GBUFFER_RENDER_TARGETS is presumably
+ * the array length — its use is outside this view. */
+#define GBUFFER_RT_COLOR		0
+#define GBUFFER_RT_NORMAL		1
+#define GBUFFER_RT_DEPTH		2
+#define GBUFFER_RENDER_TARGETS	3
+
+#define MODEL_COUNT				100 // NOTE(review): presumably the number of randomly placed models; the m_models declaration is not visible here
+
+static const char * s_meshPaths[] = // mesh files loaded in init(); s_meshScale is indexed by the same mesh id
+{
+	"meshes/unit_sphere.bin", // index 0: also used to visualize the light
+	"meshes/column.bin",
+	"meshes/tree.bin",
+	"meshes/hollowcube.bin",
+	"meshes/bunny.bin"
+};
+
+// Per-mesh scale factor, looked up with the same mesh index as s_meshPaths.
+static const float s_meshScale[] =
+{
+	0.25f, // unit_sphere
+	0.05f, // column
+	0.15f, // tree
+	0.25f, // hollowcube
+	0.25f  // bunny
+};
+
+// Both tables are indexed by the same mesh id; fail the build if one is
+// extended without the other instead of reading past the end at runtime.
+static_assert(BX_COUNTOF(s_meshScale) == BX_COUNTOF(s_meshPaths), "s_meshScale needs exactly one entry per s_meshPaths entry");
+
+/* Vertex decl for our screen space quad (used in deferred rendering).
+ * Each vertex is three position floats followed by two texture coordinate
+ * floats; init() describes exactly that layout to bgfx. */
+struct PosTexCoord0Vertex
+{
+	float m_x;
+	float m_y;
+	float m_z;
+	float m_u;
+	float m_v;
+
+	static void init() // fills ms_layout; attribute order must match the member order above
+	{
+		ms_layout
+			.begin()
+			.add(bgfx::Attrib::Position, 3, bgfx::AttribType::Float)
+			.add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float)
+			.end();
+	}
+
+	static bgfx::VertexLayout ms_layout; // shared by all users of this vertex type
+};
+
+bgfx::VertexLayout PosTexCoord0Vertex::ms_layout;
+
+struct Uniforms // CPU-side copy of the shaders' u_params array, submitted as NumVec4 vec4s
+{
+	enum { NumVec4 = 12 }; // must match `uniform vec4 u_params[12]` in parameters.sh
+
+	void init() { // creates the bgfx handle for the "u_params" vec4 array
+		u_params = bgfx::createUniform("u_params", bgfx::UniformType::Vec4, NumVec4);
+	};
+
+	void submit() const { // sets the whole array for the next draw call
+		bgfx::setUniform(u_params, m_params, NumVec4);
+	}
+
+	void destroy() { // releases the handle created by init()
+		bgfx::destroy(u_params);
+	}
+
+	union // named members alias m_params; each row below fills one or more vec4 slots, in shader order
+	{
+		struct
+		{
+			/* 0    */ struct { float m_frameIdx; float m_shadowRadius; float m_shadowSteps; float m_useNoiseOffset; };
+			/* 1    */ struct { float m_depthUnpackConsts[2]; float m_contactShadowsMode; float m_useScreenSpaceRadius; };
+			/* 2    */ struct { float m_ndcToViewMul[2]; float m_ndcToViewAdd[2]; };
+			/* 3    */ struct { float m_lightPosition[3]; float m_displayShadows; };
+			/* 4-7  */ struct { float m_worldToView[16]; }; // built-in u_view will be transform for quad during screen passes
+			/* 8-11 */ struct { float m_viewToProj[16]; };	 // built-in u_proj will be transform for quad during screen passes
+		};
+
+		float m_params[NumVec4 * 4]; // flat view handed to bgfx::setUniform
+	};
+
+	bgfx::UniformHandle u_params;
+};
+
+// A single texture together with the frame buffer that renders into it.
+struct RenderTarget
+{
+	// Creates a 2D texture without mips and with one layer, then wraps it in
+	// a one-attachment frame buffer.
+	void init(uint32_t _width, uint32_t _height, bgfx::TextureFormat::Enum _format, uint64_t _flags)
+	{
+		const uint16_t width  = uint16_t(_width);
+		const uint16_t height = uint16_t(_height);
+		m_texture = bgfx::createTexture2D(width, height, false, 1, _format, _flags);
+
+		// last argument (destroyTextures = true): the frame buffer destroys
+		// the texture when it is destroyed itself
+		m_buffer = bgfx::createFrameBuffer(1, &m_texture, true);
+	}
+
+	// Destroys the frame buffer, which is also responsible for the texture.
+	void destroy()
+	{
+		bgfx::destroy(m_buffer);
+	}
+
+	bgfx::TextureHandle m_texture;
+	bgfx::FrameBufferHandle m_buffer;
+};
+
+// Binds a transient vertex buffer holding one oversized triangle to stream 0,
+// for use by a screen pass. Does nothing when three transient vertices are
+// not available.
+//   _textureWidth/_textureHeight  target size, used to scale the half texel
+//   _texelHalf                    half-texel offset in texels
+//   _originBottomLeft             flips the v range
+//   _width/_height                extent of the triangle
+void screenSpaceQuad(float _textureWidth, float _textureHeight, float _texelHalf, bool _originBottomLeft, float _width = 1.0f, float _height = 1.0f)
+{
+	if (3 != bgfx::getAvailTransientVertexBuffer(3, PosTexCoord0Vertex::ms_layout))
+	{
+		return;
+	}
+
+	bgfx::TransientVertexBuffer tvb;
+	bgfx::allocTransientVertexBuffer(&tvb, 3, PosTexCoord0Vertex::ms_layout);
+	PosTexCoord0Vertex* verts = (PosTexCoord0Vertex*)tvb.data;
+
+	const float halfTexelU = _texelHalf / _textureWidth;
+	const float halfTexelV = _texelHalf / _textureHeight;
+
+	// position extents
+	const float left   = -_width;
+	const float right  =  _width;
+	const float bottom = 0.0f;
+	const float top    = _height * 2.0f;
+	const float depth  = 0.0f;
+
+	// texture coordinate extents
+	const float uLeft  = -1.0f + halfTexelU;
+	const float uRight =  1.0f + halfTexelU;
+	float vBottom = halfTexelV;
+	float vTop    = 2.0f + halfTexelV;
+
+	if (_originBottomLeft)
+	{
+		// exchange the two v values and shift both down by one
+		const float previousBottom = vBottom;
+		vBottom = vTop - 1.0f;
+		vTop    = previousBottom - 1.0f;
+	}
+
+	verts[0] = { left,  bottom, depth, uLeft,  vBottom };
+	verts[1] = { right, bottom, depth, uRight, vBottom };
+	verts[2] = { right, top,    depth, uRight, vTop    };
+
+	bgfx::setVertexBuffer(0, &tvb);
+}
+
+// Writes (_x, _y) into the first two floats at _v.
+void vec2Set(float* _v, float _x, float _y)
+{
+	float* out = _v;
+	*out++ = _x;
+	*out   = _y;
+}
+
+// Writes (_x, _y, _z, _w) into the first four floats at _v.
+void vec4Set(float* _v, float _x, float _y, float _z, float _w)
+{
+	const float components[4] = { _x, _y, _z, _w };
+	for (int idx = 0; idx < 4; ++idx)
+	{
+		_v[idx] = components[idx];
+	}
+}
+
+// Copies the 16 floats of a 4x4 matrix from _src to _m.
+void mat4Set(float * _m, const float * _src)
+{
+	const float* const srcEnd = _src + 16;
+	while (_src != srcEnd)
+	{
+		*_m++ = *_src++;
+	}
+}
+
+class ExampleScreenSpaceShadows : public entry::AppI
+{
+public:
+	ExampleScreenSpaceShadows(const char* _name, const char* _description) // forwards name and description to entry::AppI
+		: entry::AppI(_name, _description)
+		, m_currFrame(UINT32_MAX) // NOTE(review): presumably so the first increment wraps to 0 — confirm where m_currFrame is advanced
+		, m_texelHalf(0.0f) // replaced in init() once the renderer type is known
+	{
+	}
+
+	void init(int32_t _argc, const char* const* _argv, uint32_t _width, uint32_t _height) override // one-time setup: bgfx, uniforms, programs, meshes, textures, frame buffers, camera, imgui
+	{
+		Args args(_argc, _argv);
+
+		m_width = _width;
+		m_height = _height;
+		m_debug = BGFX_DEBUG_NONE;
+		m_reset = BGFX_RESET_VSYNC;
+
+		bgfx::Init init;
+		init.type = args.m_type;
+
+		init.vendorId = args.m_pciId;
+		init.resolution.width = m_width;
+		init.resolution.height = m_height;
+		init.resolution.reset = m_reset;
+		bgfx::init(init);
+
+		// Apply the debug flags (BGFX_DEBUG_NONE here, so no debug text is enabled).
+		bgfx::setDebug(m_debug);
+
+		// Create uniforms
+		m_uniforms.init();
+
+		// Create texture sampler uniforms (used when we bind textures)
+		s_albedo = bgfx::createUniform("s_albedo", bgfx::UniformType::Sampler); // Model's source albedo
+		s_color = bgfx::createUniform("s_color", bgfx::UniformType::Sampler); // Color (albedo) gbuffer, default color input
+		s_normal = bgfx::createUniform("s_normal", bgfx::UniformType::Sampler); // Normal gbuffer, Model's source normal
+		s_depth = bgfx::createUniform("s_depth", bgfx::UniformType::Sampler); // Depth gbuffer
+		s_shadows = bgfx::createUniform("s_shadows", bgfx::UniformType::Sampler); // Screen space shadows result
+
+		// Create program from shaders.
+		m_gbufferProgram = loadProgram("vs_sss_gbuffer", "fs_sss_gbuffer"); // Fill gbuffer
+		m_sphereProgram = loadProgram("vs_sss_gbuffer", "fs_sss_unlit"); // Unlit gbuffer fill, used for the light sphere
+		m_linearDepthProgram = loadProgram("vs_sss_screenquad", "fs_sss_linear_depth"); // Depth -> linear depth
+		m_shadowsProgram = loadProgram("vs_sss_screenquad", "fs_screen_space_shadows"); // Shadow ray march
+		m_combineProgram = loadProgram("vs_sss_screenquad", "fs_sss_deferred_combine"); // Compute lighting from gbuffer
+
+		// Load some meshes
+		for (uint32_t ii = 0; ii < BX_COUNTOF(s_meshPaths); ++ii)
+		{
+			m_meshes[ii] = meshLoad(s_meshPaths[ii]);
+		}
+
+		// sphere is first mesh
+		m_lightModel.mesh = 0;
+
+		// Randomly create some models
+		bx::RngMwc mwc;
+		for (uint32_t ii = 0; ii < BX_COUNTOF(m_models); ++ii)
+		{
+			Model& model = m_models[ii];
+
+			model.mesh = mwc.gen() % BX_COUNTOF(s_meshPaths);
+			model.position[0] = (((mwc.gen() % 256)) - 128.0f) / 20.0f; // x in [-6.4, 6.35]
+			model.position[1] = 0;
+			model.position[2] = (((mwc.gen() % 256)) - 128.0f) / 20.0f; // z in [-6.4, 6.35]
+		}
+
+		// Load ground, just use the cube
+		m_ground = meshLoad("meshes/cube.bin");
+
+		m_groundTexture = loadTexture("textures/fieldstone-rgba.dds");
+		m_normalTexture = loadTexture("textures/fieldstone-n.dds");
+
+		m_recreateFrameBuffers = false;
+		createFramebuffers();
+	
+		// Vertex decl
+		PosTexCoord0Vertex::init();
+
+		// Init camera
+		cameraCreate();
+		cameraSetPosition({ 0.0f, 1.5f, -4.0f });
+		cameraSetVerticalAngle(-0.3f);
+		m_fovY = 60.0f;
+
+		cameraGetViewMtx(m_view);
+		bx::mtxProj(m_proj, m_fovY, float(m_size[0]) / float(m_size[1]), 0.01f, 100.0f,  bgfx::getCaps()->homogeneousDepth); // NOTE(review): relies on m_size being set already, presumably by createFramebuffers() — confirm
+
+		// Track whether previous results are valid
+		m_havePrevious = false;
+
+		// Get renderer capabilities info. Only Direct3D9 gets a half texel offset.
+		const bgfx::RendererType::Enum renderer = bgfx::getRendererType();
+		m_texelHalf = bgfx::RendererType::Direct3D9 == renderer ? 0.5f : 0.0f;
+
+		imguiCreate();
+	}
+
+	int32_t shutdown() override // releases everything created in init(), then shuts bgfx down; always returns 0
+	{
+		for (uint32_t ii = 0; ii < BX_COUNTOF(s_meshPaths); ++ii)
+		{
+			meshUnload(m_meshes[ii]);
+		}
+		meshUnload(m_ground);
+
+		bgfx::destroy(m_normalTexture);
+		bgfx::destroy(m_groundTexture);
+
+		bgfx::destroy(m_gbufferProgram);
+		bgfx::destroy(m_sphereProgram);
+		bgfx::destroy(m_linearDepthProgram);
+		bgfx::destroy(m_shadowsProgram);
+		bgfx::destroy(m_combineProgram);
+
+		m_uniforms.destroy();
+
+		bgfx::destroy(s_albedo); // sampler uniforms
+		bgfx::destroy(s_color);
+		bgfx::destroy(s_normal);
+		bgfx::destroy(s_depth);
+		bgfx::destroy(s_shadows);
+
+		destroyFramebuffers();
+
+		cameraDestroy();
+
+		imguiDestroy();
+
+		bgfx::shutdown(); // last: every bgfx resource above has been released by now
+
+		return 0;
+	}
+
+	bool update() override
+	{
+		if (!entry::processEvents(m_width, m_height, m_debug, m_reset, &m_mouseState))
+		{
+			// skip processing when minimized, otherwise crashing
+			if (0 == m_width || 0 == m_height)
+			{
+				return true;
+			}
+
+			// Update frame timer
+			int64_t now = bx::getHPCounter();
+			static int64_t last = now;
+			const int64_t frameTime = now - last;
+			last = now;
+			const double freq = double(bx::getHPFrequency());
+			const float deltaTime = float(frameTime / freq);
+			const bgfx::Caps* caps = bgfx::getCaps();
+
+			if (m_size[0] != (int32_t)m_width
+			||  m_size[1] != (int32_t)m_height
+			||  m_recreateFrameBuffers)
+			{
+				destroyFramebuffers();
+				createFramebuffers();
+				m_recreateFrameBuffers = false;
+			}
+
+			// rotate light
+			const float rotationSpeed = m_moveLight ? 0.75f : 0.0f;
+			m_lightRotation += deltaTime * rotationSpeed;
+			if (bx::kPi2 < m_lightRotation)
+			{
+				m_lightRotation -= bx::kPi2;
+			}
+			m_lightModel.position[0] = bx::cos(m_lightRotation) * 3.0f;
+			m_lightModel.position[1] = 1.5f;
+			m_lightModel.position[2] = bx::sin(m_lightRotation) * 3.0f;
+
+			// Update camera
+			cameraUpdate(deltaTime*0.15f, m_mouseState);
+
+			// Set up matrices for gbuffer
+			cameraGetViewMtx(m_view);
+
+			updateUniforms();
+
+			bx::mtxProj(m_proj, m_fovY, float(m_size[0]) / float(m_size[1]), 0.01f, 100.0f, caps->homogeneousDepth);
+			bx::mtxProj(m_proj2, m_fovY, float(m_size[0]) / float(m_size[1]), 0.01f, 100.0f, false);
+
+			bgfx::ViewId view = 0;
+
+			// Draw everything into gbuffer
+			{
+				bgfx::setViewName(view, "gbuffer");
+				bgfx::setViewClear(view
+					, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
+					, 0
+					, 1.0f
+					, 0
+				);
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_size[0]), uint16_t(m_size[1]));
+				bgfx::setViewTransform(view, m_view, m_proj);
+				// Make sure when we draw it goes into gbuffer and not backbuffer
+				bgfx::setViewFrameBuffer(view, m_gbuffer);
+
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_WRITE_Z
+					| BGFX_STATE_DEPTH_TEST_LESS
+					);
+
+				drawAllModels(view, m_gbufferProgram, m_uniforms);
+
+				// draw sphere to visualize light
+				{
+					const float scale = s_meshScale[m_lightModel.mesh];
+					float mtx[16];
+					bx::mtxSRT(mtx
+						, scale
+						, scale
+						, scale
+						, 0.0f
+						, 0.0f
+						, 0.0f
+						, m_lightModel.position[0]
+						, m_lightModel.position[1]
+						, m_lightModel.position[2]
+						);
+
+					m_uniforms.submit();
+					meshSubmit(m_meshes[m_lightModel.mesh], view, m_sphereProgram, mtx);
+				}
+
+				++view;
+			}
+
+			float orthoProj[16];
+			bx::mtxOrtho(orthoProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, caps->homogeneousDepth);
+			{
+				// clear out transform stack
+				float identity[16];
+				bx::mtxIdentity(identity);
+				bgfx::setTransform(identity);
+			}
+
+			// Convert depth to linear depth for shadow depth compare
+			{
+				bgfx::setViewName(view, "linear depth");
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_linearDepth.m_buffer);
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_DEPTH_TEST_ALWAYS
+					);
+				bgfx::setTexture(0, s_depth, m_gbufferTex[GBUFFER_RT_DEPTH]);
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_linearDepthProgram);
+				++view;
+			}
+
+			// Do screen space shadows
+			{
+				bgfx::setViewName(view, "screen space shadows");
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, m_shadows.m_buffer);
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_DEPTH_TEST_ALWAYS
+					);
+				bgfx::setTexture(0, s_depth, m_linearDepth.m_texture);
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_shadowsProgram);
+				++view;
+			}
+
+			// Shade gbuffer
+			{
+				bgfx::setViewName(view, "combine");
+
+				bgfx::setViewRect(view, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(view, NULL, orthoProj);
+				bgfx::setViewFrameBuffer(view, BGFX_INVALID_HANDLE);
+				bgfx::setState(0
+					| BGFX_STATE_WRITE_RGB
+					| BGFX_STATE_WRITE_A
+					| BGFX_STATE_DEPTH_TEST_ALWAYS
+					);
+				bgfx::setTexture(0, s_color, m_gbufferTex[GBUFFER_RT_COLOR]);
+				bgfx::setTexture(1, s_normal, m_gbufferTex[GBUFFER_RT_NORMAL]);
+				bgfx::setTexture(2, s_depth, m_linearDepth.m_texture);
+				bgfx::setTexture(3, s_shadows, m_shadows.m_texture);
+				m_uniforms.submit();
+				screenSpaceQuad(float(m_width), float(m_height), m_texelHalf, caps->originBottomLeft);
+				bgfx::submit(view, m_combineProgram);
+				++view;
+			}
+
+			// Draw UI
+			imguiBeginFrame(m_mouseState.m_mx
+				, m_mouseState.m_my
+				, (m_mouseState.m_buttons[entry::MouseButton::Left] ? IMGUI_MBUT_LEFT : 0)
+				| (m_mouseState.m_buttons[entry::MouseButton::Right] ? IMGUI_MBUT_RIGHT : 0)
+				| (m_mouseState.m_buttons[entry::MouseButton::Middle] ? IMGUI_MBUT_MIDDLE : 0)
+				, m_mouseState.m_mz
+				, uint16_t(m_width)
+				, uint16_t(m_height)
+				);
+
+			showExampleDialog(this);
+
+			ImGui::SetNextWindowPos(
+				ImVec2(m_width - m_width / 4.0f - 10.0f, 10.0f)
+				, ImGuiCond_FirstUseEver
+				);
+			ImGui::SetNextWindowSize(
+				ImVec2(m_width / 4.0f, m_height / 2.3f)
+				, ImGuiCond_FirstUseEver
+				);
+			ImGui::Begin("Settings"
+				, NULL
+				, 0
+				);
+
+			ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.5f);
+
+			{
+				ImGui::Text("shadow controls:");
+				ImGui::Checkbox("screen space radius", &m_useScreenSpaceRadius);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("define radius in pixels or world units");
+
+				if (m_useScreenSpaceRadius)
+				{
+					ImGui::SliderFloat("radius in pixels", &m_shadowRadiusPixels, 1.0f, 100.0f);
+				}
+				else
+				{
+					ImGui::SliderFloat("radius in world units", &m_shadowRadius, 1e-3f, 1.0f);
+				}
+
+				ImGui::SliderInt("shadow steps", &m_shadowSteps, 1, 64);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("number of steps/samples to take between shaded pixel and radius");
+
+				ImGui::Combo("contact shadows mode", &m_contactShadowsMode, "hard\0soft\0very soft\0\0");
+				if (ImGui::IsItemHovered())
+				{
+					ImGui::BeginTooltip();
+					ImGui::Text("hard");
+					ImGui::BulletText("any occluder, fully shadowed");
+					ImGui::Text("soft");
+					ImGui::BulletText("modulate shadow by distance to first occluder");
+					ImGui::Text("very soft");
+					ImGui::BulletText("also reduce each shadow contribution by distance");
+					ImGui::EndTooltip();
+				}
+
+				ImGui::Checkbox("add random offset to initial position", &m_useNoiseOffset);
+				if (ImGui::IsItemHovered())
+					ImGui::SetTooltip("hide banding with noise");
+
+				ImGui::Checkbox("use different offset each frame", &m_dynamicNoise);
+				ImGui::Separator();
+
+				ImGui::Text("scene controls:");
+				ImGui::Checkbox("display shadows only", &m_displayShadows);
+				ImGui::Checkbox("move light", &m_moveLight);
+			}
+
+			ImGui::End();
+
+			imguiEndFrame();
+
+			// Advance to next frame. Rendering thread will be kicked to
+			// process submitted rendering primitives.
+			m_currFrame = bgfx::frame();
+
+			return true;
+		}
+
+		return false;
+	}
+
+	// Submit every entry of m_models, then the ground mesh, to view _pass
+	// with _program. The albedo/normal textures and the uniform block are
+	// bound again before each mesh submission.
+	void drawAllModels(bgfx::ViewId _pass, bgfx::ProgramHandle _program, const Uniforms & _uniforms)
+	{
+		for (const Model& item : m_models)
+		{
+			// Uniform scale looked up per mesh type, no rotation,
+			// translation taken from the model position
+			const float meshScale = s_meshScale[item.mesh];
+
+			float modelMtx[16];
+			bx::mtxSRT(modelMtx
+				, meshScale, meshScale, meshScale
+				, 0.0f, 0.0f, 0.0f
+				, item.position[0], item.position[1], item.position[2]
+				);
+
+			// Per-draw state: textures first, then uniforms, then the mesh
+			bgfx::setTexture(0, s_albedo, m_groundTexture);
+			bgfx::setTexture(1, s_normal, m_normalTexture);
+			_uniforms.submit();
+
+			meshSubmit(m_meshes[item.mesh], _pass, _program, modelMtx);
+		}
+
+		// Ground: scale by 10 on every axis, combined with a translation
+		// of -10 along Y
+		const float groundScale = 10.0f;
+
+		float scaleMtx[16];
+		bx::mtxScale(scaleMtx, groundScale, groundScale, groundScale);
+
+		float translateMtx[16];
+		bx::mtxTranslate(translateMtx, 0.0f, -10.0f, 0.0f);
+
+		float groundMtx[16];
+		bx::mtxMul(groundMtx, scaleMtx, translateMtx);
+
+		bgfx::setTexture(0, s_albedo, m_groundTexture);
+		bgfx::setTexture(1, s_normal, m_normalTexture);
+		_uniforms.submit();
+
+		meshSubmit(m_ground, _pass, _program, groundMtx);
+	}
+
+	// Create the g-buffer (color, normal, depth) and the two R16F render
+	// targets (linear depth, shadows) at m_width x m_height. The size used
+	// is recorded in m_size.
+	void createFramebuffers()
+	{
+		m_size[0] = m_width;
+		m_size[1] = m_height;
+
+		const uint16_t texWidth  = uint16_t(m_size[0]);
+		const uint16_t texHeight = uint16_t(m_size[1]);
+
+		// Render target, clamped on U/V, point filtering for min/mag/mip
+		const uint64_t rtPointClampFlags = 0
+			| BGFX_TEXTURE_RT
+			| BGFX_SAMPLER_U_CLAMP
+			| BGFX_SAMPLER_V_CLAMP
+			| BGFX_SAMPLER_MIN_POINT
+			| BGFX_SAMPLER_MAG_POINT
+			| BGFX_SAMPLER_MIP_POINT
+			;
+
+		// G-buffer attachments: no mips, single layer
+		m_gbufferTex[GBUFFER_RT_COLOR] = bgfx::createTexture2D(
+			  texWidth
+			, texHeight
+			, false
+			, 1
+			, bgfx::TextureFormat::BGRA8
+			, rtPointClampFlags
+			);
+		m_gbufferTex[GBUFFER_RT_NORMAL] = bgfx::createTexture2D(
+			  texWidth
+			, texHeight
+			, false
+			, 1
+			, bgfx::TextureFormat::BGRA8
+			, rtPointClampFlags
+			);
+		m_gbufferTex[GBUFFER_RT_DEPTH] = bgfx::createTexture2D(
+			  texWidth
+			, texHeight
+			, false
+			, 1
+			, bgfx::TextureFormat::D24
+			, rtPointClampFlags
+			);
+
+		// Last argument true: the frame buffer destroys its textures
+		m_gbuffer = bgfx::createFrameBuffer(BX_COUNTOF(m_gbufferTex), m_gbufferTex, true);
+
+		// Single channel half float targets for the post passes
+		m_linearDepth.init(m_size[0], m_size[1], bgfx::TextureFormat::R16F, rtPointClampFlags);
+		m_shadows.init(m_size[0], m_size[1], bgfx::TextureFormat::R16F, rtPointClampFlags);
+	}
+
+	// Release everything createFramebuffers() created: the g-buffer frame
+	// buffer and the linear depth / shadows render targets.
+	// All buffers are set to destroy their textures, so the g-buffer
+	// textures are not destroyed individually here.
+	void destroyFramebuffers()
+	{
+		bgfx::destroy(m_gbuffer);
+
+		m_linearDepth.destroy();
+		m_shadows.destroy();
+	}
+
+	// Refresh m_uniforms from the UI parameters, the camera matrices and the
+	// light position. Flags and integer settings are stored as floats in the
+	// uniform block.
+	void updateUniforms()
+	{
+		m_uniforms.m_displayShadows = m_displayShadows ? 1.0f : 0.0f;
+
+		// With dynamic noise the frame index cycles through 0..7,
+		// otherwise it stays at 0
+		m_uniforms.m_frameIdx = m_dynamicNoise
+			? float(m_currFrame % 8)
+			: 0.0f;
+
+		// One radius slot is shared by both modes; m_useScreenSpaceRadius
+		// tells the shader whether it is in pixels or world units
+		m_uniforms.m_shadowRadius = m_useScreenSpaceRadius ? m_shadowRadiusPixels : m_shadowRadius;
+		m_uniforms.m_shadowSteps = float(m_shadowSteps);
+		m_uniforms.m_useNoiseOffset = m_useNoiseOffset ? 1.0f : 0.0f;
+		m_uniforms.m_contactShadowsMode = float(m_contactShadowsMode);
+		m_uniforms.m_useScreenSpaceRadius = m_useScreenSpaceRadius ? 1.0f : 0.0f;
+
+		mat4Set(m_uniforms.m_worldToView, m_view);
+		mat4Set(m_uniforms.m_viewToProj, m_proj);
+
+		// from assao sample, cs_assao_prepare_depths.sc
+		{
+			// float depthLinearizeMul = ( clipFar * clipNear ) / ( clipFar - clipNear );
+			// float depthLinearizeAdd = clipFar / ( clipFar - clipNear );
+			// correct the handedness issue. need to make sure this below is correct, but I think it is.
+
+			float depthLinearizeMul = -m_proj2[3*4+2];
+			float depthLinearizeAdd =  m_proj2[2*4+2];
+
+			// Keep both constants on the same sign
+			if (depthLinearizeMul * depthLinearizeAdd < 0)
+			{
+				depthLinearizeAdd = -depthLinearizeAdd;
+			}
+
+			vec2Set(m_uniforms.m_depthUnpackConsts, depthLinearizeMul, depthLinearizeAdd);
+
+			const float tanHalfFOVY = 1.0f / m_proj2[1*4+1];	// = tanf( drawContext.Camera.GetYFOV( ) * 0.5f );
+			const float tanHalfFOVX = 1.0f / m_proj2[0];		// = tanHalfFOVY * drawContext.Camera.GetAspect( );
+
+			// The sign of the Y terms depends on where the texture origin
+			// is. Ask the caps instead of comparing against a single
+			// renderer type: this matches the originBottomLeft flag passed
+			// to screenSpaceQuad() and also covers OpenGL ES, which used to
+			// fall into the top-left branch.
+			if (bgfx::getCaps()->originBottomLeft)
+			{
+				vec2Set(m_uniforms.m_ndcToViewMul, tanHalfFOVX * 2.0f, tanHalfFOVY * 2.0f);
+				vec2Set(m_uniforms.m_ndcToViewAdd, tanHalfFOVX * -1.0f, tanHalfFOVY * -1.0f);
+			}
+			else
+			{
+				vec2Set(m_uniforms.m_ndcToViewMul, tanHalfFOVX * 2.0f, tanHalfFOVY * -2.0f);
+				vec2Set(m_uniforms.m_ndcToViewAdd, tanHalfFOVX * -1.0f, tanHalfFOVY * 1.0f);
+			}
+		}
+
+		// Light position: extend to a point (w = 1), transform by the view
+		// matrix, keep xyz
+		{
+			float lightPosition[4];
+			bx::memCopy(lightPosition, m_lightModel.position, 3*sizeof(float));
+			lightPosition[3] = 1.0f;
+			float viewSpaceLightPosition[4];
+			bx::vec4MulMtx(viewSpaceLightPosition, lightPosition, m_view);
+			bx::memCopy(m_uniforms.m_lightPosition, viewSpaceLightPosition, 3*sizeof(float));
+		}
+	}
+
+
+	// Size used for the view rects, the UI and the frame buffers
+	uint32_t m_width;
+	uint32_t m_height;
+	// NOTE(review): presumably the bgfx debug / reset flags; they are not
+	// used in this part of the file, confirm against init()
+	uint32_t m_debug;
+	uint32_t m_reset;
+
+	// Mouse position, buttons and wheel forwarded to imguiBeginFrame()
+	entry::MouseState m_mouseState;
+
+	// Resource handles
+	// NOTE(review): m_gbufferProgram is presumably the program handed to
+	// drawAllModels(); the call site is outside this part of the file
+	bgfx::ProgramHandle m_gbufferProgram;
+	// Draws the light mesh
+	bgfx::ProgramHandle m_sphereProgram;
+	// Full screen passes: "linear depth", "screen space shadows", "combine"
+	bgfx::ProgramHandle m_linearDepthProgram;
+	bgfx::ProgramHandle m_shadowsProgram;
+	bgfx::ProgramHandle m_combineProgram;
+
+	// Shader uniforms, filled by updateUniforms() and submitted before each draw
+	Uniforms m_uniforms;
+
+	// Uniforms to identify texture samplers
+	bgfx::UniformHandle s_albedo;
+	bgfx::UniformHandle s_color;
+	bgfx::UniformHandle s_normal;
+	bgfx::UniformHandle s_depth;
+	bgfx::UniformHandle s_shadows;
+
+	// G-buffer frame buffer and its attachments, see createFramebuffers()
+	bgfx::FrameBufferHandle m_gbuffer;
+	bgfx::TextureHandle m_gbufferTex[GBUFFER_RENDER_TARGETS];
+
+	// R16F targets written by the linear depth and shadows passes
+	RenderTarget m_linearDepth;
+	RenderTarget m_shadows;
+
+	// A mesh instance: which mesh to draw and where to translate it
+	struct Model
+	{
+		uint32_t mesh; // Index of mesh in m_meshes
+		float position[3];
+	};
+
+	// Mesh drawn at the light position; the position is also sent to the
+	// shaders in view space
+	Model m_lightModel;
+	Model m_models[MODEL_COUNT];
+	Mesh* m_meshes[BX_COUNTOF(s_meshPaths)];
+	Mesh* m_ground;
+	// Bound to s_albedo / s_normal for every mesh in drawAllModels()
+	bgfx::TextureHandle m_groundTexture;
+	bgfx::TextureHandle m_normalTexture;
+
+	// Value returned by the last bgfx::frame() call
+	uint32_t m_currFrame;
+	float m_lightRotation = 0.0f;
+	// Passed to screenSpaceQuad() for the full screen passes
+	float m_texelHalf = 0.0f;
+	float m_fovY = 60.0f;
+	bool m_recreateFrameBuffers = false;
+	// NOTE(review): m_havePrevious is not referenced in this part of the
+	// file; check whether it is still needed
+	bool m_havePrevious = false;
+
+	// m_view / m_proj are copied into the uniforms as worldToView and
+	// viewToProj; m_proj2 is the source of the depth linearization and
+	// field of view constants
+	float m_view[16];
+	float m_proj[16];
+	float m_proj2[16];
+	// Size the frame buffers were created with
+	int32_t m_size[2];
+
+	// UI parameters
+	bool m_displayShadows = false;
+	bool m_useNoiseOffset = true;
+	bool m_dynamicNoise = true;
+	// Shadow radius in world units and in pixels; m_useScreenSpaceRadius
+	// selects which one is sent to the shaders
+	float m_shadowRadius = 0.25f;
+	float m_shadowRadiusPixels = 25.0f;
+	int32_t m_shadowSteps = 8;
+	bool m_moveLight = true;
+	// Index into the "hard" / "soft" / "very soft" combo
+	int32_t m_contactShadowsMode = 0;
+	bool m_useScreenSpaceRadius = false;
+};
+
+} // namespace
+
+// Passes the app class, its short name and its description to ENTRY_IMPLEMENT_MAIN
+// (presumably the macro that registers the example with the entry framework).
+// NOTE(review): the short name is still "xx-sss" while the shader files live
+// under examples/44-sss; confirm which number is intended.
+ENTRY_IMPLEMENT_MAIN(ExampleScreenSpaceShadows, "xx-sss", "Screen Space Shadows.");

+ 7 - 0
examples/44-sss/varying.def.sc

@@ -0,0 +1,7 @@
+vec4 a_position  : POSITION;
+vec2 a_texcoord0 : TEXCOORD0;
+vec3 a_normal    : NORMAL;
+
+vec2 v_texcoord0 : TEXCOORD0;
+vec4 v_texcoord1 : TEXCOORD1;
+vec3 v_normal    : NORMAL = vec3(0.0, 0.0, 1.0);

+ 30 - 0
examples/44-sss/vs_sss_gbuffer.sc

@@ -0,0 +1,30 @@
+$input a_position, a_normal, a_texcoord0
+$output v_normal, v_texcoord0, v_texcoord1
+
+/*
+* Copyright 2021 elven cache. All rights reserved.
+* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+*/
+
+#include "../common/common.sh"
+#include "parameters.sh"
+
+// G-buffer vertex stage: outputs the clip space position, the world space
+// normal (v_normal), the texture coordinates (v_texcoord0) and the world
+// space position (v_texcoord1).
+void main()
+{
+	// Object space position with w = 1, shared by both transforms below
+	vec4 osPos = vec4(a_position.xyz, 1.0);
+
+	// Clip space position
+	gl_Position = mul(u_modelViewProj, osPos);
+
+	// Unpack the normal with *2 - 1, then transform it by the model matrix
+	// (w = 0 leaves the translation out) and normalize the result
+	vec3 unpackedNormal = a_normal.xyz * 2.0 - 1.0;
+	v_normal = normalize(mul(u_model[0], vec4(unpackedNormal, 0.0)).xyz);
+
+	// Texture coordinates are copied as they are
+	v_texcoord0 = a_texcoord0;
+
+	// World space position, written with w = 1
+	v_texcoord1 = vec4(mul(u_model[0], osPos).xyz, 1.0);
+}

+ 10 - 0
examples/44-sss/vs_sss_screenquad.sc

@@ -0,0 +1,10 @@
+$input a_position, a_texcoord0
+$output v_texcoord0
+
+#include "../common/common.sh"
+
+// Screen quad vertex stage: transforms the vertex and copies its texture
+// coordinates to v_texcoord0.
+void main()
+{
+	// Texture coordinates are copied as they are
+	v_texcoord0 = a_texcoord0;
+
+	// Transform the vertex by u_modelViewProj, with w forced to 1
+	vec4 quadPos = vec4(a_position.xyz, 1.0);
+	gl_Position = mul(u_modelViewProj, quadPos);
+}