Bläddra i källkod

Add GLSL variants of all built-in shaders

BearishSun 9 år sedan
förälder
incheckning
ec9001814b

+ 2 - 2
Data/Raw/Engine/Includes/NormalVertexInput.bslinc

@@ -151,7 +151,7 @@ Technique
 				tangentToLocal[2] = normal;
 			}
 
-			void getVertexWorldPosition(VertexIntermediate intermediate, out vec4 result)
+			void getVertexIntermediate(out VertexIntermediate result)
 			{
 				vec3 normal = bs_normal * 2.0f - 1.0f;
 				vec3 tangent = bs_tangent.xyz * 2.0f - 1.0f;
@@ -172,7 +172,7 @@ Technique
 				result.worldTangent = vec4(tangentToWorld[0].xyz, tangentSign); // Tangent basis vector
 			}
 			
-			void getVertexWorldPosition(out vec4 result)
+			void getVertexWorldPosition(VertexIntermediate intermediate, out vec4 result)
 			{
 				#ifdef USE_BLEND_SHAPES
 					vec4 position = vec4(bs_position + bs_position1, 1.0f);

+ 21 - 1
Data/Raw/Engine/Includes/PPBase.bslinc

@@ -43,9 +43,29 @@ Technique : base("PPBase") =
 	
 	Pass =
 	{
+		DepthWrite = false;
+		DepthRead = false;
+	
 		Vertex =
 		{
-			// TODO
+			in vec2 bs_position;
+			in vec2 bs_texcoord0;
+			
+			out VStoFS
+			{
+				vec2 uv0;
+			} VSOutput;
+			
+			out gl_PerVertex
+			{
+				vec4 gl_Position;
+			};
+		
+			/** Forwards the texture coordinate and emits the 2D input position directly as the clip-space position. */
+			void main()
+			{
+				VSOutput.uv0 = bs_texcoord0;
+				gl_Position = vec4(bs_position, 0.0f, 1.0f);
+			}			
 		};
 	};
 };

+ 99 - 2
Data/Raw/Engine/Includes/PPTonemapCommon.bslinc

@@ -121,9 +121,106 @@ Technique : base("PPTonemapCommon") =
 	
 	Pass =
 	{
-		Vertex =
+		Fragment =
 		{
-			// TODO
+			const mat3x3 sRGBToXYZMatrix = mat3x3(
+				vec3(0.4124564f, 0.2126729f, 0.0193339f),
+				vec3(0.3575761f, 0.7151522f, 0.1191920f),
+				vec3(0.1804375f, 0.0721750f, 0.9503041f)
+			);
+			
+			const mat3x3 XYZTosRGBMatrix = mat3x3(
+				vec3(3.2409699419f, -0.9692436363f, 0.0556300797f),
+				vec3(-1.5373831776f, 1.8759675015f, -0.2039769589f),
+				vec3(-0.4986107603f, 0.0415550574f, 1.0569715142f)
+			);
+			
+			const mat3x3 D65ToD60Matrix = mat3x3(
+				vec3(1.01303f, 0.00769823f, -0.00284131f),
+				vec3(0.00610531f, 0.998165f, 0.00468516f),
+				vec3(-0.014971f, -0.00503203f, 0.924507f)
+			);
+
+			const mat3x3 D60ToD65Matrix = mat3x3(
+				vec3(0.987224f, -0.00759836f, 0.00307257f),
+				vec3(-0.00611327f, 1.00186f, -0.00509595f),
+				vec3(0.0159533f, 0.00533002f, 1.08168f)
+			);
+
+			const mat3x3 XYZToACES2065Matrix = mat3x3(
+				vec3(1.0498110175f, -0.4959030231f, 0.0000000000f),
+				vec3(0.0000000000f, 1.3733130458f, 0.0000000000f),
+				vec3(-0.0000974845f, 0.0982400361f, 0.9912520182f)
+			);
+
+			const mat3x3 XYZToACEScgMatrix = mat3x3(
+				vec3(1.6410233797f, -0.6636628587f, 0.0117218943f),
+				vec3(-0.3248032942f, 1.6153315917f, -0.0082844420f),
+				vec3(-0.2364246952f, 0.0167563477f, 0.9883948585f)
+			);
+
+			const mat3x3 ACEScgToXYZMatrix = mat3x3(
+				vec3(0.6624541811f, 0.2722287168f, -0.0055746495f),
+				vec3(0.1340042065f, 0.6740817658f, 0.0040607335f),
+				vec3(0.1561876870f, 0.0536895174f, 1.0103391003f)
+			);
+
+			/**
+			 * Encodes a linear color in log space and clamps the encoded value to the [0, 1] range.
+			 *
+			 * @param 	linearColor		Linear color to encode.
+			 * @param 	result			Receives the log-encoded color, clamped to [0, 1].
+			 */			
+			void LinearToLogColor(vec3 linearColor, out vec3 result)
+			{
+				// Encoding constants, identical to the ones LogToLinearColor() decodes with
+				float range = 14.0f;
+				float grey = 0.18f;
+				float greyExposure = 444.0f;
+
+				vec3 encoded = log2(linearColor) / range - log2(grey) / range + greyExposure / 1023.0f;
+				result = clamp(encoded, 0.0f, 1.0f);
+			}
+
+			/**
+			 * Decodes a log-encoded color (as produced by LinearToLogColor()) back into linear space.
+			 *
+			 * @param 	logColor		Log space color.
+			 * @param 	result			Receives the color in linear space.
+			 */			
+			void LogToLinearColor(vec3 logColor, out vec3 result)
+			{
+				// Decoding constants, identical to the ones LinearToLogColor() encodes with
+				float range = 14.0f;
+				float grey = 0.18f;
+				float greyExposure = 444.0f;
+
+				vec3 exponent = (logColor - greyExposure / 1023.0f) * range;
+				result = exp2(exponent) * grey;
+			}
+
+			/**
+			 * Applies the Rec.709 transfer function to a linear color in sRGB/Rec.709 color space, converting it to 
+			 * gamma space. Rec.709 values are suitable for HDTVs and projectors.
+			 *
+			 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
+			 * @param 	result			Receives the gamma corrected color.
+			 */				
+			void LinearToGammaRec709(vec3 linearColor, out vec3 result) 
+			{
+				// TODO: Clamp lower end of linear color so it isn't denormalized?
+				
+				// Linear segment used near black, power segment everywhere else; the smaller of the two is the curve
+				vec3 linearSegment = linearColor * 4.5f;
+				vec3 powerSegment = pow(max(linearColor, 0.018f), vec3(0.45f)) * 1.099f - 0.099f;
+				
+				result = min(linearSegment, powerSegment);
+			}
+
+			/**
+			 * Applies the sRGB transfer function to a linear color in sRGB/Rec.709 color space, converting it to 
+			 * gamma space. sRGB values are suitable for PC displays.
+			 *
+			 * @param 	linearColor		Linear color in sRGB/Rec.709 color space.
+			 * @param 	result			Receives the gamma corrected color.
+			 */		
+			void LinearToGammasRGB(vec3 linearColor, out vec3 result) 
+			{
+				// TODO: Clamp lower end of linear color so it isn't denormalized?
+				
+				// Linear segment used near black, power segment everywhere else; the smaller of the two is the curve
+				vec3 linearSegment = linearColor * 12.92f;
+				vec3 powerSegment = pow(max(linearColor, 0.00313067f), vec3(1.0f/2.4f)) * 1.055f - 0.055f;
+				
+				result = min(linearSegment, powerSegment);
+			}			
 		};
 	};
 };

+ 216 - 0
Data/Raw/Engine/Includes/PPWhiteBalance.bslinc

@@ -213,4 +213,220 @@ Technique : base("PPWhiteBalance") =
 Technique : base("PPWhiteBalance") =
 {
 	Language = "GLSL";
+	
+	Pass =
+	{
+		Fragment =
+		{
+			/**
+			 * Calculates correlated color temperature from chromaticity coordinates using McCamy's formula.
+			 * Coordinates should be near the Planckian locus otherwise the returned temperature becomes meaningless.
+			 *
+			 * @param 	coords	CIE 1931 xy chromaticity coordinates.
+			 * @param 	result	Receives the correlated color temperature in degrees Kelvin.
+			 */
+			void CCT(vec2 coords, out float result)
+			{
+				// Inverse slope of the line through the epicenter (0.3320, 0.1858). The polynomial below, with its
+				// negative odd-order coefficients, is defined for n = (x - xe) / (y - ye). Using (ye - y) as the 
+				// divisor flips the sign of n and yields a wrong temperature (e.g. ~4660K instead of ~6500K for D65).
+				float n = (coords.x - 0.3320f) / (coords.y - 0.1858f);
+				float n2 = n * n;
+				float n3 = n2 * n;
+				
+				result = -449.0f * n3 + 3525.0f * n2 - 6823.3f * n + 5520.33f;
+			}
+
+			/**
+			 * Evaluates the Planckian locus approximation for a correlated color temperature. The approximation 
+			 * works for values in range [1000K, 15000K].
+			 *
+			 * @param	T		Correlated color temperature in degrees Kelvin.
+			 * @param	result	Receives the CIE 1960 UCS chromaticity coordinates.
+			 */
+			void PlanckianLocusChromaticity(float T, out vec2 result)
+			{
+				float tSqrd = T * T;
+
+				// Both CIE 1960 UCS coordinates are rational functions of the temperature
+				float uNumerator = 0.860117757f + 1.54118254e-4f * T + 1.28641212e-7f * tSqrd;
+				float uDenominator = 1.0f + 8.42420235e-4f * T + 7.08145163e-7f * tSqrd;
+				
+				float vNumerator = 0.317398726f + 4.22806245e-5f * T + 4.20481691e-8f * tSqrd;
+				float vDenominator = 1.0f - 2.89741816e-5f * T + 1.61456053e-7f * tSqrd;
+				
+				result = vec2(uNumerator / uDenominator, vNumerator / vDenominator);
+			}
+
+			/**
+			 * Calculates chromaticity coordinates from a correlated color temperature using the formula for series
+			 * D standard illuminants (D55, D65, D75, etc.). Valid for values in range [4000K, 25000K].
+			 *
+			 * @param	T		Correlated color temperature in degrees Kelvin.
+			 * @param	result	Receives the CIE 1931 chromaticity coordinates.
+			 */
+			void DSeriesIlluminantChromaticity(float T, out vec2 result)
+			{
+				// A different set of polynomial coefficients is used above 7000K
+				float x;
+				if(T <= 7000.0f)
+					x = 0.244063f + (0.09911e3 + (2.9678e6 - 4.6070e9 / T) / T) / T;
+				else
+					x = 0.237040f + (0.24748e3 + (1.9018e6 - 2.0064e9 / T) / T) / T;
+				
+				// y follows from x through a quadratic
+				result = vec2(x, -3.0f * x * x + 2.87f * x - 0.275f);
+			}
+
+			/**
+			 * Converts chromaticity coordinates from CIE 1960 uniform color space to CIE 1931 color space.
+			 *
+			 * @param	uv		Chromaticity coordinates in CIE 1960 UCS.
+			 * @param	result	Receives the chromaticity coordinates in CIE 1931.
+			 */
+			void CIE1960ToCIE1931(vec2 uv, out vec2 result)
+			{
+				// Both output coordinates share the same divisor
+				float divisor = 2 * uv.x - 8 * uv.y + 4;
+
+				result = vec2((3 * uv.x) / divisor, (2 * uv.y) / divisor);
+			}
+
+			/**
+			 * Adds the specified offset along the Planckian isothermal line and returns the chromaticity coordinates for the offset position.
+			 *
+			 * @param	uv		Chromaticity coordinates in CIE 1960 UCS for the correlated color temperature along the Planckian locus.
+			 * @param	offset	Offset to be added along the isothermal. In range [-1, 1]. The actual offset in chromaticity
+			 *					coordinates is scaled to |0.05| since values farther than that usually aren't useful.
+			 * @param	result	Receives the offset position as CIE 1931 chromaticity coordinates.
+			 */
+			void PlanckianIsothermalOffset(vec2 uv, float offset, out vec2 result)
+			{
+				// The isotherm direction is taken as uv rotated by 90 degrees, normalized
+				vec2 direction = normalize(vec2(-uv.y, uv.x));
+				vec2 shiftedUV = uv + direction * offset * 0.05f;
+				
+				CIE1960ToCIE1931(shiftedUV, result);
+			}
+			
+			/**
+			 * Converts from CIE 1931 xyY color space to XYZ color space.
+			 *
+			 * @param	xyY		Coordinates in xyY color space.
+			 * @param	result	Receives the coordinates in XYZ color space.
+			 */
+			void xyYToXYZ(vec3 xyY, out vec3 result)
+			{
+				// Guard against division by zero when y is zero
+				float divisor = max(xyY.y, 1e-10f);
+			
+				result = vec3(
+					(xyY.x * xyY.z) / divisor,
+					xyY.z,
+					((1.0 - xyY.x - xyY.y) * xyY.z) / divisor
+				);
+			}
+			
+			/**
+			 * Converts from CIE 1931 XYZ color space to xyY color space.
+			 *
+			 * @param	XYZ		Coordinates in XYZ color space.
+			 * @param	result	Receives the coordinates in xyY color space.
+			 */
+			void XYZToxyY(vec3 XYZ, out vec3 result)
+			{
+				// Substitute a tiny divisor when all components sum to exactly zero
+				float sum = XYZ.x + XYZ.y + XYZ.z;
+				float divisor = (sum == 0.0f) ? 1e-10f : sum;
+				
+				result = vec3(XYZ.x / divisor, XYZ.y / divisor, XYZ.y);
+			}			
+			
+			/**
+			 * Builds a matrix that transforms XYZ tristimulus values for a given white point to
+			 * a new white point.
+			 *
+			 * @param	orgWhite	Chromaticity coordinates in CIE 1931 for the original white point.
+			 * @param	newWhite	Chromaticity coordinates in CIE 1931 for the new white point.
+			 * @param	result		Receives the matrix that transforms from the original to the new white point.
+			 */
+			void ChromaticAdaptation(vec2 orgWhite, vec2 newWhite, out mat3x3 result)
+			{
+				// Bradford's cone response matrix and its inverse (each vec3 is one matrix column)
+				const mat3x3 bradford = mat3x3(
+					vec3(0.8951f, -0.7502f, 0.0389f),
+					vec3(0.2664f, 1.7135f, -0.0685f),
+					vec3(-0.1614f, 0.0367f, 1.0296f)
+				);
+				
+				const mat3x3 invBradford = mat3x3(
+					vec3(0.9870f, 0.4323f, -0.0085f),
+					vec3(-0.1471f, 0.5184f, 0.0400f),
+					vec3(0.1600f, 0.0493f, 0.9685f)
+				);
+			
+				// Expand both white points from xy (with Y = 1) to XYZ
+				vec3 sourceXYZ;
+				xyYToXYZ(vec3(orgWhite, 1.0f), sourceXYZ);
+				
+				vec3 targetXYZ;
+				xyYToXYZ(vec3(newWhite, 1.0f), targetXYZ);
+				
+				// Move both white points into the cone response domain
+				vec3 sourceCone = bradford * sourceXYZ;
+				vec3 targetCone = bradford * targetXYZ;
+				
+				// Per-channel gain between the two white points, placed on the diagonal
+				vec3 gain = targetCone / sourceCone;
+				mat3x3 scale = mat3x3(
+					vec3(gain.x, 0.0f, 0.0f),
+					vec3(0.0f, gain.y, 0.0f),
+					vec3(0.0f, 0.0f, gain.z)
+				);
+				
+				result = invBradford * (scale * bradford);
+			}
+			
+			uniform WhiteBalanceInput
+			{
+				float gWhiteTemp;
+				float gWhiteOffset;
+			};
+			
+			/**
+			 * Applies color balancing to the provided color. The color is transformed from its original white point
+			 * (provided by gWhiteTemp and gWhiteOffset) to a D65 white point.
+			 * 
+			 * @param	color 	Color in linear sRGB/Rec.709 color space.
+			 * @param	result	Receives the white balanced linear color.
+			 */
+			void WhiteBalance(vec3 color, out vec3 result)
+			{
+				// Position of the source temperature on the Planckian locus (CIE 1960 UCS)
+				vec2 locusUV;
+				PlanckianLocusChromaticity(gWhiteTemp, locusUV);
+				
+				// Same position with the user offset applied (CIE 1931)
+				vec2 shiftedXY;
+				PlanckianIsothermalOffset(locusUV, gWhiteOffset, shiftedXY);
+				
+				vec2 sourceWhiteXY;
+				if(gWhiteTemp < 4000)
+				{
+					// Low temperatures use the offset locus position directly
+					sourceWhiteXY = shiftedXY;
+				}
+				else
+				{
+					// Higher temperatures use the D series illuminant, displaced by the same amount the offset
+					// displaced the locus position
+					DSeriesIlluminantChromaticity(gWhiteTemp, sourceWhiteXY);
+					
+					vec2 locusXY;
+					CIE1960ToCIE1931(locusUV, locusXY);
+					
+					sourceWhiteXY += shiftedXY - locusXY;
+				}
+			
+				vec2 targetWhiteXY = vec2(0.3128f, 0.3290f); // D65 white point
+				
+				mat3x3 whitePointTfrm;
+				ChromaticAdaptation(sourceWhiteXY, targetWhiteXY, whitePointTfrm);
+				
+				// The adaptation operates on XYZ values, so wrap it with sRGB <-> XYZ conversions
+				mat3x3 colorTfrm = XYZTosRGBMatrix * (whitePointTfrm * sRGBToXYZMatrix);
+				result = colorTfrm * color;
+			}
+		};
+	};
 };

+ 7 - 7
Data/Raw/Engine/Includes/SkinnedVertexInput.bslinc

@@ -161,7 +161,7 @@ Technique
 			in vec4 bs_tangent;
 			in vec2 bs_texcoord0;
 		
-			in uvec4 bs_blendindices;
+			in ivec4 bs_blendindices;
 			in vec4 bs_blendweights;
 				
 			#ifdef USE_BLEND_SHAPES
@@ -184,7 +184,7 @@ Technique
 				vec4 gl_Position;
 			};
 			
-			void getBoneMatrix(uint idx, out mat4x3 result)
+			void getBoneMatrix(int idx, out mat4x3 result)
 			{
 				mat3x4 temp;
 			
@@ -199,16 +199,16 @@ Technique
 			{
 				mat4x3 boneMatrix;
 				
-				getBoneMatrix(bs_blendindices.x, out boneMatrix);
+				getBoneMatrix(bs_blendindices.x, boneMatrix);
 				result = bs_blendweights.x * boneMatrix;
 				
-				getBoneMatrix(bs_blendindices.y, out boneMatrix);
+				getBoneMatrix(bs_blendindices.y, boneMatrix);
 				result += bs_blendweights.y * boneMatrix;
 				
-				getBoneMatrix(bs_blendindices.z, out boneMatrix);
+				getBoneMatrix(bs_blendindices.z, boneMatrix);
 				result += bs_blendweights.z * boneMatrix;
 				
-				getBoneMatrix(bs_blendindices.w, out boneMatrix);
+				getBoneMatrix(bs_blendindices.w, boneMatrix);
 				result += bs_blendweights.w * boneMatrix;
 			}
 			
@@ -242,7 +242,7 @@ Technique
 				
 				float tangentSign;
 				mat3 tangentToLocal;
-				getSkinnedTangentToLocal(tangentSign, tangentToLocal);
+				getSkinnedTangentToLocal(result.blendMatrix, tangentSign, tangentToLocal);
 				
 				mat3 tangentToWorld = mat3(gMatWorldNoScale) * tangentToLocal;
 				result.worldNormal = tangentToWorld[2]; // Normal basis vector

+ 69 - 1
Data/Raw/Engine/Includes/VolumeRenderBase.bslinc

@@ -79,9 +79,77 @@ Technique : base("VolumeRenderBase") =
 	
 	Pass =
 	{
+		DepthWrite = false;
+		DepthRead = false;
+	
 		Vertex =
 		{
-			// TODO
+			in vec2 bs_position;
+			in vec2 bs_texcoord0;
+			
+			out VStoGS
+			{
+				vec2 uv0;
+				flat uint layerIdx;
+			} VSOutput;
+			
+			out gl_PerVertex
+			{
+				vec4 gl_Position;
+			};
+		
+			/** 
+			 * Forwards the texture coordinate and the instance index (as the layer index) and emits the 2D input 
+			 * position directly as the clip-space position.
+			 */
+			void main()
+			{
+				VSOutput.layerIdx = gl_InstanceID;
+				VSOutput.uv0 = bs_texcoord0;
+				gl_Position = vec4(bs_position, 0.0f, 1.0f);
+			}
+		};
+		
+		Geometry = 
+		{		
+			layout (triangles) in;
+			layout (triangle_strip, max_vertices=3) out;
+		
+			in VStoGS
+			{
+				vec2 uv0;
+				flat uint layerIdx;
+			} GSInput[3];
+		
+			out GStoFS
+			{
+				vec2 uv0;
+				flat uint layerIdx;
+			} GSOutput;
+		
+			in gl_PerVertex 
+			{
+				vec4 gl_Position;
+			} gl_in[];
+		
+			out gl_PerVertex
+			{
+				vec4 gl_Position;
+			};
+		
+			/** 
+			 * Passes the incoming triangle through unchanged and routes it to the render target layer given by the
+			 * layer index received from the vertex stage.
+			 */
+			void main()
+			{
+				for(int i = 0; i < 3; i++)
+				{
+					gl_Position = gl_in[i].gl_Position;
+					GSOutput.uv0 = GSInput[i].uv0;
+					GSOutput.layerIdx = GSInput[i].layerIdx;
+					
+					// Without this every instance would be rasterized into layer 0. Outputs become undefined
+					// after EmitVertex() so the layer must be written for every vertex.
+					gl_Layer = int(GSInput[i].layerIdx);
+					
+					EmitVertex();
+				}
+			}
 		};
 	};
 };

+ 2 - 1
Data/Raw/Engine/Shaders/DeferredDirectionalLightPass.bsl

@@ -60,7 +60,8 @@ Technique
 	};
 };
 
-Technique =
+Technique 
+	: inherits("DeferredLightPass") =
 {
 	Language = "GLSL";
 	

+ 118 - 1
Data/Raw/Engine/Shaders/PPCreateTonemapLUT.bsl

@@ -134,9 +134,126 @@ Technique
 	
 	Pass =
 	{
+		DepthWrite = false;
+		DepthRead = false;
+	
 		Fragment =
 		{
-			// TODO
+			uniform Input
+			{
+				// [0]: x - shoulder strength, y - linear strength, z - linear angle, w - toe strength
+				// [1]: x - toe numerator, y - toe denominator, z - linear white point, w - unused
+				vec4 gTonemapParams[2];
+				
+				float gGammaAdjustment;
+				// 0 - sRGB, 1 - Rec.709, 2 - 2.2 gamma
+				uint gGammaCorrectionType;
+				
+				vec3 gSaturation;
+				vec3 gContrast;
+				vec3 gGain;
+				vec3 gOffset;
+			};
+		
+			/**
+			 * Filmic curve used for tonemapping. Curve parameters are read from gTonemapParams.
+			 *
+			 * @param 	color		Linear color.
+			 * @param 	result		Receives the transformed color.
+			 */			
+			void FilmicCurve(vec3 color, out vec3 result)
+			{
+				// Formula from John Hable's Uncharted 2 presentation
+				float shoulderStrength = gTonemapParams[0].x;
+				float linearStrength = gTonemapParams[0].y;
+				float linearAngle = gTonemapParams[0].z;
+				float toeStrength = gTonemapParams[0].w;
+				float toeNumerator = gTonemapParams[1].x;
+				float toeDenominator = gTonemapParams[1].y;
+				
+				vec3 numerator = color * (shoulderStrength * color + vec3(linearStrength * linearAngle)) 
+					+ vec3(toeStrength * toeNumerator);
+				vec3 denominator = color * (shoulderStrength * color + vec3(linearStrength)) 
+					+ vec3(toeStrength * toeDenominator);
+				
+				result = numerator / denominator - vec3(toeNumerator / toeDenominator);
+			}
+			
+			/**
+			 * Applies filmic curve tonemapping to the provided color, normalized by the curve's value at the 
+			 * linear white point (gTonemapParams[1].z).
+			 *
+			 * @param 	color		Linear color in ACEScg color space.
+			 * @param 	result		Receives the tonemapped color in ACEScg color space.
+			 */		
+			void FilmicTonemapping(vec3 color, out vec3 result)
+			{
+				vec3 mappedWhite;
+				FilmicCurve(vec3(gTonemapParams[1].z), mappedWhite);
+			
+				vec3 mappedColor;
+				FilmicCurve(color, mappedColor);
+			
+				result = mappedColor / mappedWhite;
+			}
+			
+			/**
+			 * Applies color grading (saturation, contrast, gain and offset, in that order) to the provided color.
+			 *
+			 * @param 	color		Linear color in ACEScg color space.
+			 * @param 	result		Receives the graded color in ACEScg color space.
+			 */				
+			void ColorGrading(vec3 color, out vec3 result)
+			{
+				const vec3 luminanceWeights = vec3(0.2722287168f, 0.6740817658f, 0.0536895174f);
+				vec3 grey = vec3(dot(color, luminanceWeights));
+				
+				// Saturation: blend between the grey-scale and the original color, never going negative
+				vec3 graded = max(vec3(0.0f), mix(grey, color, gSaturation));
+				
+				// Contrast: power curve pivoting around 0.18
+				graded = pow(graded * (1.0f / 0.18f), gContrast) * 0.18f;
+
+				result = graded * gGain + gOffset;
+			}		
+			
+			// Note: The instance must not be named "input" as that is a keyword reserved by GLSL
+			in GStoFS
+			{
+				vec2 uv0;
+				flat uint layerIdx;
+			} FSInput;
+			
+			out vec4 fragColor;
+			
+			/**
+			 * Generates a single texel of the tonemapping lookup table. The texel's position within the table 
+			 * (uv and layer index) is treated as a log-encoded color, which is decoded and run through white 
+			 * balancing, color grading, filmic tonemapping and gamma correction.
+			 */
+			void main()
+			{
+				// Constants
+				const mat3x3 sRGBToACES2065Matrix = XYZToACES2065Matrix * (D65ToD60Matrix * sRGBToXYZMatrix);
+				const mat3x3 sRGBToACEScgMatrix = XYZToACEScgMatrix * (D65ToD60Matrix * sRGBToXYZMatrix);
+				const mat3x3 ACEScgTosRGBMatrix = XYZTosRGBMatrix * (D60ToD65Matrix * ACEScgToXYZMatrix);
+				
+				// By default pixel centers will be sampled, but we want to encode the entire range, so
+				// offset the sampling by half a pixel, and extend the entire range by one pixel.
+				vec2 uv = FSInput.uv0 - (0.5f / LUT_SIZE);
+				vec3 logColor = vec3(uv * LUT_SIZE / float(LUT_SIZE - 1), FSInput.layerIdx / float(LUT_SIZE - 1));
+				
+				vec3 linearColor;
+				LogToLinearColor(logColor, linearColor);
+				
+				WhiteBalance(linearColor, linearColor);
+				linearColor = sRGBToACEScgMatrix * linearColor;
+				ColorGrading(linearColor, linearColor);
+				
+				// Note: Improve this so it's closer to the ACES curve?
+				FilmicTonemapping(linearColor, linearColor);
+				// TODO - Does the white point provided in filmic curve conflict with the white balancing?
+				
+				linearColor = ACEScgTosRGBMatrix * linearColor;
+				
+				// Transform to gamma space
+				vec3 gammaColor = pow(linearColor, vec3(gGammaAdjustment)); // User adjustment, usually 1.0f
+					
+				if(gGammaCorrectionType == 0)
+					LinearToGammasRGB(gammaColor, gammaColor);
+				else if(gGammaCorrectionType == 1)
+					LinearToGammaRec709(gammaColor, gammaColor);
+				else
+					gammaColor = pow(gammaColor, vec3(1.0f/2.2f));
+				
+				// TODO - Divide by 1.05f here and then re-apply it when decoding from the texture?
+				fragColor = vec4(gammaColor, 0.0f);
+			}	
 		};
 	};
 };

+ 32 - 1
Data/Raw/Engine/Shaders/PPDownsample.bsl

@@ -59,7 +59,38 @@ Technique : inherits("PPBase") =
 	{
 		Fragment =
 		{
-			// TODO
+			// Note: The instance must not be named "input" as that is a keyword reserved by GLSL
+			in VStoFS
+			{
+				vec2 uv0;
+			} FSInput;		
+		
+			uniform Input
+			{
+				vec2 gInvTexSize;
+			};
+			
+			uniform sampler2D gInputTex;
+			out vec4 fragColor;
+
+			/** Outputs the average of four samples taken diagonally around the current position. */
+			void main()
+			{
+				vec2 UV[4];
+
+				// Blur using a 4x4 kernel. It's assumed current position is right in the middle of a 2x2 kernel (because the output
+				// texture should be 1/2 the size of the input texture), and moving by one in each direction will sample areas
+				// between a 2x2 kernel as well if bilinear filtering is enabled.
+				UV[0] = FSInput.uv0 + gInvTexSize * vec2(-1, -1);
+				UV[1] = FSInput.uv0 + gInvTexSize * vec2( 1, -1);
+				UV[2] = FSInput.uv0 + gInvTexSize * vec2(-1,  1);
+				UV[3] = FSInput.uv0 + gInvTexSize * vec2( 1,  1);
+
+				vec4 samples[4];
+
+				// texture() is used as texture2D() is not available in the core profile
+				for(uint i = 0; i < 4; i++)
+					samples[i] = texture(gInputTex, UV[i]);
+
+				fragColor = (samples[0] + samples[1] + samples[2] + samples[3]) * 0.25f;
+			}	
 		};
 	};
 };

+ 100 - 3
Data/Raw/Engine/Shaders/PPEyeAdaptHistogram.bsl

@@ -27,7 +27,7 @@ Technique =
 				// x - histogram scale, y - histogram offset
 				float2 gHistogramParams;
 				uint2 gThreadGroupCount;
-			}		
+			}
 		
 			Texture2D gSceneColorTex;
 			RWTexture2D<float4> gOutputTex;
@@ -127,8 +127,105 @@ Technique =
 	Pass =
 	{
 		Compute =
-		{
-			// TODO
+		{	
+			layout (local_size_x = THREADGROUP_SIZE_X, local_size_y = THREADGROUP_SIZE_Y) in;
+		
+			uniform Input
+			{
+				// xy - offset, zw - size
+				uvec4 gPixelOffsetAndSize;
+			
+				// x - histogram scale, y - histogram offset
+				vec2 gHistogramParams;
+				uvec2 gThreadGroupCount;
+			};
+		
+			uniform sampler2D gSceneColorTex;
+			layout (rgba32f) uniform image2D gOutputTex;
+			
+			// Keep elements in this order as it ensures coalesced memory operations for non-random ops
+			shared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
+			
+			/**
+			 * Maps a luminance value to a normalized position in the histogram, using the scale (x) and offset (y) 
+			 * from gHistogramParams.
+			 *
+			 * @param	luminance	Luminance to map.
+			 * @param	result		Receives the histogram position, clamped to [0, 1].
+			 */
+			void calcHistogramPos(float luminance, out float result)
+			{
+				float position = log2(luminance) * gHistogramParams.x + gHistogramParams.y;
+				result = clamp(position, 0.0f, 1.0f);
+			}			
+			
+			/**
+			 * Builds a luminance histogram for the tile of pixels covered by the thread group. Every thread sorts 
+			 * its LOOP_COUNT_X x LOOP_COUNT_Y pixels into its own set of buckets in shared memory, after which the
+			 * first (NUM_BUCKETS / 4) threads sum up the buckets of all threads and write the normalized result 
+			 * into a single row of the output texture (four buckets per texel, one row per thread group).
+			 */
+			void main()
+			{
+				// Clear everything
+				for(uint i = 0; i < NUM_BUCKETS; i++)
+					sharedData[i][gl_LocalInvocationID.x][gl_LocalInvocationID.y] = 0.0f;
+					
+				groupMemoryBarrier();
+				barrier();
+				
+				// Sort all pixel luminance for the current thread into histogram buckets
+				uvec2 tileSize = uvec2(LOOP_COUNT_X, LOOP_COUNT_Y);
+				
+				// Offset + size, therefore the first texel outside of the area (exclusive bound)
+				uvec2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
+				
+				uvec2 tileStart = gl_GlobalInvocationID.xy * tileSize + gPixelOffsetAndSize.xy;
+				for(uint y = 0; y < LOOP_COUNT_Y; y++)
+				{
+					uvec2 texelPos = tileStart + uvec2(0, y);
+					
+					// Use >= as the bound is exclusive, otherwise an extra row outside of the area gets sampled
+					if(texelPos.y >= maxExtent.y)
+						break;
+				
+					for(uint x = 0; x < LOOP_COUNT_X; x++)
+					{
+						// Use >= as the bound is exclusive, otherwise an extra column outside of the area gets sampled
+						if(texelPos.x >= maxExtent.x)
+							break;
+					
+						vec4 hdrColor = texelFetch(gSceneColorTex, ivec2(texelPos), 0);
+						float luminance = dot(hdrColor.rgb, vec3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
+						
+						float histogramPos;
+						calcHistogramPos(luminance, histogramPos);
+						
+						// Scaled slightly below (NUM_BUCKETS - 1) so that bucketBIdx never exceeds the last bucket
+						float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
+					
+						uint bucketAIdx = uint(bucket);
+						uint bucketBIdx = bucketAIdx + 1;
+						
+						// Distribute the sample between the two neighboring buckets
+						float weightB = fract(bucket);
+						float weightA = 1.0f - weightB;
+						
+						// The first bucket never receives any weight
+						if(bucketAIdx != 0)
+							sharedData[bucketAIdx][gl_LocalInvocationID.x][gl_LocalInvocationID.y] += weightA;
+					
+						sharedData[bucketBIdx][gl_LocalInvocationID.x][gl_LocalInvocationID.y] += weightB;
+					
+						texelPos.x++;
+					}
+				}
+				
+				groupMemoryBarrier();
+				barrier();
+
+				// Accumulate bucketed values from all threads in the group
+				if(gl_LocalInvocationIndex < (NUM_BUCKETS / 4))
+				{
+					vec4 sum = vec4(0.0f);
+					for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
+					{
+						for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
+						{
+							sum += vec4(
+								sharedData[gl_LocalInvocationIndex * 4 + 0][x][y],
+								sharedData[gl_LocalInvocationIndex * 4 + 1][x][y],
+								sharedData[gl_LocalInvocationIndex * 4 + 2][x][y],
+								sharedData[gl_LocalInvocationIndex * 4 + 3][x][y]
+							);
+						}
+					}
+					
+					// Normalize and output histogram for the group (single line per group)
+					float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
+
+					ivec2 outCoords = ivec2(gl_LocalInvocationIndex, gl_WorkGroupID.x + gl_WorkGroupID.y * gThreadGroupCount.x);
+					imageStore(gOutputTex, outCoords, sum / groupArea);
+				}
+			}	
 		};
 	};
 };

+ 35 - 1
Data/Raw/Engine/Shaders/PPEyeAdaptHistogramReduce.bsl

@@ -59,7 +59,41 @@ Technique : inherits("PPBase") =
 	{
 		Fragment =
 		{
-			// TODO
+			// Note: The instance must not be named "input" as that is a keyword reserved by GLSL
+			in VStoFS
+			{
+				vec2 uv0;
+			} FSInput;
+		
+			uniform Input
+			{
+				uint gThreadGroupCount;
+			};
+		
+			uniform sampler2D gHistogramTex;
+			uniform sampler2D gEyeAdaptationTex;
+
+			out vec4 fragColor;
+			
+			/**
+			 * Reduces the per-thread-group histograms (one row of gHistogramTex per group) into a single averaged 
+			 * histogram stored in the first output row, and copies the eye adaptation value from the previous 
+			 * frame into the second output row.
+			 */
+			void main()
+			{
+				ivec2 iUV = ivec2(trunc(FSInput.uv0));
+				vec4 outputValue = vec4(0.0f);
+
+				// Output texture only has two rows, store histogram on the first
+				if(FSInput.uv0.y < 1.0f)
+				{
+					// TODO - Potentially optimize using bilinear filtering
+					for(uint i = 0; i < gThreadGroupCount; i++)
+						outputValue += texelFetch(gHistogramTex, ivec2(iUV.x, i), 0);
+
+					fragColor = outputValue / gThreadGroupCount;
+				}
+				else
+				{
+					// Store eye adaptation from last frame in the second row of the texture
+					fragColor = texelFetch(gEyeAdaptationTex, ivec2(0, 0), 0).xxxx;
+				}
+			}	
 		};
 	};
 };

+ 174 - 1
Data/Raw/Engine/Shaders/PPEyeAdaptation.bsl

@@ -178,6 +178,179 @@ Technique : inherits("PPBase") =
 	
 	Pass =
 	{
-		// TODO
+		Fragment =
+		{
+			#define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
+		
+			uniform Input
+			{
+				// [0]: x - histogram scale, y - histogram offset, z - histogram percent low, w - histogram percent high
+				// [1]: x - min adaptation, y - max adaptation, z - adaptation speed up, w - adaptation speed down
+				// [2]: x - exposure scale, y - frame time delta, zw - nothing
+				vec4 gEyeAdaptationParams[3];
+			};
+			
+			uniform sampler2D gHistogramTex;
+			
+			/** 
+			 * Converts a histogram bucket position back into luminance, using the histogram scale and offset stored 
+			 * in gEyeAdaptationParams[0].
+			 *
+			 * @param pos		Position of the histogram bucket in range [0, 1].
+			 * @param result	Receives the luminance of the bucket.
+			 */
+			void calcHistogramLuminance(float pos, out float result)
+			{
+				float histogramScale = gEyeAdaptationParams[0].x;
+				float histogramOffset = gEyeAdaptationParams[0].y;
+			
+				result = exp2((pos - histogramOffset) / histogramScale);
+			}	
+			
+			/**
+			 * Reads the value of a single histogram bucket. Buckets are packed four per texel.
+			 *
+			 * @param	histogram	Texture containing the histogram buckets in the first row.
+			 * @param	bucketIdx	Index of the bucket. Caller must ensure it is in valid range.
+			 * @param	result		Receives the value of the needed histogram bucket.
+			 */
+			void getHistogramValue(sampler2D histogram, uint bucketIdx, out float result)
+			{
+				uint channelIdx = bucketIdx % 4;
+				vec4 texel = texelFetch(histogram, ivec2(bucketIdx / 4, 0), 0);
+				
+				// One for the channel holding the bucket, zero for all others
+				vec4 selector = vec4(
+					channelIdx == 0,
+					channelIdx == 1,
+					channelIdx == 2,
+					channelIdx == 3);
+
+				result = dot(texel, selector);	
+			}
+
+			/** 
+			 * Adds up the values of all NUM_BUCKETS buckets in the histogram.
+			 *
+			 * @param	histogram	Texture containing the histogram buckets in the first row.
+			 * @param	result		Receives the sum of all the values in the histogram.
+			 */
+			void calcHistogramSum(sampler2D histogram, out float result)
+			{
+				float total = 0.0f;
+
+				for(uint bucketIdx = 0; bucketIdx < NUM_BUCKETS; bucketIdx++)
+				{
+					float bucketValue;
+					getHistogramValue(histogram, bucketIdx, bucketValue);
+					
+					total += bucketValue;
+				}
+				
+				result = total;
+			}	
+
+			/**
+			 * Calculates the average luminance in the histogram, while ignoring the outlier values that may skew the result.
+			 * The average is weighted by the bucket values that remain after the outliers were removed.
+			 *
+			 * @param	histogram	Texture containing the histogram buckets in the first row.
+			 * @param	low			Sum below which to ignore values (removing lower end outliers), in range [0, histogramSum].
+			 * @param	high		Sum above which to ignore values (removing higher end outliers), in range [0, histogramSum]. 
+			 *                      Must be higher than @low.
+			 * @param	result		Receives the average luminance in the histogram.
+			 */
+			void calcHistogramAverageLuminance(sampler2D histogram, float low, float high, out float result)
+			{
+				// x - sum of bucket luminance weighted by bucket value, y - sum of bucket values (weights)
+				vec2 sumAndWeight = vec2(0.0f, 0.0f);
+
+				for(uint i = 0; i < NUM_BUCKETS; i++)
+				{
+					float value;
+					getHistogramValue(histogram, i, value);
+
+					// Ignore any values below the @low parameter, and then shift the valid range
+					// by the amount we ignored. Eventually the low end of the range reaches zero
+					// and values are no longer ignored.
+					float offset = min(value, low);
+					value = value - offset;
+					low -= offset;
+					high -= offset;
+
+					// Ignore any values above the @high parameter, and then shift the valid range.
+					value = min(value, high);
+					high -= value;
+
+					// Luminance represented by this bucket
+					float histogramPos = i / float(NUM_BUCKETS);
+					float luminance;
+					calcHistogramLuminance(histogramPos, luminance);
+					
+					sumAndWeight += vec2(luminance, 1) * value;
+				}
+				
+				// Lower bound on the weight avoids a division by zero when all values were ignored
+				result = sumAndWeight.x / max(0.0001f, sumAndWeight.y);
+			}
+			
+			/**
+			 * Calculates the eye adaptation from the luminance in the provided histogram. Eye adaptation value will be 
+			 * used for automatically scaling exposure based on scene brightness.
+			 *
+			 * @param	histogram	Texture containing the histogram buckets in the first row.
+			 * @param	result		Receives the ideal eye adaptation value for the provided luminance.
+			 */
+			void calcEyeAdaptation(sampler2D histogram, out float result)
+			{
+				float histogramSum;
+				calcHistogramSum(histogram, histogramSum);
+				
+				// Low/high percentages turned into absolute sums, used for rejecting outliers
+				float lowSum = gEyeAdaptationParams[0].z * histogramSum;
+				float highSum = gEyeAdaptationParams[0].w * histogramSum;
+				
+				float average;
+				calcHistogramAverageLuminance(histogram, lowSum, highSum, average);
+				
+				// Keep within the min/max adaptation limits
+				result = clamp(average, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
+			}
+			
+			/** 
+			 * Smooths out eye adaptation changes over multiple frames so they aren't as jarring.
+			 *
+			 * @param	old			Eye adaptation value from the previous frame.
+			 * @param	target		Ideal eye adaptation value for this frame.
+			 * @param	frameDelta	Time difference between this and last frame, in seconds.
+			 * @param	result		Receives the smoothed eye adaptation, clamped to the min/max adaptation limits.
+			 */
+			void smoothEyeAdaptation(float old, float target, float frameDelta, out float result)
+			{
+				float change = target - old;
+
+				// Separate speeds are used for adapting upwards and downwards
+				float speed;
+				if(change > 0)
+					speed = gEyeAdaptationParams[1].z;
+				else
+					speed = gEyeAdaptationParams[1].w;
+
+				// Portion of the change to apply this frame
+				float blend = 1.0f - exp2(-frameDelta * speed);
+
+				result = clamp(old + change * blend, gEyeAdaptationParams[1].x, gEyeAdaptationParams[1].y);
+			}
+			
+			in VStoFS
+			{
+				vec2 uv0;
+			} VSInput;
+			
+			out vec4 fragColor;
+			
+			/**
+			 * Calculates the exposure for the current frame: determines the target eye adaptation from the histogram,
+			 * smooths it against the adaptation derived from the previous frame's exposure and outputs the
+			 * resulting exposure in all channels.
+			 */
+			void main()
+			{
+				float exposureScale = gEyeAdaptationParams[2].x;
+	
+				float targetAdaptation;
+				calcEyeAdaptation(gHistogramTex, targetAdaptation);
+				
+				// Exposure from the previous frame is read from the second row of the histogram texture
+				// NOTE(review): If the stored exposure is zero (e.g. an uninitialized texture on the first frame) this
+				// division yields inf/NaN - confirm the texture is always initialized to a non-zero value
+				float oldExposure = texelFetch(gHistogramTex, ivec2(0, 1), 0).x;
+				float oldAdaptation = exposureScale / oldExposure; // Assuming same exposure scale as last frame
+				float frameDelta = gEyeAdaptationParams[2].y;
+				
+				float smoothAdaptation;
+				smoothEyeAdaptation(oldAdaptation, targetAdaptation, frameDelta, smoothAdaptation);
+				
+				fragColor = vec4(exposureScale / smoothAdaptation); // Returns exposure
+			}	
+		};
 	};
 };

+ 78 - 1
Data/Raw/Engine/Shaders/PPTonemapping.bsl

@@ -106,9 +106,86 @@ Technique : inherits("PPTonemapCommon") =
 	
 	Pass =
 	{
+		DepthWrite = false;
+		DepthRead = false;
+	
+		Vertex =
+		{
+			in vec2 bs_position;
+			in vec2 bs_texcoord0;
+			
+			out VStoFS
+			{
+				vec2 uv0;
+				float exposureScale;
+			} VSOutput;
+			
+			uniform sampler2D gEyeAdaptationTex;
+			
+			out gl_PerVertex
+			{
+				vec4 gl_Position;
+			};			
+			
+			/** 
+			 * Forwards the texture coordinate, reads the exposure scale from the first texel of the eye adaptation 
+			 * texture and emits the 2D input position directly as the clip-space position.
+			 */
+			void main()
+			{
+				VSOutput.exposureScale = texelFetch(gEyeAdaptationTex, ivec2(0, 0), 0).r;
+				VSOutput.uv0 = bs_texcoord0;
+				gl_Position = vec4(bs_position, 0.0f, 1.0f);
+			}			
+		};	
+	
 		Fragment =
 		{
-			// TODO
+			in VStoFS
+			{
+				vec2 uv0;
+				float exposureScale;
+			} FSInput;
+		
+			uniform sampler2D gInputTex;
+			uniform sampler3D gColorLUT;
+			
+			uniform Input
+			{
+				float gRawGamma;
+				float gManualExposureScale;
+			};
+
+			out vec4 fragColor;
+			
+			/**
+			 * Looks up a color in the 3D color lookup table, which is indexed by the log-encoded color.
+			 *
+			 * @param	linearColor		Linear color to look up.
+			 * @param	result			Receives the color read from the lookup table.
+			 */
+			void ColorLookupTable(vec3 linearColor, out vec3 result)
+			{
+				vec3 encodedColor;
+				LinearToLogColor(linearColor, encodedColor);
+				
+				// Scale and offset by half a texel so the [0, 1] range maps onto the centers of the first and last texel
+				float scale = (LUT_SIZE - 1) / float(LUT_SIZE);
+				float offset = 0.5f / LUT_SIZE;
+				
+				result = texture(gColorLUT, encodedColor * scale + offset).rgb;
+			}
+						
+			/**
+			 * Applies exposure to the scene color (automatic exposure received from the vertex stage if AUTO_EXPOSURE
+			 * is enabled, manual otherwise), followed by either a plain gamma curve (GAMMA_ONLY) or a lookup into the 
+			 * color lookup table.
+			 */
+			void main()
+			{
+				// texture() is used as texture2D() is not available in the core profile
+				vec4 sceneColor = texture(gInputTex, FSInput.uv0);
+				
+				#if AUTO_EXPOSURE
+					sceneColor.rgb = sceneColor.rgb * FSInput.exposureScale;
+				#else
+					sceneColor.rgb = sceneColor.rgb * gManualExposureScale;
+				#endif
+				
+				#if GAMMA_ONLY
+					sceneColor.rgb = pow(sceneColor.rgb, vec3(gRawGamma));				
+				#else
+					vec3 lookupColor;
+					ColorLookupTable(sceneColor.rgb, lookupColor);
+					
+					sceneColor.rgb = lookupColor;
+				#endif
+
+				fragColor = sceneColor;
+			}	
 		};
 	};
 };

+ 8 - 3
Source/BansheeCore/Source/BsAnimationManager.cpp

@@ -465,7 +465,7 @@ namespace BansheeEngine
 
 		// Increments counter and ensures all writes are recorded
 		mWorkerState.store(WorkerState::DataReady, std::memory_order_release);
-		mDataReadyCount.fetch_add(1, std::memory_order_release);
+		mDataReadyCount.fetch_add(1, std::memory_order_acq_rel);
 	}
 
 	void AnimationManager::waitUntilComplete()
@@ -474,13 +474,18 @@ namespace BansheeEngine
 
 		// Read counter, and ensure all reads are done after writes on anim thread complete
 		INT32 dataReadyCount = mDataReadyCount.load(std::memory_order_acquire);
-		assert(dataReadyCount <= CoreThread::NUM_SYNC_BUFFERS);
+
+		if (dataReadyCount > CoreThread::NUM_SYNC_BUFFERS)
+		{
+			LOGERR("Animation manager threading issue. Too many entries in queue: " + toString(dataReadyCount));
+			assert(dataReadyCount <= CoreThread::NUM_SYNC_BUFFERS);
+		}
 
 		mDataReady = dataReadyCount > 0;
 		if (!mDataReady)
 			return;
 
-		mDataReadyCount.fetch_add(-1, std::memory_order_relaxed);
+		mDataReadyCount.fetch_add(-1, std::memory_order_release);
 		mPoseReadBufferIdx = (mPoseReadBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
 	}
 

+ 4 - 0
Source/BansheeGLRenderAPI/Source/BsGLSLParamParser.cpp

@@ -534,18 +534,22 @@ namespace BansheeEngine
 			desc.elementSize = 4;
 			break;
 		case GL_INT:
+		case GL_UNSIGNED_INT:
 			desc.type = GPDT_INT1;
 			desc.elementSize = 1;
 			break;
 		case GL_INT_VEC2:
+		case GL_UNSIGNED_INT_VEC2:
 			desc.type = GPDT_INT2;
 			desc.elementSize = 2;
 			break;
 		case GL_INT_VEC3:
+		case GL_UNSIGNED_INT_VEC3:
 			desc.type = GPDT_INT3;
 			desc.elementSize = 3;
 			break;
 		case GL_INT_VEC4:
+		case GL_UNSIGNED_INT_VEC4:
 			desc.type = GPDT_INT4;
 			desc.elementSize = 4;
 			break;