Browse Source

Move FinalComposite to HLSL

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
47c0cf9364

+ 1 - 0
AnKi/ShaderCompiler/Glslang.cpp

@@ -307,6 +307,7 @@ Error compileGlslToSpirv(CString src, ShaderType shaderType, BaseMemoryPool& tmp
 	shader.setOverrideVersion(460);
 	if(!shader.parse(&GLSLANG_LIMITS, 100, false, messages))
 	{
+		// printf("%s\n", src.cstr());
 		createErrorLog(shader.getInfoLog(), src, tmpPool, errorMessage);
 		return Error::kUserData;
 	}

+ 53 - 37
AnKi/Shaders/FinalComposite.ankiprog

@@ -3,86 +3,102 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#pragma anki hlsl
+
 #pragma anki mutator FILM_GRAIN 0 1
 #pragma anki mutator BLOOM_ENABLED 0 1
 #pragma anki mutator DBG_ENABLED 0 1
 
 #pragma anki start vert
-#include <AnKi/Shaders/QuadVert.glsl>
+#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki end
 
 #pragma anki start frag
-#include <AnKi/Shaders/Common.glsl>
-#include <AnKi/Shaders/Functions.glsl>
-#include <AnKi/Shaders/MotionBlur.glsl>
+#include <AnKi/Shaders/Functions.hlsl>
+#include <AnKi/Shaders/MotionBlur.hlsl>
 
 ANKI_SPECIALIZATION_CONSTANT_U32(kLutSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMotionBlurSamples, 3u);
 
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform sampler u_trilinearRepeatSampler;
+[[vk::binding(0)]] SamplerState u_nearestAnyClampSampler;
+[[vk::binding(1)]] SamplerState u_linearAnyClampSampler;
+[[vk::binding(2)]] SamplerState u_trilinearRepeatSampler;
 
-layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_lightShadingRt;
-layout(set = 0, binding = 4) uniform ANKI_RP texture2D u_ppsBloomLfRt;
-layout(set = 0, binding = 5) uniform ANKI_RP texture3D u_lut;
-layout(set = 0, binding = 6) uniform texture2D u_motionVectorsRt;
-layout(set = 0, binding = 7) uniform texture2D u_depthRt;
+[[vk::binding(3)]] Texture2D<RVec4> u_lightShadingRt;
+[[vk::binding(4)]] Texture2D<RVec4> u_ppsBloomLfRt;
+[[vk::binding(5)]] Texture3D<RVec4> u_lut;
+[[vk::binding(6)]] Texture2D u_motionVectorsRt;
+[[vk::binding(7)]] Texture2D u_depthRt;
 #if DBG_ENABLED
-layout(set = 0, binding = 8) uniform ANKI_RP texture2D u_dbgOutlineRt;
+[[vk::binding(8)]] Texture2D<RVec4> u_dbgOutlineRt;
 #endif
 
-layout(push_constant, std140) uniform b_pc
+struct PushConstants // Parameters the fragment stage reads through u_pc.
+{
+	Vec2 m_padding0; // Not read in this file. NOTE(review): presumably keeps the layout matching the CPU side; confirm.
+	F32 m_filmGrainStrength; // Passed to filmGrain() as its strength argument when FILM_GRAIN is enabled.
+	U32 m_frameCount; // Wrapped with % 0xFFFFu and passed to filmGrain() as its time argument.
+};
+
+[[vk::push_constant]] ConstantBuffer<PushConstants> u_pc; // The push constant block instance.
+
+struct VertOut // Input of the fragment stage's main().
 {
-	Vec2 u_padding0;
-	F32 u_filmGrainStrength;
-	U32 u_frameCount;
+	[[vk::location(0)]] Vec2 m_uv : TEXCOORD; // UV at location 0; used for every texture lookup in main().
 };
 
-layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out ANKI_RP Vec3 out_color;
+struct FragOut // Return type of the fragment stage's main().
+{
+	RVec3 m_color : SV_TARGET0; // Composited color written to render target 0.
+};
 
-ANKI_RP Vec3 colorGrading(ANKI_RP Vec3 color)
+RVec3 colorGrading(RVec3 color) // Remaps color through the 3D texture u_lut and returns the sampled RGB.
 {
-	const ANKI_RP Vec3 kLutScale = Vec3((F32(kLutSize) - 1.0) / F32(kLutSize));
-	const ANKI_RP Vec3 kLutOffset = Vec3(1.0 / (2.0 * F32(kLutSize)));
+	constexpr RF32 kLutSizef = (RF32)kLutSize; // kLutSize is a specialization constant. NOTE(review): presumably the LUT size per axis.
+	constexpr RVec3 kLutScale = ((kLutSizef - 1.0) / kLutSizef).xxx; // (N - 1) / N on all three axes.
+	constexpr RVec3 kLutOffset = (1.0 / (2.0 * kLutSize)).xxx; // 0.5 / N; with the scale above [0, 1] maps to [0.5 / N, 1 - 0.5 / N].
 
-	color = min(color, Vec3(1.0));
-	const ANKI_RP Vec3 lutCoords = color * kLutScale + kLutOffset;
-	return textureLod(u_lut, u_trilinearRepeatSampler, lutCoords, 0.0).rgb;
+	color = min(color, RVec3(1.0, 1.0, 1.0)); // Values above 1.0 are clamped before the lookup.
+	const RVec3 lutCoords = color * kLutScale + kLutOffset;
+	return u_lut.SampleLevel(u_trilinearRepeatSampler, lutCoords, 0.0).rgb; // Always samples mip 0.
 }
 
-void main()
+FragOut main(VertOut input) // Fragment entry point: builds the final color from the light shading RT plus optional effects.
 {
-	const Vec2 uv = in_uv;
+	const Vec2 uv = input.m_uv;
+	RVec3 outColor;
 
 	if(kMotionBlurSamples > 0u)
 	{
-		out_color = motionBlur(u_motionVectorsRt, u_nearestAnyClampSampler, u_lightShadingRt, Vec2(kFramebufferSize),
-							   u_linearAnyClampSampler, uv, kMotionBlurSamples);
+		outColor = motionBlur(u_motionVectorsRt, u_nearestAnyClampSampler, u_lightShadingRt, Vec2(kFramebufferSize),
+							  u_linearAnyClampSampler, uv, kMotionBlurSamples); // Blurred read of the light shading RT.
 	}
 	else
 	{
-		out_color = textureLod(u_lightShadingRt, u_linearAnyClampSampler, uv, 0.0).rgb;
+		outColor = u_lightShadingRt.SampleLevel(u_linearAnyClampSampler, uv, 0.0).rgb; // Plain read of mip 0.
 	}
 
 #if BLOOM_ENABLED
-	const ANKI_RP Vec3 bloom = textureLod(u_ppsBloomLfRt, u_linearAnyClampSampler, uv, 0.0).rgb;
-	out_color += bloom;
+	const RVec3 bloom = u_ppsBloomLfRt.SampleLevel(u_linearAnyClampSampler, uv, 0.0).rgb;
+	outColor += bloom; // Bloom is added before color grading.
 #endif
 
-	out_color = colorGrading(out_color);
+	outColor = colorGrading(outColor); // LUT lookup; see colorGrading().
 
 #if FILM_GRAIN
-	const ANKI_RP F32 dt = 1.0;
-	out_color = filmGrain(out_color, uv, u_filmGrainStrength, F32(u_frameCount % 0xFFFFu) * dt);
+	const F32 dt = 1.0;
+	outColor = filmGrain(outColor, uv, u_pc.m_filmGrainStrength, (F32)(u_pc.m_frameCount % 0xFFFFu) * dt); // Wrapped frame count * dt is the time input.
 #endif
 
 #if DBG_ENABLED
-	const ANKI_RP Vec4 dbg = textureLod(u_dbgOutlineRt, u_linearAnyClampSampler, uv, 0.0);
-	out_color = mix(out_color, dbg.rgb, dbg.a);
+	const RVec4 dbg = u_dbgOutlineRt.SampleLevel(u_linearAnyClampSampler, uv, 0.0);
+	outColor = lerp(outColor, dbg.rgb, dbg.a); // Blends the debug RT over the result, weighted by its alpha.
 #endif
+
+	FragOut output;
+	output.m_color = outColor;
+	return output;
 }
 
 #pragma anki end

+ 2 - 12
AnKi/Shaders/Functions.hlsl

@@ -667,22 +667,12 @@ Vec3 sRgbToLinear(Vec3 sRgb)
 #endif
 }
 
-#if ANKI_GLSL
-ANKI_RP Vec3 filmGrain(ANKI_RP Vec3 color, Vec2 uv, ANKI_RP F32 strength, ANKI_RP F32 time)
+RVec3 filmGrain(RVec3 color, Vec2 uv, F32 strength, F32 time) // Returns color scaled by a grain factor derived from uv and time.
 {
 	const F32 x = (uv.x + 4.0) * (uv.y + 4.0) * time;
-	const F32 grain = 1.0 - (mod((mod(x, 13.0) + 1.0) * (mod(x, 123.0) + 1.0), 0.01) - 0.005) * strength;
-	return color * grain;
-}
-#else
-template<typename TVec3, typename TFloat>
-TVec3 filmGrain(TVec3 color, Vec2 uv, TFloat strength, TFloat time)
-{
-	const TFloat x = (uv.x + 4.0) * (uv.y + 4.0) * time;
-	const TFloat grain = 1.0 - (mod((mod(x, 13.0) + 1.0) * (mod(x, 123.0) + 1.0), 0.01) - 0.005) * strength;
+	const F32 grain = 1.0 - (fmod((fmod(x, 13.0) + 1.0) * (fmod(x, 123.0) + 1.0), 0.01) - 0.005) * strength; // For x >= 0 the outer fmod() is in [0, 0.01), so grain stays within 1 +/- 0.005 * strength.
 	return color * grain;
 }
-#endif
 
 /// Sin approximation: https://www.desmos.com/calculator/svgcjfskne
 F32 fastSin(F32 x)

+ 41 - 3
AnKi/Shaders/Include/Common.h

@@ -54,6 +54,9 @@ void maybeUnused(T a)
 }
 #	define ANKI_MAYBE_UNUSED(x) maybeUnused(x)
 
+#	define _ANKI_CONCATENATE(a, b) a##b // Raw token paste; the arguments are not macro-expanded first.
+#	define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b) // Extra indirection so a and b are expanded before pasting.
+
 #	define ANKI_BINDLESS_SET(s) \
 		[[vk::binding(0, s)]] Texture2D<uint4> u_bindlessTextures2dU32[kMaxBindlessTextures]; \
 		[[vk::binding(0, s)]] Texture2D<int4> u_bindlessTextures2dI32[kMaxBindlessTextures]; \
@@ -61,6 +64,43 @@ void maybeUnused(T a)
 		[[vk::binding(0, s)]] Texture2DArray<RVec4> u_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
 		[[vk::binding(1, s)]] Buffer<float4> u_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
 
+#	define _ANKI_SCONST_X(type, n, id) [[vk::constant_id(id)]] const type n = (type)1; // Scalar constant n at constant ID id, default 1; the expansion ends with ';'.
+// 2-component constant n: scalars at IDs id and id + 1 (default 1) combined into a static const; no trailing ';'.
+#	define _ANKI_SCONST_X2(type, componentType, n, id) \
+		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \
+		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_2_, n) = (componentType)1; \
+		static const type n = type(ANKI_CONCATENATE(_anki_const_0_2_, n), ANKI_CONCATENATE(_anki_const_1_2_, n))
+// 3-component constant n: scalars at IDs id .. id + 2 (default 1) combined into a static const; no trailing ';'.
+#	define _ANKI_SCONST_X3(type, componentType, n, id) \
+		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_3_, n) = (componentType)1; \
+		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_3_, n) = (componentType)1; \
+		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_3_, n) = (componentType)1; \
+		static const type n = type(ANKI_CONCATENATE(_anki_const_0_3_, n), ANKI_CONCATENATE(_anki_const_1_3_, n), \
+								   ANKI_CONCATENATE(_anki_const_2_3_, n))
+
+// 4-component constant n: one scalar constant per component at constant IDs id .. id + 3 (each defaulting to 1),
+// combined into a static const of the vector type. The expansion has no trailing ';'.
+// The 4th component must read _anki_const_3_4_; reusing _anki_const_2_4_ would duplicate .z into .w.
+#	define _ANKI_SCONST_X4(type, componentType, n, id) \
+		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_4_, n) = (componentType)1; \
+		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_4_, n) = (componentType)1; \
+		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_4_, n) = (componentType)1; \
+		[[vk::constant_id(id + 3u)]] const componentType ANKI_CONCATENATE(_anki_const_3_4_, n) = (componentType)1; \
+		static const type n = type(ANKI_CONCATENATE(_anki_const_0_4_, n), ANKI_CONCATENATE(_anki_const_1_4_, n), \
+								   ANKI_CONCATENATE(_anki_const_2_4_, n), ANKI_CONCATENATE(_anki_const_3_4_, n))
+// I32 variants. In all macros below n is the constant's name and id its first constant ID (vectors use consecutive IDs).
+#	define ANKI_SPECIALIZATION_CONSTANT_I32(n, id) _ANKI_SCONST_X(I32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_IVEC2(n, id) _ANKI_SCONST_X2(IVec2, I32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_IVEC3(n, id) _ANKI_SCONST_X3(IVec3, I32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_IVEC4(n, id) _ANKI_SCONST_X4(IVec4, I32, n, id)
+// U32 variants.
+#	define ANKI_SPECIALIZATION_CONSTANT_U32(n, id) _ANKI_SCONST_X(U32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_UVEC2(n, id) _ANKI_SCONST_X2(UVec2, U32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_UVEC3(n, id) _ANKI_SCONST_X3(UVec3, U32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_UVEC4(n, id) _ANKI_SCONST_X4(UVec4, U32, n, id)
+// F32 variants.
+#	define ANKI_SPECIALIZATION_CONSTANT_F32(n, id) _ANKI_SCONST_X(F32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id) _ANKI_SCONST_X2(Vec2, F32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id) _ANKI_SCONST_X3(Vec3, F32, n, id)
+#	define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id) _ANKI_SCONST_X4(Vec4, F32, n, id)
+
 typedef float F32;
 constexpr uint kSizeof_F32 = 4u;
 typedef float2 Vec2;
@@ -342,9 +382,7 @@ const uint kSizeof_mat4x3 = 48u;
 #	define sizeof(type) _ANKI_CONCATENATE(kSizeof_, type)
 #	define alignof(type) _ANKI_CONCATENATE(kAlignof_, type)
 
-#	define _ANKI_SCONST_X(type, n, id) \
-		layout(constant_id = id) const type n = type(1); \
-		const U32 ANKI_CONCATENATE(n, _CONST_ID) = id
+#	define _ANKI_SCONST_X(type, n, id) layout(constant_id = id) const type n = type(1)
 
 #	define _ANKI_SCONST_X2(type, componentType, n, id, constWorkaround) \
 		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = componentType(1); \

+ 12 - 12
AnKi/Shaders/MotionBlur.glsl → AnKi/Shaders/MotionBlur.hlsl

@@ -5,30 +5,30 @@
 
 #pragma once
 
-#include <AnKi/Shaders/Common.glsl>
+#include <AnKi/Shaders/Common.hlsl>
 
 // Perform motion blur.
-ANKI_RP Vec3 motionBlur(texture2D motionVectorsRt, sampler motionVectorsRtSampler, ANKI_RP texture2D toBlurRt,
-						Vec2 toBlurRtSize, sampler toBlurRtSampler, Vec2 uv, U32 maxSamples)
+RVec3 motionBlur(Texture2D motionVectorsRt, SamplerState motionVectorsRtSampler, Texture2D<RVec4> toBlurRt,
+				 Vec2 toBlurRtSize, SamplerState toBlurRtSampler, Vec2 uv, U32 maxSamples)
 {
 	// Compute velocity. Get the max velocity around the current sample to avoid outlines. TAA's result and the motion
 	// vectors RT do not quite overlap
-	Vec2 velocityMin = textureLod(motionVectorsRt, motionVectorsRtSampler, uv, 0.0).rg;
+	Vec2 velocityMin = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0).rg;
 	Vec2 velocityMax = velocityMin;
 
-	Vec2 v = textureLodOffset(sampler2D(motionVectorsRt, motionVectorsRtSampler), uv, 0.0, ivec2(-2, -2)).rg;
+	Vec2 v = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0, IVec2(-2, -2)).rg;
 	velocityMin = min(velocityMin, v);
 	velocityMax = max(velocityMax, v);
 
-	v = textureLodOffset(sampler2D(motionVectorsRt, motionVectorsRtSampler), uv, 0.0, ivec2(2, 2)).rg;
+	v = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0, IVec2(2, 2)).rg;
 	velocityMin = min(velocityMin, v);
 	velocityMax = max(velocityMax, v);
 
-	v = textureLodOffset(sampler2D(motionVectorsRt, motionVectorsRtSampler), uv, 0.0, ivec2(-2, 2)).rg;
+	v = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0, IVec2(-2, 2)).rg;
 	velocityMin = min(velocityMin, v);
 	velocityMax = max(velocityMax, v);
 
-	v = textureLodOffset(sampler2D(motionVectorsRt, motionVectorsRtSampler), uv, 0.0, ivec2(2, -2)).rg;
+	v = motionVectorsRt.SampleLevel(motionVectorsRtSampler, uv, 0.0, IVec2(2, -2)).rg;
 	velocityMin = min(velocityMin, v);
 	velocityMax = max(velocityMax, v);
 
@@ -44,14 +44,14 @@ ANKI_RP Vec3 motionBlur(texture2D motionVectorsRt, sampler motionVectorsRtSample
 	sampleCountf = round(sampleCountf);
 
 	// Sample
-	const ANKI_RP F32 weight = 1.0 / sampleCountf;
-	ANKI_RP Vec3 outColor = textureLod(toBlurRt, toBlurRtSampler, uv, 0.0).rgb * weight;
-	[[dont_unroll]] for(F32 s = 1.0; s < sampleCountf; s += 1.0)
+	const RF32 weight = 1.0 / sampleCountf;
+	RVec3 outColor = toBlurRt.SampleLevel(toBlurRtSampler, uv, 0.0).rgb * weight;
+	[[loop]] for(F32 s = 1.0; s < sampleCountf; s += 1.0)
 	{
 		const F32 f = s / sampleCountf;
 		const Vec2 sampleUv = uv + velocity * f;
 
-		outColor += textureLod(toBlurRt, toBlurRtSampler, sampleUv, 0.0).rgb * weight;
+		outColor += toBlurRt.SampleLevel(toBlurRtSampler, sampleUv, 0.0).rgb * weight;
 	}
 
 	return outColor;

+ 22 - 0
AnKi/Shaders/QuadVert.hlsl

@@ -0,0 +1,22 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Shaders/Common.hlsl>
+
+struct VertOut // Return type of the vertex stage's main().
+{
+	Vec4 m_position : SV_POSITION; // Output position; main() sets it to (uv * 2 - 1, 0, 1).
+	[[vk::location(0)]] Vec2 m_uv : TEXCOORD; // UV exported at location 0.
+};
+
+// Derives both UV and position from the vertex index alone; no vertex buffer is read.
+// Vertex IDs 0, 1, 2 produce UVs (0,0), (2,0), (0,2) and positions (-1,-1), (3,-1), (-1,3).
+// NOTE(review): presumably drawn as a single 3-vertex triangle covering the viewport; confirm against the draw call.
+VertOut main(U32 vertId : SV_VERTEXID)
+{
+	// Bit 0 of the ID drives U, the remaining bits drive V
+	const Vec2 uv = Vec2(vertId & 1, vertId >> 1) * 2.0;
+
+	VertOut output;
+	output.m_uv = uv;
+	output.m_position = Vec4(uv * 2.0 - 1.0, 0.0, 1.0);
+	return output;
+}

BIN
ThirdParty/Bin/Windows64/MaliOfflineCompiler/graphics/Mali-Gxx_r41p0-00rel0.dll


BIN
ThirdParty/Bin/Windows64/MaliOfflineCompiler/graphics/Mali-T600_r23p0-00rel0.dll


BIN
ThirdParty/Bin/Windows64/MaliOfflineCompiler/malioc.exe


+ 1 - 1
Tools/Shader/ShaderProgramBinaryDumpMain.cpp

@@ -90,7 +90,7 @@ Error dumpStats(const ShaderProgramBinary& bin)
 #if ANKI_OS_LINUX
 				ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/MaliOfflineCompiler/malioc",
 #elif ANKI_OS_WINDOWS
-				ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/MaliOfflineCompiler/malioc.exe",
+				ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Windows64/MaliOfflineCompiler/malioc.exe",
 #else
 #	error "Not supported"
 #endif