Browse Source

Add RP to some shaders

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
205ffa09af

+ 1 - 1
AnKi/Resource/ShaderProgramResource.cpp

@@ -387,7 +387,7 @@ void ShaderProgramResource::initVariant(const ShaderProgramResourceVariantInitIn
 			inf.m_constValues.setArray((constValueCount) ? constValues.getBegin() : nullptr, constValueCount);
 			ShaderPtr shader = getManager().getGrManager().newShader(inf);
 
-			if(false)
+			if(true && (ANKI_OS_LINUX || ANKI_OS_WINDOWS))
 			{
 				MaliOfflineCompilerOut maliocOut;
 				const Error err =

+ 27 - 1
AnKi/ShaderCompiler/MaliOfflineCompiler.cpp

@@ -192,7 +192,7 @@ static Error runMaliOfflineCompilerInternal(CString maliocExecutable, CString sp
 			return Error::FUNCTION_FAILED;
 		}
 	}
-	else
+	else if(shaderType == ShaderType::FRAGMENT)
 	{
 		if(std::regex_search(stdoutstl, match,
 							 std::regex("Total instruction cycles:\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
@@ -217,6 +217,32 @@ static Error runMaliOfflineCompilerInternal(CString maliocExecutable, CString sp
 			return Error::FUNCTION_FAILED;
 		}
 	}
+	else
+	{
+		ANKI_ASSERT(shaderType == ShaderType::COMPUTE);
+
+		if(std::regex_search(stdoutstl, match,
+							 std::regex("Total instruction cycles:\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
+										"\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
+										"\\s*([A-Z]+)")))
+		{
+			ANKI_ASSERT(match.size() == 7);
+
+			U32 count = 1;
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_fma));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_cvt));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_sfu));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_loadStore));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_texture));
+
+			out.m_boundUnit = strToHwUnit(match[count++].str().c_str());
+		}
+		else
+		{
+			ANKI_SHADER_COMPILER_LOGE("Error parsing instruction cycles");
+			return Error::FUNCTION_FAILED;
+		}
+	}
 
 #undef ANKI_FLOAT_REGEX
 

+ 2 - 0
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -252,6 +252,8 @@ static const char* SHADER_HEADER = R"(#version 460 core
 
 layout(std140, row_major) uniform;
 layout(std140, row_major) buffer;
+
+#define ANKI_RP mediump
 )";
 
 static const U64 SHADER_HEADER_HASH = computeHash(SHADER_HEADER, sizeof(SHADER_HEADER));

+ 2 - 2
AnKi/Shaders/ClusteredShadingCommon.glsl

@@ -44,7 +44,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_REFLECTIONS_BIND
 };
 
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform textureCubeArray u_reflectionsTex;
+	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform ANKI_RP textureCubeArray u_reflectionsTex;
 layout(set = CLUSTERED_SHADING_SET,
 	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 2) uniform texture2D u_integrationLut;
 #endif
@@ -77,7 +77,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_FOG_BINDING, sca
 // GI (2)
 //
 #if defined(CLUSTERED_SHADING_GI_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING) uniform texture3D
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING) uniform ANKI_RP texture3D
 	u_globalIlluminationTextures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
 
 layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING + 1, scalar) uniform b_giProbes

+ 1 - 0
AnKi/Shaders/Common.glsl

@@ -12,6 +12,7 @@
 
 // Constants
 const F32 EPSILON = 0.000001;
+const ANKI_RP F32 EPSILON_RP = 0.0001; // Divisions by this should be OK according to http://weitz.de/ieee/
 const F32 FLT_MAX = 3.402823e+38;
 const U32 MAX_U32 = 0xFFFFFFFFu;
 

+ 2 - 0
AnKi/Shaders/Include/Common.h

@@ -36,6 +36,8 @@ ANKI_END_NAMESPACE
 
 #	define ANKI_CPP_CODE(x) x
 
+#	define ANKI_RP
+
 //
 // Macros & functions GLSL
 //

+ 4 - 4
AnKi/Shaders/Include/IndirectDiffuseTypes.h

@@ -14,11 +14,11 @@ struct IndirectDiffuseUniforms
 	UVec2 m_viewportSize;
 	Vec2 m_viewportSizef;
 	Vec4 m_projectionMat;
-	F32 m_radius; ///< In meters.
+	ANKI_RP F32 m_radius; ///< In meters.
 	U32 m_sampleCount;
-	F32 m_sampleCountf;
-	F32 m_ssaoBias;
-	F32 m_ssaoStrength;
+	ANKI_RP F32 m_sampleCountf;
+	ANKI_RP F32 m_ssaoBias;
+	ANKI_RP F32 m_ssaoStrength;
 	F32 m_padding[3u];
 };
 

+ 16 - 16
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -32,10 +32,10 @@ layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y) in;
 layout(set = 0, binding = 4) writeonly uniform image2D u_outImage;
 
 layout(set = 0, binding = 5) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 6) uniform texture2D u_gbufferRt2;
+layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_gbufferRt2;
 layout(set = 0, binding = 7) uniform texture2D u_depthRt;
-layout(set = 0, binding = 8) uniform texture2D u_lightBufferRt;
-layout(set = 0, binding = 9) uniform texture2D u_historyTex;
+layout(set = 0, binding = 8) uniform ANKI_RP texture2D u_lightBufferRt;
+layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_historyTex;
 layout(set = 0, binding = 10) uniform texture2D u_motionVectorsTex;
 layout(set = 0, binding = 11) uniform texture2D u_motionVectorsRejectionTex;
 
@@ -73,15 +73,15 @@ void main()
 	const Vec3 viewPos = v4.xyz / v4.w;
 
 	// SSGI
-	Vec3 outColor = Vec3(0.0);
-	F32 ssao = 0.0;
+	ANKI_RP Vec3 outColor = Vec3(0.0);
+	ANKI_RP F32 ssao = 0.0;
 	if(ENABLE_SSGI)
 	{
 		// Find the projected radius
-		const Vec3 sphereLimit = viewPos + Vec3(u_unis.m_radius, 0.0, 0.0);
-		const Vec4 projSphereLimit = cheapProject(Vec4(sphereLimit, 1.0));
-		const Vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
-		const F32 projRadius = length(projSphereLimit2 - ndc);
+		const ANKI_RP Vec3 sphereLimit = viewPos + Vec3(u_unis.m_radius, 0.0, 0.0);
+		const ANKI_RP Vec4 projSphereLimit = cheapProject(Vec4(sphereLimit, 1.0));
+		const ANKI_RP Vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
+		const ANKI_RP F32 projRadius = length(projSphereLimit2 - ndc);
 
 		// Loop to compute
 		const UVec2 random = rand3DPCG16(UVec3(gl_GlobalInvocationID.xy, u_clusteredShading.m_frame)).xy;
@@ -100,7 +100,7 @@ void main()
 			// Compute factor
 			const Vec3 dir = s - viewPos;
 			const F32 len = length(dir);
-			const Vec3 n = dir / len;
+			const Vec3 n = normalize(dir);
 			const F32 NoL = max(0.0, dot(viewNormal, n));
 			// const F32 distFactor = 1.0 - sin(min(1.0, len / u_unis.m_radius) * PI / 2.0);
 			const F32 distFactor = 1.0 - min(1.0, len / u_unis.m_radius);
@@ -120,14 +120,14 @@ void main()
 
 			// Append color
 			const F32 w = distFactor * NoL;
-			const Vec3 c = textureLod(u_lightBufferRt, u_linearAnyClampSampler, lastFrameUv, 100.0).xyz;
+			const ANKI_RP Vec3 c = textureLod(u_lightBufferRt, u_linearAnyClampSampler, lastFrameUv, 100.0).xyz;
 			outColor += c * w;
 
 			// Compute SSAO as well
 			ssao += max(dot(viewNormal, dir) + u_unis.m_ssaoBias, EPSILON) / max(len * len, EPSILON);
 		}
 
-		const F32 scount = 1.0 / u_unis.m_sampleCountf;
+		const ANKI_RP F32 scount = 1.0 / u_unis.m_sampleCountf;
 		outColor *= scount * 2.0 * PI;
 		ssao *= scount;
 	}
@@ -138,7 +138,7 @@ void main()
 	{
 		// Sample probes
 
-		Vec3 probeColor = Vec3(0.0);
+		ANKI_RP Vec3 probeColor = Vec3(0.0);
 
 		// Get the cluster
 		Cluster cluster = getClusterFragCoord(Vec3(fragCoord * 2.0, depth));
@@ -176,8 +176,8 @@ void main()
 				totalBlendWeight += blendWeight;
 
 				// Sample
-				const Vec3 c = sampleGlobalIllumination(worldPos, worldNormal, probe, u_globalIlluminationTextures,
-														u_linearAnyClampSampler);
+				const ANKI_RP Vec3 c = sampleGlobalIllumination(worldPos, worldNormal, probe,
+																u_globalIlluminationTextures, u_linearAnyClampSampler);
 				probeColor += c * blendWeight;
 			}
 
@@ -209,7 +209,7 @@ void main()
 		const F32 blendFactor = mix(lowestBlendFactor, 1.0, historyRejectionFactor);
 
 		// Blend with history
-		const Vec3 history = textureLod(u_historyTex, u_linearAnyClampSampler, historyUv, 0.0).rgb;
+		const ANKI_RP Vec3 history = textureLod(u_historyTex, u_linearAnyClampSampler, historyUv, 0.0).rgb;
 		outColor = mix(history, outColor, blendFactor);
 	}
 

+ 7 - 7
AnKi/Shaders/IndirectDiffuseDenoise.ankiprog

@@ -16,10 +16,10 @@ const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y) in;
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_toDenoiseTex;
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_toDenoiseTex;
 layout(set = 0, binding = 2) uniform texture2D u_depthTex;
-layout(set = 0, binding = 3) uniform texture2D u_gbuffer2Tex;
-layout(set = 0, binding = 4) writeonly uniform image2D u_outImg;
+layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_gbuffer2Tex;
+layout(set = 0, binding = 4) writeonly uniform ANKI_RP image2D u_outImg;
 
 layout(push_constant, std430, row_major) uniform b_pc
 {
@@ -51,11 +51,11 @@ void main()
 	}
 
 	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
-	const Vec3 normalCenter = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
+	const ANKI_RP Vec3 normalCenter = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
 
 	// Sample
-	F32 weight = EPSILON;
-	Vec3 color = Vec3(0.0);
+	ANKI_RP F32 weight = EPSILON_RP;
+	ANKI_RP Vec3 color = Vec3(0.0);
 
 	for(F32 i = -u_unis.m_sampleCountDiv2; i <= u_unis.m_sampleCountDiv2; i += 1.0)
 	{
@@ -71,7 +71,7 @@ void main()
 		const Vec3 normalTap =
 			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
 
-		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
+		ANKI_RP F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
 		// w *= gaussianWeight(0.4, abs(F32(i)) / (sampleCount + 1.0));
 		weight += w;
 

+ 13 - 10
AnKi/Shaders/LightFunctions.glsl

@@ -327,24 +327,26 @@ F32 computeProbeBlendWeight(Vec3 fragPos, // Doesn't need to be inside the AABB
 
 // Given the value of the 6 faces of the dice and a normal, sample the correct weighted value.
 // https://www.shadertoy.com/view/XtcBDB
-Vec3 sampleAmbientDice(Vec3 posx, Vec3 negx, Vec3 posy, Vec3 negy, Vec3 posz, Vec3 negz, Vec3 normal)
+ANKI_RP Vec3 sampleAmbientDice(ANKI_RP Vec3 posx, ANKI_RP Vec3 negx, ANKI_RP Vec3 posy, ANKI_RP Vec3 negy,
+							   ANKI_RP Vec3 posz, ANKI_RP Vec3 negz, ANKI_RP Vec3 normal)
 {
-	const Vec3 axisWeights = abs(normal);
-	const Vec3 uv = NDC_TO_UV(normal);
+	const ANKI_RP Vec3 axisWeights = abs(normal);
+	const ANKI_RP Vec3 uv = NDC_TO_UV(normal);
 
-	Vec3 col = mix(negx, posx, uv.x) * axisWeights.x;
+	ANKI_RP Vec3 col = mix(negx, posx, uv.x) * axisWeights.x;
 	col += mix(negy, posy, uv.y) * axisWeights.y;
 	col += mix(negz, posz, uv.z) * axisWeights.z;
 
 	// Divide by weight
-	col /= axisWeights.x + axisWeights.y + axisWeights.z + EPSILON;
+	col /= axisWeights.x + axisWeights.y + axisWeights.z + EPSILON_RP;
 
 	return col;
 }
 
 // Sample the irradiance term from the clipmap
-Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
-							  texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES], sampler linearAnyClampSampler)
+ANKI_RP Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
+									  ANKI_RP texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES],
+									  sampler linearAnyClampSampler)
 {
 	// Find the UVW
 	Vec3 uvw = (worldPos - probe.m_aabbMin) / (probe.m_aabbMax - probe.m_aabbMin);
@@ -356,7 +358,7 @@ Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glob
 	uvw.x = clamp(uvw.x, probe.m_halfTexelSizeU, (1.0 / 6.0) - probe.m_halfTexelSizeU);
 
 	// Read the irradiance
-	Vec3 irradiancePerDir[6u];
+	ANKI_RP Vec3 irradiancePerDir[6u];
 	ANKI_UNROLL for(U32 dir = 0u; dir < 6u; ++dir)
 	{
 		// Point to the correct UV
@@ -368,8 +370,9 @@ Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glob
 	}
 
 	// Sample the irradiance
-	const Vec3 irradiance = sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2],
-											  irradiancePerDir[3], irradiancePerDir[4], irradiancePerDir[5], normal);
+	const ANKI_RP Vec3 irradiance =
+		sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2], irradiancePerDir[3],
+						  irradiancePerDir[4], irradiancePerDir[5], normal);
 
 	return irradiance;
 }