Browse Source

HLSL 16bit fixes

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
40fc7d04ae

+ 4 - 3
AnKi/Shaders/BilateralFilter.hlsl

@@ -11,13 +11,14 @@
 
 // https://cs.dartmouth.edu/~wjarosz/publications/mara17towards.html
 // phi can be equal to 1
-F32 calculateBilateralWeightDepth(F32 depthCenter, F32 depthTap, F32 phi)
+template<typename T>
+T calculateBilateralWeightDepth(T depthCenter, T depthTap, T phi)
 {
-	const F32 diff = abs(depthTap - depthCenter);
+	const T diff = abs(depthTap - depthCenter);
 #if 0
 	return max(0.0, 1.0 - diff * phi);
 #else
-	return sqrt(1.0 / (kEpsilonF32 + diff)) * phi;
+	return sqrt(1.0 / (getEpsilon<T>() + diff)) * phi;
 #endif
 }
 

+ 16 - 27
AnKi/Shaders/Common.hlsl

@@ -17,8 +17,9 @@
 constexpr F32 kEpsilonF32 = 0.000001f;
 #if ANKI_SUPPORTS_16BIT_TYPES
 constexpr F16 kEpsilonF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
-#endif
+#else
 constexpr RF32 kEpsilonRF32 = 0.0001f;
+#endif
 
 template<typename T>
 T getEpsilon();
@@ -37,7 +38,7 @@ F16 getEpsilon()
 }
 #endif
 
-#if !ANKI_FORCE_FULL_FP_PRECISION
+#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
 template<>
 RF32 getEpsilon()
 {
@@ -47,7 +48,9 @@ RF32 getEpsilon()
 
 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
 constexpr F32 kMaxF32 = 3.402823e+38;
+#if !ANKI_SUPPORTS_16BIT_TYPES
 constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
+#endif
 #if ANKI_SUPPORTS_16BIT_TYPES
 constexpr F16 kMaxF16 = (F16)65504.0;
 #endif
@@ -61,7 +64,7 @@ F32 getMaxNumericLimit()
 	return kMaxF32;
 }
 
-#if !ANKI_FORCE_FULL_FP_PRECISION
+#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
 template<>
 RF32 getMaxNumericLimit()
 {
@@ -122,30 +125,16 @@ struct Barycentrics
 		}
 #endif
 
-#if ANKI_SUPPORTS_16BIT_TYPES
-#	define ANKI_BINDLESS2(texType) \
-		ANKI_BINDLESS(texType, UVec4) \
-		ANKI_BINDLESS(texType, IVec4) \
-		ANKI_BINDLESS(texType, Vec4)
-
-#	define ANKI_BINDLESS3() \
-		ANKI_BINDLESS2(2D) \
-		ANKI_BINDLESS2(Cube) \
-		ANKI_BINDLESS2(2DArray) \
-		ANKI_BINDLESS2(3D)
-#else
-#	define ANKI_BINDLESS2(texType) \
-		ANKI_BINDLESS(texType, UVec4) \
-		ANKI_BINDLESS(texType, IVec4) \
-		ANKI_BINDLESS(texType, Vec4) \
-		ANKI_BINDLESS(texType, RVec4)
-
-#	define ANKI_BINDLESS3() \
-		ANKI_BINDLESS2(2D) \
-		ANKI_BINDLESS2(Cube) \
-		ANKI_BINDLESS2(2DArray) \
-		ANKI_BINDLESS2(3D)
-#endif
+#define ANKI_BINDLESS2(texType) \
+	ANKI_BINDLESS(texType, UVec4) \
+	ANKI_BINDLESS(texType, IVec4) \
+	ANKI_BINDLESS(texType, Vec4)
+
+#define ANKI_BINDLESS3() \
+	ANKI_BINDLESS2(2D) \
+	ANKI_BINDLESS2(Cube) \
+	ANKI_BINDLESS2(2DArray) \
+	ANKI_BINDLESS2(3D)
 
 ANKI_BINDLESS3()
 

+ 1 - 1
AnKi/Shaders/ForwardShadingCommon.hlsl

@@ -27,7 +27,7 @@ void packGBuffer(Vec4 color, out PixelOut output)
 	output.m_color = RVec4(color.rgb, color.a);
 }
 
-RVec4 readAnimatedTextureRgba(Texture2DArray<RVec4> tex, SamplerState sampl, F32 period, Vec2 uv, F32 time)
+RVec4 readAnimatedTextureRgba(Texture2DArray<Vec4> tex, SamplerState sampl, F32 period, Vec2 uv, F32 time)
 {
 	Vec2 texSize;
 	F32 layerCount;

+ 1 - 1
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -62,7 +62,7 @@ PixelOut main(VertOut input)
 	output.m_color = RVec4(1.0, 1.0, 1.0, 1.0);
 
 #	if TEXTURE == 1
-	output.m_color = getBindlessTexture2DRVec4(localConstants.m_texture).Sample(g_globalSampler, input.m_uv);
+	output.m_color = getBindlessTexture2DVec4(localConstants.m_texture).Sample(g_globalSampler, input.m_uv);
 #	endif
 
 #	if LIGHT == 1

+ 2 - 2
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -76,10 +76,10 @@ PixelOut main(VertOut input)
 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, WaveReadLaneFirst(input.m_constantsOffset));
 
 #	if ANIMATED_TEXTURE == 1
-	RVec4 texCol = readAnimatedTextureRgba(getBindlessTexture2DArrayRVec4(localConstants.m_diffuseMap), g_globalSampler,
+	RVec4 texCol = readAnimatedTextureRgba(getBindlessTexture2DArrayVec4(localConstants.m_diffuseMap), g_globalSampler,
 										   localConstants.m_animationPeriod, input.m_uv, g_globalRendererConstants.m_time);
 #	else
-	RVec4 texCol = getBindlessTexture2DRVec4(localConstants.m_diffuseMap).Sample(g_globalSampler, input.m_uv);
+	RVec4 texCol = getBindlessTexture2DVec4(localConstants.m_diffuseMap).Sample(g_globalSampler, input.m_uv);
 #	endif
 
 #	if LIGHT

+ 2 - 2
AnKi/Shaders/Functions.hlsl

@@ -702,7 +702,7 @@ vector<T, 3> filmGrain(vector<T, 3> color, Vec2 uv, T strength, F32 time)
 /// Perturb normal, see http://www.thetenthplanet.de/archives/1180
 /// Does normal mapping in the fragment shader. It assumes that green is up. viewDir and geometricNormal need to be in the same space.
 /// viewDir is the -(eye - vertexPos)
-RVec3 perturbNormal(RVec3 tangentNormal, Vec3 viewDir, Vec2 uv, Vec3 geometricNormal)
+Vec3 perturbNormal(Vec3 tangentNormal, Vec3 viewDir, Vec2 uv, Vec3 geometricNormal)
 {
 	tangentNormal.y = -tangentNormal.y; // Green is up
 
@@ -721,7 +721,7 @@ RVec3 perturbNormal(RVec3 tangentNormal, Vec3 viewDir, Vec2 uv, Vec3 geometricNo
 	// Construct a scale-invariant frame
 	const F32 invmax = rsqrt(max(dot(T, T), dot(B, B)));
 
-	RMat3 TBN;
+	Mat3 TBN;
 	TBN.setColumns(T * invmax, B * invmax, geometricNormal);
 	return normalize(mul(TBN, tangentNormal));
 }

+ 6 - 6
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -465,9 +465,9 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices
 #if ANKI_PIXEL_SHADER
 
 #	if SW_MESHLETS
-#		define BINDLESS(x) getBindlessTextureNonUniformIndex2DRVec4(x)
+#		define BINDLESS(x) getBindlessTextureNonUniformIndex2DVec4(x)
 #	else
-#		define BINDLESS(x) getBindlessTexture2DRVec4(x)
+#		define BINDLESS(x) getBindlessTexture2DVec4(x)
 #	endif
 
 #	if !GBUFFER && !REALLY_ALPHA_TEST // Shadows without alpha
@@ -659,7 +659,7 @@ GBufferPixelOut main(
 
 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_gpuSceneRenderable.m_constantsOffset);
 	const RVec4 diffColorA =
-		getBindlessTexture2DRVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, 0.0) * localConstants.m_diffuseScale;
+		getBindlessTexture2DVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, 0.0) * localConstants.m_diffuseScale;
 
 	if(diffColorA.a < 1.0)
 	{
@@ -716,7 +716,7 @@ GBufferPixelOut main(
 
 	// Metalness
 #		if ROUGHNESS_METALNESS_TEX
-	const RVec3 comp = getBindlessTexture2DRVec4(localConstants.m_roughnessMetalnessTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).xyz;
+	const RVec3 comp = getBindlessTexture2DVec4(localConstants.m_roughnessMetalnessTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).xyz;
 	const RF32 metallic = comp.b * localConstants.m_metalnessScale;
 #		else
 	const RF32 metallic = localConstants.m_metalnessScale;
@@ -724,7 +724,7 @@ GBufferPixelOut main(
 
 	// Diffuse
 #		if DIFFUSE_TEX
-	RVec3 diffColor = getBindlessTexture2DRVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).xyz;
+	RVec3 diffColor = getBindlessTexture2DVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).xyz;
 #		else
 	RVec3 diffColor = 1.0;
 #		endif
@@ -733,7 +733,7 @@ GBufferPixelOut main(
 
 	// Emissive
 #		if EMISSIVE_TEX
-	RVec3 emission = getBindlessTexture2DRVec4(localConstants.m_emissiveTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).rgb;
+	RVec3 emission = getBindlessTexture2DVec4(localConstants.m_emissiveTex).SampleLevel(g_globalSampler, uv, payload.m_textureLod).rgb;
 #		else
 	RVec3 emission = 1.0;
 #		endif

+ 1 - 1
AnKi/Shaders/GBufferVisualizeProbe.ankiprog

@@ -60,7 +60,7 @@ struct PixelOut
 UVec3 getCellCount(GpuSceneGlobalIlluminationProbe probe)
 {
 	UVec3 texSize;
-	getBindlessTextureNonUniformIndex3DRVec4(probe.m_volumeTexture).GetDimensions(texSize.x, texSize.y, texSize.z);
+	getBindlessTextureNonUniformIndex3DVec4(probe.m_volumeTexture).GetDimensions(texSize.x, texSize.y, texSize.z);
 	texSize.x /= 6u;
 	return texSize;
 }

+ 10 - 7
AnKi/Shaders/Include/Common.h

@@ -396,20 +396,23 @@ _ANKI_MAT3(Mat3, Vec3, F32)
 _ANKI_MAT4(Mat4, Vec4, F32)
 _ANKI_MAT3x4(Mat3x4, Vec4, Vec3, F32)
 
-#	if ANKI_FORCE_FULL_FP_PRECISION
+#	if ANKI_SUPPORTS_16BIT_TYPES == 0
+#		if ANKI_FORCE_FULL_FP_PRECISION
 	typedef float RF32;
 typedef float2 RVec2;
 typedef float3 RVec3;
 typedef float4 RVec4;
 _ANKI_MAT3(RMat3, Vec3, F32)
-#	else
+#		else
 	typedef min16float RF32;
 typedef min16float2 RVec2;
 typedef min16float3 RVec3;
 typedef min16float4 RVec4;
 _ANKI_MAT3(RMat3, RVec3, RF32)
-#	endif
-#endif
+#		endif
+#	endif // ANKI_SUPPORTS_16BIT_TYPES == 0
+
+#endif // defined(__HLSL_VERSION)
 
 //! == Common ==========================================================================================================
 ANKI_BEGIN_NAMESPACE
@@ -430,9 +433,9 @@ constexpr U32 kMeshletGroupSize = ANKI_TASK_SHADER_THREADGROUP_SIZE;
 #define ANKI_MESH_SHADER_THREADGROUP_SIZE 32u
 static_assert(kMaxVerticesPerMeshlet % ANKI_MESH_SHADER_THREADGROUP_SIZE == 0);
 
-constexpr RF32 kPcfTexelRadius = 4.0f;
-constexpr RF32 kPcssSearchTexelRadius = 12.0;
-constexpr RF32 kPcssTexelRadius = 12.0;
+constexpr F32 kPcfTexelRadius = 4.0f;
+constexpr F32 kPcssSearchTexelRadius = 12.0;
+constexpr F32 kPcssTexelRadius = 12.0;
 constexpr F32 kPcssDirLightMaxPenumbraMeters = 6.0; // If the occluder and the receiver have more than this value then do full penumbra
 
 struct DrawIndirectArgs

+ 5 - 5
AnKi/Shaders/Reflections.ankiprog

@@ -345,7 +345,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
 	candidateCount += 1.0;
 
-	const F32 weight = calculateBilateralWeightDepth(depth, g_colorAndDepth[svGroupThreadId2.x][svGroupThreadId2.y].w, 1.0);
+	const F32 weight = calculateBilateralWeightDepth<F32>(depth, g_colorAndDepth[svGroupThreadId2.x][svGroupThreadId2.y].w, 1.0);
 	if(weight > depthWeight)
 	{
 		depthWeight = weight;
@@ -796,7 +796,7 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 		return;
 	}
 
-	const F32 weight = calculateBilateralWeightDepth(refDepth, g_hitPosAndDepth[svGroupThreadId.x][svGroupThreadId.y], 1.0);
+	const F32 weight = calculateBilateralWeightDepth<F32>(refDepth, g_hitPosAndDepth[svGroupThreadId.x][svGroupThreadId.y], 1.0);
 
 	color += g_colorAndPdf[svGroupThreadId.x][svGroupThreadId.y].xyz * weight;
 	pdf += g_colorAndPdf[svGroupThreadId.x][svGroupThreadId.y].w * weight;
@@ -918,7 +918,7 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 			const Vec3 reflectedDir = normalize(hitPos - worldPos);
 			const F32 pdf = pdfVndfIsotropic(reflectedDir, viewDir, alpha, worldNormal);
 
-			const F32 weight = pdf * calculateBilateralWeightDepth(refDepth, sampleDepth, 1.0);
+			const F32 weight = pdf * calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
 
 			if(weight > 0.001)
 			{
@@ -1239,7 +1239,7 @@ F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
 		const Vec3 sampleColor = rgba.xyz;
 
 		const F32 gWeight = gaussianWeight<F32>(kGaussianSigma, x / sampleCount);
-		const F32 depthWeight = calculateBilateralWeightDepth(refDepth, sampleDepth, 1.0);
+		const F32 depthWeight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
 		const F32 weight = gWeight * depthWeight;
 
 		colorSum += sampleColor * weight;
@@ -1308,7 +1308,7 @@ RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u1);
 		decodeColorDepthAndSampleCount(g_colorAndDepthAndSampleCount[newCoord], sampleColor, sampleDepth, unused);
 
 		const F32 gWeight = gaussianWeight<F32>(kGaussianSigma, y / sampleCount);
-		const F32 depthWeight = calculateBilateralWeightDepth(refDepth, sampleDepth, 1.0);
+		const F32 depthWeight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
 		const F32 weight = gWeight * depthWeight;
 
 		colorSum += sampleColor * weight;

+ 1 - 1
AnKi/Shaders/RtShadowsUpscale.ankiprog

@@ -42,7 +42,7 @@ Texture2D<Vec4> g_fullDepthTex : register(t2);
 			const Vec2 sampleUv = uv + Vec2(x, y) * texelSize;
 			const F32 depthTap = g_quarterDepthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
 
-			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
+			const F32 w = calculateBilateralWeightDepth<F32>(depthCenter, depthTap, 1.0);
 
 			const F32 localShadowFactor = g_quarterShadowsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;