Browse Source

Fix a bug in tangent calculation. Add some code for backface culling of meshlets (doesn't work well)

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
cc4e44c002

+ 1 - 1
AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -178,7 +178,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	if(m_capabilities.m_pipelineQuery)
 	{
 		m_pipelineQueryFactories[PipelineQueryType::kPrimitivesPassedClipping].init(VK_QUERY_TYPE_PIPELINE_STATISTICS,
-																					VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT);
+																					VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT);
 	}
 
 	// See if unaligned formats are supported

+ 10 - 0
AnKi/Math/Mat.h

@@ -1364,6 +1364,16 @@ public:
 		return TMat(invertedTsl.xyz0(), invertedRot);
 	}
 
+	/// Return the inverse of this transformation. The rotation part is inverted with a transpose so it needs to be orthonormal (no scale)
+	ANKI_ENABLE_METHOD(kTRowCount == 3 && kTColumnCount == 4)
+	TMat getInverseTransformation() const
+	{
+		const TMat<T, 3, 3> invertedRot = getRotationPart().getTransposed();
+		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
+		invertedTsl = -(invertedRot * invertedTsl);
+		return TMat(invertedTsl.xyz(), invertedRot);
+	}
+
 	/// @note 9 muls, 9 adds
 	ANKI_ENABLE_METHOD(kTColumnCount == 4 && kTRowCount == 4)
 	TVec<T, 3> transform(const TVec<T, 3>& v) const

+ 2 - 2
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -286,7 +286,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 				RenderableDrawerArguments args;
 				args.m_viewMatrix = viewMats[faceIdx];
-				args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
+				args.m_cameraTransform = args.m_viewMatrix.getInverseTransformation();
 				args.m_viewProjectionMatrix = viewProjMats[faceIdx];
 				args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 				args.m_renderingTechinuqe = RenderingTechnique::kGBuffer;
@@ -357,7 +357,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 				RenderableDrawerArguments args;
 				args.m_viewMatrix = cascadeViewMats[faceIdx];
-				args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
+				args.m_cameraTransform = cascadeViewMats[faceIdx].getInverseTransformation();
 				args.m_viewProjectionMatrix = cascadeViewProjMats[faceIdx];
 				args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 				args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();

+ 2 - 2
AnKi/Renderer/ProbeReflections.cpp

@@ -199,7 +199,7 @@ void ProbeReflections::runGBuffer(const Array<GpuVisibilityOutput, 6>& visOuts,
 
 	RenderableDrawerArguments args;
 	args.m_viewMatrix = viewMats[faceIdx];
-	args.m_cameraTransform = Mat3x4(Mat4(viewMats[faceIdx], Vec4(0.0f, 0.0f, 0.0f, 1.0f)).getInverse());
+	args.m_cameraTransform = viewMats[faceIdx].getInverseTransformation();
 	args.m_viewProjectionMatrix = viewProjMatx[faceIdx];
 	args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care about prev mats
 	args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
@@ -594,7 +594,7 @@ void ProbeReflections::runShadowMapping(const Array<GpuVisibilityOutput, 6>& vis
 
 	RenderableDrawerArguments args;
 	args.m_viewMatrix = viewMats[faceIdx];
-	args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
+	args.m_cameraTransform = viewMats[faceIdx].getInverseTransformation();
 	args.m_viewProjectionMatrix = viewProjMats[faceIdx];
 	args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 	args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAniso.get();

+ 11 - 4
AnKi/Renderer/Renderer.cpp

@@ -52,7 +52,7 @@ static NumericCVar<F32> g_internalRenderScalingCVar(CVarSubsystem::kRenderer, "I
 NumericCVar<F32> g_renderScalingCVar(CVarSubsystem::kRenderer, "RenderScaling", 1.0f, 0.5f, 8.0f,
 									 "A factor over the requested swapchain resolution. Applies to post-processing and UI");
 static NumericCVar<U32> g_zSplitCountCVar(CVarSubsystem::kRenderer, "ZSplitCount", 64, 8, kMaxZsplitCount, "Clusterer number of Z splits");
-static NumericCVar<U8> g_textureAnisotropyCVar(CVarSubsystem::kRenderer, "TextureAnisotropy", (ANKI_PLATFORM_MOBILE) ? 1 : 8, 1, 16,
+static NumericCVar<U8> g_textureAnisotropyCVar(CVarSubsystem::kRenderer, "TextureAnisotropy", (ANKI_PLATFORM_MOBILE) ? 1 : 16, 1, 16,
 											   "Texture anisotropy for the main passes");
 BoolCVar g_preferComputeCVar(CVarSubsystem::kRenderer, "PreferCompute", !ANKI_PLATFORM_MOBILE, "Prefer compute shaders");
 static BoolCVar g_highQualityHdrCVar(CVarSubsystem::kRenderer, "HighQualityHdr", !ANKI_PLATFORM_MOBILE,
@@ -288,9 +288,16 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 		sinit.m_addressing = SamplingAddressing::kRepeat;
 		m_samplers.m_trilinearRepeat = GrManager::getSingleton().newSampler(sinit);
 
-		sinit.setName("TrilinearRepeatAniso");
-		sinit.m_anisotropyLevel = g_textureAnisotropyCVar.get();
-		m_samplers.m_trilinearRepeatAniso = GrManager::getSingleton().newSampler(sinit);
+		if(g_textureAnisotropyCVar.get() <= 1u)
+		{
+			m_samplers.m_trilinearRepeatAniso = m_samplers.m_trilinearRepeat;
+		}
+		else
+		{
+			sinit.setName("TrilinearRepeatAniso");
+			sinit.m_anisotropyLevel = g_textureAnisotropyCVar.get();
+			m_samplers.m_trilinearRepeatAniso = GrManager::getSingleton().newSampler(sinit);
+		}
 
 		sinit.setName("TrilinearRepeatAnisoRezScalingBias");
 		F32 scalingMipBias = log2(F32(m_internalResolution.x()) / F32(m_postProcessResolution.x()));

+ 2 - 2
AnKi/Renderer/ShadowMapping.cpp

@@ -618,10 +618,10 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 		RenderableDrawerArguments args;
 		args.m_renderingTechinuqe = RenderingTechnique::kDepth;
 		args.m_viewMatrix = work.m_viewMat;
-		args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
+		args.m_cameraTransform = work.m_viewMat.getInverseTransformation();
 		args.m_viewProjectionMatrix = work.m_viewProjMat;
 		args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
-		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAniso.get();
+		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
 		args.m_viewport = UVec4(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
 		args.fillMdi(work.m_visOut);
 

+ 0 - 103
AnKi/Renderer/Utils/Drawer.cpp

@@ -18,28 +18,12 @@
 
 namespace anki {
 
-static StatCounter g_executedDrawcallsStatVar(StatCategory::kRenderer, "Drawcalls executed", StatFlag::kZeroEveryFrame);
-static StatCounter g_maxDrawcallsStatVar(StatCategory::kRenderer, "Drawcalls possible", StatFlag::kZeroEveryFrame);
-static StatCounter g_renderedPrimitivesStatVar(StatCategory::kRenderer, "Rendered primitives", StatFlag::kZeroEveryFrame);
-
 RenderableDrawer::~RenderableDrawer()
 {
 }
 
 Error RenderableDrawer::init()
 {
-#if ANKI_STATS_ENABLED
-	constexpr Array<MutatorValue, 3> kColorAttachmentCounts = {0, 1, 4};
-
-	U32 count = 0;
-	for(MutatorValue attachmentCount : kColorAttachmentCounts)
-	{
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/DrawerStats.ankiprogbin", Array<SubMutation, 1>{{{"COLOR_ATTACHMENT_COUNT", attachmentCount}}},
-									 m_stats.m_statsProg, m_stats.m_updateStatsGrProgs[count]));
-		++count;
-	}
-#endif
-
 	return Error::kNone;
 }
 
@@ -89,91 +73,6 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 		return;
 	}
 
-#if ANKI_STATS_ENABLED
-	U32 variant = 0;
-	switch(args.m_renderingTechinuqe)
-	{
-	case RenderingTechnique::kGBuffer:
-		variant = 2;
-		break;
-	case RenderingTechnique::kForward:
-		variant = 1;
-		break;
-	case RenderingTechnique::kDepth:
-		variant = 0;
-		break;
-	default:
-		ANKI_ASSERT(0);
-	}
-
-	{
-		constexpr U32 kFragmentThreadCount = 16;
-		using StatsArray = Array<U32, kFragmentThreadCount * 2>;
-
-		LockGuard lock(m_stats.m_mtx);
-
-		if(m_stats.m_frameIdx != getRenderer().getFrameCount())
-		{
-			m_stats.m_frameIdx = getRenderer().getFrameCount();
-
-			// Get previous stats
-			StatsArray prevFrameStats;
-			PtrSize dataRead;
-			getRenderer().getReadbackManager().readMostRecentData(m_stats.m_readback, &prevFrameStats, sizeof(prevFrameStats), dataRead);
-			if(dataRead > 0) [[likely]]
-			{
-				U32 drawCount = 0;
-				U32 primitiveCount = 0;
-				for(U32 tid = 0; tid < kFragmentThreadCount; ++tid)
-				{
-					drawCount += prevFrameStats[tid];
-					primitiveCount += prevFrameStats[kFragmentThreadCount + tid] / 3;
-				}
-
-				g_executedDrawcallsStatVar.set(drawCount);
-				g_renderedPrimitivesStatVar.set(primitiveCount);
-			}
-
-			// Get place to write new stats
-			getRenderer().getReadbackManager().allocateData(m_stats.m_readback, sizeof(prevFrameStats), m_stats.m_statsBuffer,
-															m_stats.m_statsBufferOffset);
-
-			// Allocate another atomic to count the passes. Do that because the calls to drawMdi might not be in the same order as they run on the GPU
-			U32* counter;
-			m_stats.m_threadCountBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), counter);
-			*counter = 0;
-		}
-
-		cmdb.pushDebugMarker("Draw stats", Vec3(0.0f, 1.0f, 0.0f));
-
-		cmdb.bindShaderProgram(m_stats.m_updateStatsGrProgs[variant].get());
-
-		cmdb.bindUavBuffer(0, 0, args.m_mdiDrawCountsBuffer);
-		cmdb.bindUavBuffer(0, 1, args.m_drawIndexedIndirectArgsBuffer);
-
-		DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>> offsets(&getRenderer().getFrameMemoryPool());
-		U32 allUserCount = 0;
-		RenderStateBucketContainer::getSingleton().iterateBuckets(
-			args.m_renderingTechinuqe, [&]([[maybe_unused]] const RenderStateInfo& state, U32 userCount, [[maybe_unused]] U32 meshletGroupCount) {
-				offsets.emplaceBack(allUserCount);
-				allUserCount += userCount;
-			});
-		U32* firstDrawArgIndices;
-		BufferOffsetRange firstDrawArgIndicesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(offsets.getSize(), firstDrawArgIndices);
-		memcpy(firstDrawArgIndices, &offsets[0], offsets.getSizeInBytes());
-		cmdb.bindUavBuffer(0, 2, firstDrawArgIndicesBuffer);
-
-		cmdb.bindUavBuffer(0, 3, m_stats.m_statsBuffer, m_stats.m_statsBufferOffset, sizeof(StatsArray));
-		cmdb.bindUavBuffer(0, 4, m_stats.m_threadCountBuffer);
-
-		cmdb.setPushConstants(&args.m_viewport, sizeof(args.m_viewport));
-
-		cmdb.draw(PrimitiveTopology::kTriangles, 6);
-
-		cmdb.popDebugMarker();
-	}
-#endif
-
 #if ANKI_STATS_ENABLED
 	PipelineQueryPtr pplineQuery;
 
@@ -336,8 +235,6 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 		cmdb.endPipelineQuery(pplineQuery.get());
 	}
 #endif
-
-	g_maxDrawcallsStatVar.increment(allUserCount);
 }
 
 } // end namespace anki

+ 0 - 19
AnKi/Renderer/Utils/Drawer.h

@@ -63,25 +63,6 @@ public:
 	void drawMdi(const RenderableDrawerArguments& args, CommandBuffer& cmdb);
 
 private:
-#if ANKI_STATS_ENABLED
-	class
-	{
-	public:
-		MultiframeReadbackToken m_readback;
-
-		ShaderProgramResourcePtr m_statsProg;
-		Array<ShaderProgramPtr, 3> m_updateStatsGrProgs;
-
-		U64 m_frameIdx = kMaxU64;
-		SpinLock m_mtx;
-
-		Buffer* m_statsBuffer = nullptr;
-		PtrSize m_statsBufferOffset = 0;
-
-		BufferOffsetRange m_threadCountBuffer;
-	} m_stats;
-#endif
-
 	void setState(const RenderableDrawerArguments& args, CommandBuffer& cmdb);
 };
 /// @}

+ 2 - 3
AnKi/Resource/MeshResource.cpp

@@ -326,9 +326,8 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 				outMeshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint = (inMeshlet.m_primitiveCount << 16u) | inMeshlet.m_vertexCount;
 				outMeshlet.m_sphereCenter = inMeshlet.m_boundingVolume.m_sphereCenter;
 				outMeshlet.m_sphereRadius = inMeshlet.m_boundingVolume.m_sphereRadius;
-				outMeshlet.m_coneApex = inMeshlet.m_coneApex;
-				outMeshlet.m_coneDirection_R8G8B8_Snorm_coneCosOfHalfAngle_R8_Snorm =
-					packSnorm4x8(Vec4(inMeshlet.m_coneDirection, cos(inMeshlet.m_coneAngle / 2.0f)));
+				outMeshlet.m_coneDirection_R8G8B8_Snorm_minusSinAngle_R8_Snorm =
+					packSnorm4x8(Vec4(inMeshlet.m_coneDirection, -sin(inMeshlet.m_coneAngle)));
 			}
 
 			cmdb->copyBufferToBuffer(&handle2.getBuffer(), handle2.getOffset(), unifiedGeometryBuffer, lod.m_meshlets.getOffset(),

+ 17 - 14
AnKi/Shaders/Functions.hlsl

@@ -445,7 +445,9 @@ Mat3 rotationFromDirection(Vec3 zAxis)
 	const Vec3 y = Vec3(b, sign + a * pow(z.y, 2.0), -z.y);
 #endif
 
-	return constructMatrixColumns(x, y, z);
+	Mat3 o;
+	o.setColumns(x, y, z);
+	return o;
 }
 
 #if defined(ANKI_COMPUTE_SHADER) && ANKI_GLSL
@@ -710,26 +712,27 @@ F32 fastCos(F32 x)
 #endif
 
 /// Perturb normal, see http://www.thetenthplanet.de/archives/1180
-/// Does normal mapping in the fragment shader. It assumes that green is up. geometricNormal is in world space.
-RVec3 perturbNormal(RVec3 tangentNormal, Vec3 worldPosition, Vec2 uv, RVec3 geometricNormal)
+/// Does normal mapping in the fragment shader. It assumes that green is up. viewDir and geometricNormal need to be in the same space.
+RVec3 perturbNormal(RVec3 tangentNormal, Vec3 viewDir, Vec2 uv, Vec3 geometricNormal)
 {
 	tangentNormal.y = -tangentNormal.y; // Green is up
 
-	// get edge vectors of the pixel triangle
-	const Vec3 dp1 = ddx(worldPosition);
-	const Vec3 dp2 = ddy(worldPosition);
+	// Get edge vectors of the pixel triangle
+	const Vec3 dp1 = ddx(viewDir);
+	const Vec3 dp2 = ddy(viewDir);
 	const Vec2 duv1 = ddx(uv);
 	const Vec2 duv2 = ddy(uv);
 
-	// solve the linear system
-	const RVec3 dp2perp = cross(dp2, geometricNormal);
-	const RVec3 dp1perp = cross(geometricNormal, dp1);
-	const RVec3 T = normalize(dp2perp * duv1.x + dp1perp * duv2.x);
-	const RVec3 B = normalize(dp2perp * duv1.y + dp1perp * duv2.y);
+	// Solve the linear system
+	const Vec3 dp2perp = cross(dp2, geometricNormal);
+	const Vec3 dp1perp = cross(geometricNormal, dp1);
+	const Vec3 T = dp2perp * duv1.x + dp1perp * duv2.x;
+	const Vec3 B = dp2perp * duv1.y + dp1perp * duv2.y;
 
-	// construct a scale-invariant frame
-	const RF32 invmax = rsqrt(max(dot(T, T), dot(B, B)));
+	// Construct a scale-invariant frame
+	const F32 invmax = rsqrt(max(dot(T, T), dot(B, B)));
 
-	const RMat3 TBN = constructMatrixColumns(T * invmax, B * invmax, geometricNormal);
+	RMat3 TBN;
+	TBN.setColumns(T * invmax, B * invmax, geometricNormal);
 	return normalize(mul(TBN, tangentNormal));
 }

+ 53 - 3
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -24,6 +24,8 @@
 #define REALLY_VELOCITY ((ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER)
 #define REALLY_USING_PARALLAX (PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ALPHA_TEST == 0)
 
+#define MESHLET_BACKFACE_CULLING 0
+
 #include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneFunctions.h>
 #include <AnKi/Shaders/PackFunctions.hlsl>
@@ -97,6 +99,7 @@ struct FragOut
 struct MeshShaderPayload
 {
 	U32 m_firstMeshletIndex;
+	U32 m_visibleMeshletsRelativeIndices[kMeshletGroupSize / sizeof(U32)];
 	U32 m_worldTransformsOffset;
 	U32 m_constantsOffset;
 	U32 m_boneTransformsOrParticleEmitterOffset;
@@ -213,6 +216,7 @@ VertOut main(VertIn input)
 #pragma anki start task
 
 groupshared MeshShaderPayload s_payload;
+groupshared U32 s_visibleMeshletCount;
 
 struct FirstPayload
 {
@@ -243,9 +247,50 @@ struct FirstPayload
 		s_payload.m_boneTransformsOrParticleEmitterOffset = renderable.m_boneTransformsOffset;
 		s_payload.m_positionScale = meshLod.m_positionScale;
 		s_payload.m_positionTranslation = meshLod.m_positionTranslation;
+
+		s_visibleMeshletCount = 0;
+
+		[unroll] for(U32 i = 0; i < kMeshletGroupSize / sizeof(U32); ++i)
+		{
+			s_payload.m_visibleMeshletsRelativeIndices[i] = 0u;
+		}
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	if(svGroupIndex < meshletCount)
+	{
+		Bool cull = false;
+
+#if MESHLET_BACKFACE_CULLING
+		const Meshlet meshlet = g_meshlets[firstMeshlet + svGroupIndex];
+		const Vec4 coneData = unpackSnorm4x8(meshlet.m_coneDirection_R8G8B8_Snorm_minusSinAngle_R8_Snorm);
+
+		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
+
+		const Vec3 meshletCenterWspace = mul(worldTransform, Vec4(meshlet.m_sphereCenter, 1.0f));
+		const Vec3 coneAxisWspace = normalize(mul(worldTransform, Vec4(coneData.xyz, 0.0f)));
+		const Vec3 cameraPos = Vec3(g_globalConstants.m_cameraTransform.m_row0.w, g_globalConstants.m_cameraTransform.m_row1.w,
+									g_globalConstants.m_cameraTransform.m_row2.w);
+		const Vec3 viewDir = normalize(meshletCenterWspace - cameraPos);
+
+		cull = cull || dot(coneAxisWspace, -viewDir) < coneData.w;
+#endif
+
+		if(!cull)
+		{
+			U32 idx;
+			InterlockedAdd(s_visibleMeshletCount, 1u, idx);
+
+			const U32 groupIdx = idx / 4u;
+			const U32 localIdx = idx % 4u;
+
+			const U32 mask = svGroupIndex << (localIdx * 8u);
+			InterlockedOr(s_payload.m_visibleMeshletsRelativeIndices[groupIdx], mask);
+		}
 	}
 
-	DispatchMesh(meshletCount, 1, 1, s_payload);
+	DispatchMesh(s_visibleMeshletCount, 1, 1, s_payload);
 }
 
 #pragma anki end task
@@ -258,7 +303,11 @@ constexpr U32 g_dummy = 0; // The formater is getting confused so add this
 main(in payload MeshShaderPayload payload, out vertices VertOut verts[kMaxVerticesPerMeshlet], out indices UVec3 indices[kMaxPrimitivesPerMeshlet],
 	 U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX)
 {
-	const Meshlet meshlet = g_meshlets[payload.m_firstMeshletIndex + svGroupId];
+	const U32 groupIdx = svGroupId / 4u;
+	const U32 localIdx = svGroupId % 4u;
+	const U32 relativeMeshletIdx = (payload.m_visibleMeshletsRelativeIndices[groupIdx] >> (localIdx * 8u)) & 0xFFu;
+
+	const Meshlet meshlet = g_meshlets[payload.m_firstMeshletIndex + relativeMeshletIdx];
 	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
 	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
 
@@ -390,7 +439,8 @@ FragOut main(VertOut input)
 
 #	if NORMAL_TEX
 	const RVec3 nAtTangentspace = normalize((g_bindlessTextures2dF32[localConstants.m_normalTex].Sample(g_globalSampler, uv).rgb - 0.5) * 2.0);
-	const RVec3 normal = perturbNormal(nAtTangentspace, input.m_worldPos, uv, input.m_normal);
+	const Vec3 viewDir = normalize(g_globalConstants.m_cameraTransform.getTranslationPart() - input.m_worldPos);
+	const RVec3 normal = perturbNormal(nAtTangentspace, viewDir, uv, normalize(input.m_normal));
 #	else
 	const RVec3 normal = normalize(input.m_normal);
 #	endif

+ 37 - 21
AnKi/Shaders/Include/Common.h

@@ -268,6 +268,13 @@ struct Mat3
 	Vec3 m_row2;
 
 	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(Mat3, F32)
+
+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2)
+	{
+		m_row0 = Vec3(c0.x, c1.x, c2.x);
+		m_row1 = Vec3(c0.y, c1.y, c2.y);
+		m_row2 = Vec3(c0.z, c1.z, c2.z);
+	}
 };
 
 struct Mat4
@@ -278,6 +285,19 @@ struct Mat4
 	Vec4 m_row3;
 
 	_ANKI_DEFINE_ALL_OPERATORS_ROWS4(Mat4, F32)
+
+	Vec4 getTranslationPart()
+	{
+		return Vec4(m_row0.w, m_row1.w, m_row2.w, m_row3.w);
+	}
+
+	void setColumns(Vec4 c0, Vec4 c1, Vec4 c2, Vec4 c3)
+	{
+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x);
+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
+		m_row3 = Vec4(c0.w, c1.w, c2.w, c3.w);
+	}
 };
 
 struct Mat3x4
@@ -292,6 +312,13 @@ struct Mat3x4
 	{
 		return Vec3(m_row0.w, m_row1.w, m_row2.w);
 	}
+
+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2, Vec3 c3)
+	{
+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x);
+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
+	}
 };
 
 #	if ANKI_FORCE_FULL_FP_PRECISION
@@ -313,30 +340,17 @@ struct RMat3
 	RVec3 m_row2;
 
 	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(RMat3, RF32)
+
+	void setColumns(RVec3 c0, RVec3 c1, RVec3 c2)
+	{
+		m_row0 = RVec3(c0.x, c1.x, c2.x);
+		m_row1 = RVec3(c0.y, c1.y, c2.y);
+		m_row2 = RVec3(c0.z, c1.z, c2.z);
+	}
 };
 #	endif
 
 // Matrix functions
-Mat3 constructMatrixColumns(Vec3 c0, Vec3 c1, Vec3 c2)
-{
-	Mat3 m;
-	m.m_row0 = Vec3(c0.x, c1.x, c2.x);
-	m.m_row1 = Vec3(c0.y, c1.y, c2.y);
-	m.m_row2 = Vec3(c0.z, c1.z, c2.z);
-	return m;
-}
-
-#	if !ANKI_FORCE_FULL_FP_PRECISION
-RMat3 constructMatrixColumns(RVec3 c0, RVec3 c1, RVec3 c2)
-{
-	RMat3 m;
-	m.m_row0 = RVec3(c0.x, c1.x, c2.x);
-	m.m_row1 = RVec3(c0.y, c1.y, c2.y);
-	m.m_row2 = RVec3(c0.z, c1.z, c2.z);
-	return m;
-}
-#	endif
-
 Vec3 mul(Mat3 m, Vec3 v)
 {
 	const F32 a = dot(m.m_row0, v);
@@ -404,7 +418,9 @@ Vec3 mul(Mat3x4 m, Vec4 v)
 
 Mat3 transpose(Mat3 m)
 {
-	return constructMatrixColumns(m.m_row0, m.m_row1, m.m_row2);
+	Mat3 o;
+	o.setColumns(m.m_row0, m.m_row1, m.m_row2);
+	return o;
 }
 
 Mat3x4 combineTransformations(Mat3x4 a_, Mat3x4 b_)

+ 2 - 3
AnKi/Shaders/Include/MeshTypes.h

@@ -88,13 +88,12 @@ struct Meshlet
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_firstPrimitive; // In size of kMeshletPrimitiveFormat
 	U32 m_primitiveCount_R16_Uint_vertexCount_R16_Uint;
-	U32 m_padding;
+	U32 m_coneDirection_R8G8B8_Snorm_minusSinAngle_R8_Snorm;
 
 	Vec3 m_sphereCenter;
 	F32 m_sphereRadius;
 
-	Vec3 m_coneApex;
-	U32 m_coneDirection_R8G8B8_Snorm_coneCosOfHalfAngle_R8_Snorm;
+	Vec4 m_padding;
 };
 // Power of 2 because the sizeof will be used as allocation alignment and allocation alignments need to be power of 2
 static_assert(isPowerOfTwo(sizeof(Meshlet)));

+ 13 - 0
AnKi/Shaders/PackFunctions.hlsl

@@ -80,6 +80,19 @@ Vec4 newUnpackUnorm4x8(const U32 u)
 	return c * (1.0 / 255.0);
 }
 
+U32 packSnorm4x8(Vec4 value)
+{
+	const IVec4 packed = IVec4(round(clamp(value, -1.0f, 1.0f) * 127.0f)) & 0xFFu;
+	return U32(packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24));
+}
+
+Vec4 unpackSnorm4x8(U32 value)
+{
+	const I32 signedValue = (I32)value;
+	const IVec4 packed = IVec4(signedValue << 24, signedValue << 16, signedValue << 8, signedValue) >> 24;
+	return clamp(Vec4(packed) / 127.0f, -1.0f, 1.0f);
+}
+
 // Convert from RGB to YCbCr.
 // The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
 Vec3 rgbToYCbCr(const Vec3 rgb)

BIN
Samples/SimpleScene/Assets/Mesh_0_d56f58fc33de003f.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_1_266a0dd9d2092f46.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_2_be53007bec464649.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_3_c026fdb5b74773ed.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_4_4d4aae6c030c4fd5.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_5_629309b27fa549a7.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_6_a078cf217893be6f.ankimesh


BIN
Samples/SimpleScene/Assets/Mesh_7_4b76b132380d8a62.ankimesh