浏览代码

Math library re-factoring

Panagiotis Christopoulos Charitos 9 月之前
父节点
当前提交
cccd687dc2
共有 43 个文件被更改,包括 619 次插入648 次删除
  1. 2 2
      AnKi/Collision/ConvexHullShape.cpp
  2. 6 6
      AnKi/Collision/FunctionsMisc.cpp
  3. 9 9
      AnKi/Collision/FunctionsTestCollision.cpp
  4. 3 3
      AnKi/Collision/Obb.cpp
  5. 3 3
      AnKi/Collision/Plane.cpp
  6. 1 1
      AnKi/Collision/Ray.h
  7. 3 3
      AnKi/Collision/Sphere.cpp
  8. 1 1
      AnKi/Collision/Sphere.h
  9. 2 2
      AnKi/Gr/Vulkan/VkCommandBuffer.cpp
  10. 5 5
      AnKi/Importer/GltfImporter.cpp
  11. 2 3
      AnKi/Importer/GltfImporterAnimation.cpp
  12. 2 2
      AnKi/Importer/GltfImporterMesh.cpp
  13. 1 1
      AnKi/Math/Functions.h
  14. 143 165
      AnKi/Math/Mat.h
  15. 49 34
      AnKi/Math/Quat.h
  16. 14 18
      AnKi/Math/Transform.h
  17. 140 157
      AnKi/Math/Vec.h
  18. 1 1
      AnKi/Physics/PhysicsBody.cpp
  19. 3 3
      AnKi/Renderer/IndirectDiffuseProbes.cpp
  20. 3 3
      AnKi/Renderer/ProbeReflections.cpp
  21. 3 3
      AnKi/Renderer/Renderer.cpp
  22. 2 2
      AnKi/Renderer/RtShadows.cpp
  23. 2 2
      AnKi/Renderer/ShadowMapping.cpp
  24. 23 2
      AnKi/Resource/MaterialResource.cpp
  25. 1 2
      AnKi/Resource/MeshResource.cpp
  26. 1 1
      AnKi/Scene/Components/BodyComponent.cpp
  27. 2 2
      AnKi/Scene/Components/DecalComponent.cpp
  28. 5 5
      AnKi/Scene/Components/LightComponent.cpp
  29. 1 2
      AnKi/Scene/Components/ParticleEmitterComponent.cpp
  30. 1 1
      AnKi/Scene/Frustum.cpp
  31. 1 1
      AnKi/Scene/SceneNode.h
  32. 4 4
      AnKi/Scene/SoftwareRasterizer.cpp
  33. 36 156
      AnKi/Script/Math.cpp
  34. 6 9
      AnKi/Script/Math.xml
  35. 1 1
      AnKi/Shaders/ForwardShadingFog.ankiprog
  36. 106 6
      AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
  37. 1 1
      AnKi/Shaders/ForwardShadingParticles.ankiprog
  38. 3 10
      AnKi/Shaders/GBufferGeneric.ankiprog
  39. 1 1
      AnKi/Shaders/Include/GpuSceneFunctions.h
  40. 17 6
      AnKi/Shaders/MaterialShadersCommon.hlsl
  41. 4 4
      Samples/PhysicsPlayground/Main.cpp
  42. 1 1
      Tests/Gr/GrAsyncCompute.cpp
  43. 4 4
      Tests/Math/Math.cpp

+ 2 - 2
AnKi/Collision/ConvexHullShape.cpp

@@ -15,7 +15,7 @@ ConvexHullShape ConvexHullShape::getTransformed(const Transform& trf) const
 	ConvexHullShape out = *this;
 
 	out.m_trf = m_trf.combineTransformations(trf);
-	out.m_invTrf = m_trf.getInverse();
+	out.m_invTrf = m_trf.invert();
 	out.m_trfIdentity = false;
 
 	return out;
@@ -24,7 +24,7 @@ ConvexHullShape ConvexHullShape::getTransformed(const Transform& trf) const
 void ConvexHullShape::setTransform(const Transform& trf)
 {
 	m_trf = trf;
-	m_invTrf = m_trf.getInverse();
+	m_invTrf = m_trf.invert();
 	m_trfIdentity = false;
 }
 

+ 6 - 6
AnKi/Collision/FunctionsMisc.cpp

@@ -18,7 +18,7 @@ void extractClipPlane(const Mat4& mvp, FrustumPlaneType id, Plane& plane)
 	{ \
 		const Vec4 planeEqationCoefs = mvp.getRow(a) op mvp.getRow(b); \
 		const Vec4 n = planeEqationCoefs.xyz0(); \
-		const F32 len = n.getLength(); \
+		const F32 len = n.length(); \
 		plane = Plane(n / len, -planeEqationCoefs.w() / len); \
 		break; \
 	}
@@ -29,7 +29,7 @@ void extractClipPlane(const Mat4& mvp, FrustumPlaneType id, Plane& plane)
 	{
 		const Vec4 planeEqationCoefs = mvp.getRow(2);
 		const Vec4 n = planeEqationCoefs.xyz0();
-		const F32 len = n.getLength();
+		const F32 len = n.length();
 		plane = Plane(n / len, -planeEqationCoefs.w() / len);
 		break;
 	}
@@ -72,7 +72,7 @@ static Vec4 computeBoundingSphere2(const Vec3 O, const Vec3 A)
 
 	const Vec3 o = 0.5f * a;
 
-	const F32 radius = o.getLength() + kEpsilonf;
+	const F32 radius = o.length() + kEpsilonf;
 	const Vec3 center = O + o;
 
 	return Vec4(center, radius);
@@ -90,7 +90,7 @@ static Vec4 computeBoundingSphere3(const Vec3 O, const Vec3 A, const Vec3 B)
 	{
 		// A pair in A,B,O are the same point or they are in the same line
 
-		if(a.getLengthSquared() > b.getLengthSquared())
+		if(a.lengthSquared() > b.lengthSquared())
 		{
 			return computeBoundingSphere2(O, A);
 		}
@@ -104,7 +104,7 @@ static Vec4 computeBoundingSphere3(const Vec3 O, const Vec3 A, const Vec3 B)
 	o += a.dot(a) * b.cross(acrossb);
 	o /= denominator;
 
-	const F32 radius = o.getLength() + kEpsilonf;
+	const F32 radius = o.length() + kEpsilonf;
 	const Vec3 center = O + o;
 
 	return Vec4(center, radius);
@@ -167,7 +167,7 @@ Vec4 computeBoundingSphereRecursive(WeakArray<const Vec3*> pPoints, U32 begin, U
 
 	for(U32 i = 0; i < p; i++)
 	{
-		const F32 distSq = (sphere.xyz() - *pPoints[begin + i]).getLengthSquared();
+		const F32 distSq = (sphere.xyz() - *pPoints[begin + i]).lengthSquared();
 		const F32 radiusSq = sphere.w() * sphere.w();
 
 		if(distSq > radiusSq)

+ 9 - 9
AnKi/Collision/FunctionsTestCollision.cpp

@@ -97,7 +97,7 @@ Bool testCollision(const Aabb& aabb, const Sphere& s)
 	// it's always smaller than rsq
 	const Vec4 sub = c - cp;
 
-	return (sub.getLengthSquared() <= (s.getRadius() * s.getRadius())) ? true : false;
+	return (sub.lengthSquared() <= (s.getRadius() * s.getRadius())) ? true : false;
 }
 
 Bool testCollision(const Aabb& aabb, const Obb& obb)
@@ -170,7 +170,7 @@ Bool testCollision([[maybe_unused]] const Aabb& aabb, [[maybe_unused]] const Con
 Bool testCollision(const Sphere& a, const Sphere& b)
 {
 	const F32 tmp = a.getRadius() + b.getRadius();
-	return (a.getCenter() - b.getCenter()).getLengthSquared() <= tmp * tmp;
+	return (a.getCenter() - b.getCenter()).lengthSquared() <= tmp * tmp;
 }
 
 Bool testCollision(const Sphere& sphere, const Obb& obb)
@@ -192,7 +192,7 @@ Bool testCollision(const Sphere& s, const LineSegment& ls)
 
 	if(w0dv < 0.0f) // if the ang is >90
 	{
-		return w0.getLengthSquared() <= rsq;
+		return w0.lengthSquared() <= rsq;
 	}
 
 	const Vec4 w1 = w0 - v; // aka center - P1, where P1 = seg.origin + seg.dir
@@ -200,12 +200,12 @@ Bool testCollision(const Sphere& s, const LineSegment& ls)
 
 	if(w1dv > 0.0f) // if the ang is <90
 	{
-		return w1.getLengthSquared() <= rsq;
+		return w1.lengthSquared() <= rsq;
 	}
 
 	// the big parenthesis is the projection of w0 to v
-	const Vec4 tmp = w0 - (v * (w0.dot(v) / v.getLengthSquared()));
-	return tmp.getLengthSquared() <= rsq;
+	const Vec4 tmp = w0 - (v * (w0.dot(v) / v.lengthSquared()));
+	return tmp.lengthSquared() <= rsq;
 }
 
 Bool testCollision(const Sphere& sphere, const Cone& cone)
@@ -353,7 +353,7 @@ Bool testCollision(const Plane& plane, const Ray& ray, Vec4& intersection)
 Bool testCollision(const Plane& plane, const Vec4& vector, Vec4& intersection)
 {
 	ANKI_ASSERT(vector.w() == 0.0f);
-	const Vec4 pp = vector.getNormalized();
+	const Vec4 pp = vector.normalize();
 	const F32 dot = pp.dot(plane.getNormal());
 
 	if(!isZero(dot))
@@ -370,7 +370,7 @@ Bool testCollision(const Plane& plane, const Vec4& vector, Vec4& intersection)
 
 Bool intersect(const Sphere& sphere, const Ray& ray, Array<Vec4, 2>& intersectionPoints, U& intersectionPointCount)
 {
-	ANKI_ASSERT(isZero(ray.getDirection().getLengthSquared() - 1.0f));
+	ANKI_ASSERT(isZero(ray.getDirection().lengthSquared() - 1.0f));
 
 	// See https://en.wikipedia.org/wiki/Line%E2%80%93sphere_intersection
 
@@ -382,7 +382,7 @@ Bool intersect(const Sphere& sphere, const Ray& ray, Array<Vec4, 2>& intersectio
 	const Vec4 o_c = o - c;
 
 	const F32 a = l.dot(o_c);
-	const F32 b = a * a - o_c.getLengthSquared() + R2;
+	const F32 b = a * a - o_c.lengthSquared() + R2;
 
 	if(b < 0.0f)
 	{

+ 3 - 3
AnKi/Collision/Obb.cpp

@@ -54,9 +54,9 @@ void Obb::getExtremePoints(Array<Vec4, 8>& points) const
 	points[RTF] = er;
 	points[LBB] = -er;
 
-	const Vec4 xAxis = Vec4(m_rotation.getColumn(0).getNormalized(), 0.0f);
-	const Vec4 yAxis = Vec4(m_rotation.getColumn(1).getNormalized(), 0.0f);
-	const Vec4 zAxis = Vec4(m_rotation.getColumn(2).getNormalized(), 0.0f);
+	const Vec4 xAxis = Vec4(m_rotation.getColumn(0).normalize(), 0.0f);
+	const Vec4 yAxis = Vec4(m_rotation.getColumn(1).normalize(), 0.0f);
+	const Vec4 zAxis = Vec4(m_rotation.getColumn(2).normalize(), 0.0f);
 
 	// Reflection: x1' = 2n|x1.n| - x1
 

+ 3 - 3
AnKi/Collision/Plane.cpp

@@ -18,9 +18,9 @@ void Plane::setFrom3Points(const Vec4& p0, const Vec4& p1, const Vec4& p2)
 	m_normal = u.cross(v);
 
 	// length of normal had better not be zero
-	ANKI_ASSERT(m_normal.getLengthSquared() != 0.0f);
+	ANKI_ASSERT(m_normal.lengthSquared() != 0.0f);
 
-	m_normal.normalize();
+	m_normal = m_normal.normalize();
 	m_offset = m_normal.dot(p0);
 }
 
@@ -29,7 +29,7 @@ void Plane::setFromPlaneEquation(F32 a, F32 b, F32 c, F32 d)
 	m_normal = Vec4(a, b, c, 0.0f);
 
 	// length of normal had better not be zero
-	ANKI_ASSERT(isZero(m_normal.getLength() - 1.0));
+	ANKI_ASSERT(isZero(m_normal.length() - 1.0));
 
 	m_offset = d;
 }

+ 1 - 1
AnKi/Collision/Ray.h

@@ -108,7 +108,7 @@ private:
 
 	void check() const
 	{
-		ANKI_ASSERT(m_origin.w() == 0.0f && m_dir.w() == 0.0f && isZero(m_dir.getLengthSquared() - 1.0f, kEpsilonf * 100.0f));
+		ANKI_ASSERT(m_origin.w() == 0.0f && m_dir.w() == 0.0f && isZero(m_dir.lengthSquared() - 1.0f, kEpsilonf * 100.0f));
 	}
 };
 /// @}

+ 3 - 3
AnKi/Collision/Sphere.cpp

@@ -12,7 +12,7 @@ Sphere Sphere::getCompoundShape(const Sphere& b) const
 	const Sphere& a = *this;
 
 	const Vec4 c = b.getCenter() - a.getCenter();
-	const F32 cLen = c.getLength();
+	const F32 cLen = c.length();
 
 	if(cLen + b.getRadius() < a.getRadius())
 	{
@@ -28,7 +28,7 @@ Sphere Sphere::getCompoundShape(const Sphere& b) const
 	const Vec4 ca = (-bnorm) * a.getRadius() + a.getCenter();
 	const Vec4 cb = (bnorm)*b.getRadius() + b.getCenter();
 
-	return Sphere((ca + cb) / 2.0f, (ca - cb).getLength() / 2.0f);
+	return Sphere((ca + cb) / 2.0f, (ca - cb).length() / 2.0f);
 }
 
 void Sphere::setFromPointCloud(const Vec3* pointBuffer, U pointCount, PtrSize pointStride, [[maybe_unused]] PtrSize buffSize)
@@ -64,7 +64,7 @@ void Sphere::setFromPointCloud(const Vec3* pointBuffer, U pointCount, PtrSize po
 			ANKI_ASSERT((ptrToNumber(ptr) + sizeof(Vec3) - ptrToNumber(pointBuffer)) <= buffSize);
 			const Vec3& pos = *reinterpret_cast<const Vec3*>(ptr);
 
-			const F32 dist = (Vec4(pos, 0.0f) - m_center).getLengthSquared();
+			const F32 dist = (Vec4(pos, 0.0f) - m_center).lengthSquared();
 			if(dist > maxDist)
 			{
 				maxDist = dist;

+ 1 - 1
AnKi/Collision/Sphere.h

@@ -108,7 +108,7 @@ public:
 	/// Compute the GJK support.
 	Vec4 computeSupport(const Vec4& dir) const
 	{
-		return m_center + dir.getNormalized() * m_radius;
+		return m_center + dir.normalize() * m_radius;
 	}
 
 private:

+ 2 - 2
AnKi/Gr/Vulkan/VkCommandBuffer.cpp

@@ -1038,8 +1038,8 @@ void CommandBuffer::setFastConstants(const void* data, U32 dataSize)
 {
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(data && dataSize && dataSize % 16 == 0);
-	ANKI_ASSERT(static_cast<const ShaderProgramImpl&>(self.getBoundProgram()).getReflection().m_descriptor.m_fastConstantsSize == dataSize
-				&& "The bound program should have push constants equal to the \"dataSize\" parameter");
+	// ANKI_ASSERT(static_cast<const ShaderProgramImpl&>(self.getBoundProgram()).getReflection().m_descriptor.m_fastConstantsSize == dataSize
+	//		&& "The bound program should have push constants equal to the \"dataSize\" parameter");
 
 	self.commandCommon();
 	self.m_descriptorState.setFastConstants(data, dataSize);

+ 5 - 5
AnKi/Importer/GltfImporter.cpp

@@ -56,9 +56,9 @@ static void removeScale(Mat4& m)
 	Vec3 yAxis = m.getColumn(1).xyz();
 	Vec3 zAxis = m.getColumn(2).xyz();
 
-	xAxis.normalize();
-	yAxis.normalize();
-	zAxis.normalize();
+	xAxis = xAxis.normalize();
+	yAxis = yAxis.normalize();
+	zAxis = zAxis.normalize();
 
 	Mat3 rot;
 	rot.setColumns(xAxis, yAxis, zAxis);
@@ -75,7 +75,7 @@ static void getNodeTransform(const cgltf_node& node, Vec3& tsl, Mat3& rot, Vec3&
 		Vec3 yAxis = trf.getColumn(1).xyz();
 		Vec3 zAxis = trf.getColumn(2).xyz();
 
-		scale = Vec3(xAxis.getLength(), yAxis.getLength(), zAxis.getLength());
+		scale = Vec3(xAxis.length(), yAxis.length(), zAxis.length());
 
 		removeScale(trf);
 		rot = trf.getRotationPart();
@@ -1016,7 +1016,7 @@ Error GltfImporter::writeSkeleton(const cgltf_skin& skin) const
 		// Bone transform
 		ANKI_CHECK(file.writeText("boneTransform=\""));
 		Mat4 btrf(&boneMats[i][0]);
-		btrf.transpose();
+		btrf = btrf.transpose();
 		const Mat3x4 btrf3x4(btrf);
 		for(U32 j = 0; j < 12; j++)
 		{

+ 2 - 3
AnKi/Importer/GltfImporterAnimation.cpp

@@ -234,8 +234,7 @@ Error GltfImporter::writeAnimation(const cgltf_animation& anim)
 				const F32 scaleEpsilon = 0.0001f;
 
 				// Normalize the scale because scaleEpsilon is relative
-				Vec3 scale = scales[i];
-				scale.normalize();
+				Vec3 scale = scales[i].normalize();
 
 				if(!scaleErrorReported && (absolute(scale[0] - scale[1]) > scaleEpsilon || absolute(scale[0] - scale[2]) > scaleEpsilon))
 				{
@@ -271,7 +270,7 @@ Error GltfImporter::writeAnimation(const cgltf_animation& anim)
 					return a.abs() < kKillEpsilon;
 				},
 				[&](const Vec3& a, const Vec3& b) -> Bool {
-					return (a - b).getLength() < kKillEpsilon;
+					return (a - b).length() < kKillEpsilon;
 				},
 				[&](const Vec3& a, const Vec3& b, F32 u) -> Vec3 {
 					return linearInterpolate(a, b, u);

+ 2 - 2
AnKi/Importer/GltfImporterMesh.cpp

@@ -274,7 +274,7 @@ static void fixNormals(const F32 normalsMergeAngle, SubMesh& submesh)
 			const Vec3& otherPos = submesh.m_verts[prevV].m_position;
 
 			// Check the positions dist
-			const F32 posDist = (otherPos - pos).getLengthSquared();
+			const F32 posDist = (otherPos - pos).lengthSquared();
 			if(posDist > kEpsilonf * kEpsilonf)
 			{
 				continue;
@@ -289,7 +289,7 @@ static void fixNormals(const F32 normalsMergeAngle, SubMesh& submesh)
 			}
 
 			// Merge normals
-			const Vec3 newNormal = (otherNormal + normal).getNormalized();
+			const Vec3 newNormal = (otherNormal + normal).normalize();
 			normal = newNormal;
 			otherNormal = newNormal;
 		}

+ 1 - 1
AnKi/Math/Functions.h

@@ -279,7 +279,7 @@ inline F32 computeTriangleArea(const TVec& a, const TVec& b, const TVec& c)
 {
 	const TVec ab = b - a;
 	const TVec ac = c - a;
-	const F32 area = ab.cross(ac).getLength() / 2.0f;
+	const F32 area = ab.cross(ac).length() / 2.0f;
 	return absolute(area);
 }
 /// @}

+ 143 - 165
AnKi/Math/Mat.h

@@ -95,17 +95,84 @@ public:
 
 	explicit constexpr TMat(const TQuat<T>& q) requires(kSize == 9)
 	{
-		setRotationPart(q);
+		TMat& m = *this;
+		// If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
+		ANKI_ASSERT(absolute(T(1) - q.length()) <= 0.002);
+
+		T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
+
+		xs = q.x() + q.x();
+		ys = q.y() + q.y();
+		zs = q.z() + q.z();
+		wx = q.w() * xs;
+		wy = q.w() * ys;
+		wz = q.w() * zs;
+		xx = q.x() * xs;
+		xy = q.x() * ys;
+		xz = q.x() * zs;
+		yy = q.y() * ys;
+		yz = q.y() * zs;
+		zz = q.z() * zs;
+
+		m(0, 0) = T(1) - (yy + zz);
+		m(0, 1) = xy - wz;
+		m(0, 2) = xz + wy;
+
+		m(1, 0) = xy + wz;
+		m(1, 1) = T(1) - (xx + zz);
+		m(1, 2) = yz - wx;
+
+		m(2, 0) = xz - wy;
+		m(2, 1) = yz + wx;
+		m(2, 2) = T(1) - (xx + yy);
 	}
 
 	explicit constexpr TMat(const TEuler<T>& e) requires(kSize == 9)
 	{
-		setRotationPart(e);
+		TMat& m = *this;
+		T ch, sh, ca, sa, cb, sb;
+		sinCos(e.y(), sh, ch);
+		sinCos(e.z(), sa, ca);
+		sinCos(e.x(), sb, cb);
+
+		m(0, 0) = ch * ca;
+		m(0, 1) = sh * sb - ch * sa * cb;
+		m(0, 2) = ch * sa * sb + sh * cb;
+		m(1, 0) = sa;
+		m(1, 1) = ca * cb;
+		m(1, 2) = -ca * sb;
+		m(2, 0) = -sh * ca;
+		m(2, 1) = sh * sa * cb + ch * sb;
+		m(2, 2) = -sh * sa * sb + ch * cb;
 	}
 
 	explicit constexpr TMat(const TAxisang<T>& axisang) requires(kSize == 9)
 	{
-		setRotationPart(axisang);
+		TMat& m = *this;
+		// Not normalized axis
+		ANKI_ASSERT(isZero<T>(T(1) - axisang.getAxis().getLength()));
+
+		T c, s;
+		sinCos(axisang.getAngle(), s, c);
+		T t = T(1) - c;
+
+		const TVec<T, 3>& axis = axisang.getAxis();
+		m(0, 0) = c + axis.x() * axis.x() * t;
+		m(1, 1) = c + axis.y() * axis.y() * t;
+		m(2, 2) = c + axis.z() * axis.z() * t;
+
+		T tmp1 = axis.x() * axis.y() * t;
+		T tmp2 = axis.z() * s;
+		m(1, 0) = tmp1 + tmp2;
+		m(0, 1) = tmp1 - tmp2;
+		tmp1 = axis.x() * axis.z() * t;
+		tmp2 = axis.y() * s;
+		m(2, 0) = tmp1 - tmp2;
+		m(0, 2) = tmp1 + tmp2;
+		tmp1 = axis.y() * axis.z() * t;
+		tmp2 = axis.x() * s;
+		m(2, 1) = tmp1 + tmp2;
+		m(1, 2) = tmp1 - tmp2;
 	}
 
 	// 4x4 specific constructors
@@ -242,7 +309,7 @@ public:
 		return m_arr2[j][i];
 	}
 
-	T operator()(const U j, const U i) const
+	[[nodiscard]] T operator()(const U j, const U i) const
 	{
 		return m_arr2[j][i];
 	}
@@ -252,7 +319,7 @@ public:
 		return m_arr1[n];
 	}
 
-	T operator[](const U n) const
+	[[nodiscard]] T operator[](const U n) const
 	{
 		return m_arr1[n];
 	}
@@ -271,7 +338,7 @@ public:
 		return *this;
 	}
 
-	TMat operator+(const TMat& b) const
+	[[nodiscard]] TMat operator+(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < kRowCount; ++i)
@@ -290,7 +357,7 @@ public:
 		return *this;
 	}
 
-	TMat operator-(const TMat& b) const
+	[[nodiscard]] TMat operator-(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < kRowCount; ++i)
@@ -309,7 +376,7 @@ public:
 		return *this;
 	}
 
-	TMat operator*(const TMat& b) const requires(kIsSquare && !kIs4x4Simd)
+	[[nodiscard]] TMat operator*(const TMat& b) const requires(kIsSquare && !kIs4x4Simd)
 	{
 		TMat out;
 		const TMat& a = *this;
@@ -328,7 +395,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TMat operator*(const TMat& b) const requires(kIs4x4Simd)
+	[[nodiscard]] TMat operator*(const TMat& b) const requires(kIs4x4Simd)
 	{
 		TMat out;
 		const auto& m = *this;
@@ -373,7 +440,7 @@ public:
 		return *this;
 	}
 
-	Bool operator==(const TMat& b) const
+	[[nodiscard]] Bool operator==(const TMat& b) const
 	{
 		for(U i = 0; i < N; i++)
 		{
@@ -385,7 +452,7 @@ public:
 		return true;
 	}
 
-	Bool operator!=(const TMat& b) const
+	[[nodiscard]] Bool operator!=(const TMat& b) const
 	{
 		for(U i = 0; i < N; i++)
 		{
@@ -400,7 +467,7 @@ public:
 
 	/// @name Operators with T
 	/// @{
-	TMat operator+(const T f) const
+	[[nodiscard]] TMat operator+(const T f) const
 	{
 		TMat out;
 		for(U i = 0; i < kRowCount; ++i)
@@ -419,7 +486,7 @@ public:
 		return *this;
 	}
 
-	TMat operator-(const T f) const
+	[[nodiscard]] TMat operator-(const T f) const
 	{
 		TMat out;
 		for(U i = 0; i < kRowCount; ++i)
@@ -438,7 +505,7 @@ public:
 		return *this;
 	}
 
-	TMat operator*(const T f) const
+	[[nodiscard]] TMat operator*(const T f) const
 	{
 		TMat out;
 		for(U i = 0; i < kRowCount; ++i)
@@ -457,7 +524,7 @@ public:
 		return *this;
 	}
 
-	TMat operator/(const T f) const
+	[[nodiscard]] TMat operator/(const T f) const
 	{
 		ANKI_ASSERT(f != T(0));
 		TMat out;
@@ -481,7 +548,7 @@ public:
 
 	/// @name Operators with other types
 	/// @{
-	ColumnVec operator*(const RowVec& v) const requires(!kHasSimd)
+	[[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(!kHasSimd)
 	{
 		const TMat& m = *this;
 		ColumnVec out;
@@ -498,7 +565,7 @@ public:
 	}
 
 #if ANKI_SIMD_SSE
-	ColumnVec operator*(const RowVec& v) const requires(kIs4x4Simd)
+	[[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kIs4x4Simd)
 	{
 		__m128 a = _mm_mul_ps(m_simd[0], v.getSimd());
 		__m128 b = _mm_mul_ps(m_simd[1], v.getSimd());
@@ -511,7 +578,7 @@ public:
 		return RowVec(_mm_hadd_ps(a, c));
 	}
 
-	ColumnVec operator*(const RowVec& v) const requires(kIs3x4Simd)
+	[[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kIs3x4Simd)
 	{
 		__m128 a = _mm_mul_ps(m_simd[0], v.getSimd());
 		__m128 b = _mm_mul_ps(m_simd[1], v.getSimd());
@@ -524,8 +591,7 @@ public:
 	}
 
 #else
-
-	ColumnVec operator*(const RowVec& v) const requires(kHasSimd)
+	[[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kHasSimd)
 	{
 		ColumnVec out;
 		for(U i = 0; i < kTRowCount; i++)
@@ -596,7 +662,7 @@ public:
 		setColumn(3, d);
 	}
 
-	ColumnVec getColumn(const U i) const
+	[[nodiscard]] ColumnVec getColumn(const U i) const
 	{
 		ColumnVec out;
 		for(U j = 0; j < kTRowCount; j++)
@@ -620,19 +686,19 @@ public:
 	}
 
 	/// Get 1st column
-	ColumnVec getXAxis() const
+	[[nodiscard]] ColumnVec getXAxis() const
 	{
 		return getColumn(0);
 	}
 
 	/// Get 2nd column
-	ColumnVec getYAxis() const
+	[[nodiscard]] ColumnVec getYAxis() const
 	{
 		return getColumn(1);
 	}
 
 	/// Get 3rd column
-	ColumnVec getZAxis() const
+	[[nodiscard]] ColumnVec getZAxis() const
 	{
 		return getColumn(2);
 	}
@@ -798,89 +864,7 @@ public:
 		}
 	}
 
-	void setRotationPart(const TQuat<T>& q)
-	{
-		TMat& m = *this;
-		// If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
-		ANKI_ASSERT(absolute(T(1) - q.getLength()) <= 0.002);
-
-		T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
-
-		xs = q.x() + q.x();
-		ys = q.y() + q.y();
-		zs = q.z() + q.z();
-		wx = q.w() * xs;
-		wy = q.w() * ys;
-		wz = q.w() * zs;
-		xx = q.x() * xs;
-		xy = q.x() * ys;
-		xz = q.x() * zs;
-		yy = q.y() * ys;
-		yz = q.y() * zs;
-		zz = q.z() * zs;
-
-		m(0, 0) = T(1) - (yy + zz);
-		m(0, 1) = xy - wz;
-		m(0, 2) = xz + wy;
-
-		m(1, 0) = xy + wz;
-		m(1, 1) = T(1) - (xx + zz);
-		m(1, 2) = yz - wx;
-
-		m(2, 0) = xz - wy;
-		m(2, 1) = yz + wx;
-		m(2, 2) = T(1) - (xx + yy);
-	}
-
-	void setRotationPart(const TEuler<T>& e)
-	{
-		TMat& m = *this;
-		T ch, sh, ca, sa, cb, sb;
-		sinCos(e.y(), sh, ch);
-		sinCos(e.z(), sa, ca);
-		sinCos(e.x(), sb, cb);
-
-		m(0, 0) = ch * ca;
-		m(0, 1) = sh * sb - ch * sa * cb;
-		m(0, 2) = ch * sa * sb + sh * cb;
-		m(1, 0) = sa;
-		m(1, 1) = ca * cb;
-		m(1, 2) = -ca * sb;
-		m(2, 0) = -sh * ca;
-		m(2, 1) = sh * sa * cb + ch * sb;
-		m(2, 2) = -sh * sa * sb + ch * cb;
-	}
-
-	void setRotationPart(const TAxisang<T>& axisang)
-	{
-		TMat& m = *this;
-		// Not normalized axis
-		ANKI_ASSERT(isZero<T>(T(1) - axisang.getAxis().getLength()));
-
-		T c, s;
-		sinCos(axisang.getAngle(), s, c);
-		T t = T(1) - c;
-
-		const TVec<T, 3>& axis = axisang.getAxis();
-		m(0, 0) = c + axis.x() * axis.x() * t;
-		m(1, 1) = c + axis.y() * axis.y() * t;
-		m(2, 2) = c + axis.z() * axis.z() * t;
-
-		T tmp1 = axis.x() * axis.y() * t;
-		T tmp2 = axis.z() * s;
-		m(1, 0) = tmp1 + tmp2;
-		m(0, 1) = tmp1 - tmp2;
-		tmp1 = axis.x() * axis.z() * t;
-		tmp2 = axis.y() * s;
-		m(2, 0) = tmp1 - tmp2;
-		m(0, 2) = tmp1 + tmp2;
-		tmp1 = axis.y() * axis.z() * t;
-		tmp2 = axis.x() * s;
-		m(2, 1) = tmp1 + tmp2;
-		m(1, 2) = tmp1 - tmp2;
-	}
-
-	TMat<T, 3, 3> getRotationPart() const
+	[[nodiscard]] TMat<T, 3, 3> getRotationPart() const
 	{
 		const TMat& m = *this;
 		TMat<T, 3, 3> m3;
@@ -905,53 +889,66 @@ public:
 		setColumn(3, c);
 	}
 
-	ColumnVec getTranslationPart() const
+	[[nodiscard]] ColumnVec getTranslationPart() const requires(kTColumnCount == 4)
 	{
 		return getColumn(3);
 	}
 
-	void reorthogonalize()
+	[[nodiscard]] TMat reorthogonalize() const requires(kTRowCount == 3)
 	{
 		// There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For the first see < r664
 		ColumnVec xAxis, yAxis, zAxis;
 		getColumns(xAxis, yAxis, zAxis);
 
-		xAxis.normalize();
+		xAxis = xAxis.normalize();
 
 		yAxis = yAxis - (xAxis * xAxis.dot(yAxis));
-		yAxis.normalize();
+		yAxis = yAxis.normalize();
 
 		zAxis = xAxis.cross(yAxis);
 
-		setColumns(xAxis, yAxis, zAxis);
+		TMat out = *this;
+		out.setColumns(xAxis, yAxis, zAxis);
+		return out;
 	}
 
-	void transpose() requires(kIsSquare && !kHasSimd)
+	[[nodiscard]] TMat transpose() const requires(kIsSquare && !kHasSimd)
 	{
+		TMat out;
 		for(U j = 0; j < kTRowCount; j++)
 		{
-			for(U i = j + 1; i < kTColumnCount; i++)
+			for(U i = 0; i < kTColumnCount; i++)
 			{
-				T tmp = m_arr2[j][i];
-				m_arr2[j][i] = m_arr2[i][j];
-				m_arr2[i][j] = tmp;
+				out.m_arr2[i][j] = m_arr2[j][i];
 			}
 		}
+		return out;
 	}
 
 #if ANKI_ENABLE_SIMD
-	void transpose() requires(kIsSquare&& kHasSimd)
+	[[nodiscard]] TMat transpose() const requires(kIsSquare&& kHasSimd)
 	{
+		TMat out;
+
 #	if ANKI_SIMD_SSE
-		_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
+		const __m128 tmp0 = _mm_shuffle_ps(m_simd[0], m_simd[1], 0x44);
+		const __m128 tmp2 = _mm_shuffle_ps(m_simd[0], m_simd[1], 0xEE);
+		const __m128 tmp1 = _mm_shuffle_ps(m_simd[2], m_simd[3], 0x44);
+		const __m128 tmp3 = _mm_shuffle_ps(m_simd[2], m_simd[3], 0xEE);
+
+		out.m_simd[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
+		out.m_simd[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
+		out.m_simd[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
+		out.m_simd[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
 #	else
 		const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
 		const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
-		m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
-		m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
-		m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
-		m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
+		out.m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
+		out.m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
+		out.m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
+		out.m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
 #	endif
+		return out;
 	}
 #endif
 
@@ -968,20 +965,7 @@ public:
 		}
 	}
 
-	TMat getTransposed() const requires(kIsSquare)
-	{
-		TMat out;
-		for(U j = 0; j < kTRowCount; j++)
-		{
-			for(U i = 0; i < kTColumnCount; i++)
-			{
-				out.m_arr2[i][j] = m_arr2[j][i];
-			}
-		}
-		return out;
-	}
-
-	T getDet() const requires(kSize == 9)
+	[[nodiscard]] T getDet() const requires(kSize == 9)
 	{
 		const auto& m = *this;
 		// For the accurate method see < r664
@@ -989,7 +973,7 @@ public:
 			   + m(0, 2) * (m(0, 1) * m(2, 1) - m(1, 1) * m(2, 0));
 	}
 
-	T getDet() const requires(kSize == 16)
+	[[nodiscard]] T getDet() const requires(kSize == 16)
 	{
 		const auto& t = *this;
 		return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0) - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0)
@@ -1002,7 +986,7 @@ public:
 			   - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3) - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
 	}
 
-	TMat getInverse() const requires(kSize == 9)
+	[[nodiscard]] TMat invert() const requires(kSize == 9)
 	{
 		// Using Gramer's method Inv(A) = (1 / getDet(A)) * Adj(A)
 		const TMat& m = *this;
@@ -1034,7 +1018,7 @@ public:
 	}
 
 	/// Invert using Cramer's rule
-	TMat getInverse() const requires(kSize == 16)
+	[[nodiscard]] TMat invert() const requires(kSize == 16)
 	{
 		Array<T, 12> tmp;
 		const auto& in = (*this);
@@ -1108,14 +1092,8 @@ public:
 		return m4;
 	}
 
-	/// See getInverse
-	void invert() requires(kSize == 16 || kSize == 9)
-	{
-		(*this) = getInverse();
-	}
-
 	/// 12 muls, 27 adds. Something like m4 = m0 * m1 but without touching the 4rth row and allot faster
-	static TMat combineTransformations(const TMat& m0, const TMat& m1) requires(kSize == 16)
+	[[nodiscard]] static TMat combineTransformations(const TMat& m0, const TMat& m1) requires(kSize == 16)
 	{
 		// See the clean code in < r664
 
@@ -1316,7 +1294,7 @@ public:
 	/// Vec2 xy = ndc.xy() * unprojParams.xy() * z;
 	/// Vec3 posViewSpace(xy, z);
 	/// @endcode
-	static TVec<T, 4> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far) requires(kSize == 16)
+	[[nodiscard]] static TVec<T, 4> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far) requires(kSize == 16)
 	{
 		TVec<T, 4> out;
 		const T g = near - far;
@@ -1347,7 +1325,7 @@ public:
 	}
 
 	/// Assuming this is a projection matrix extract the unprojection parameters. See calculatePerspectiveUnprojectionParams for more info.
-	TVec<T, 4> extractPerspectiveUnprojectionParams() const requires(kSize == 16)
+	[[nodiscard]] TVec<T, 4> extractPerspectiveUnprojectionParams() const requires(kSize == 16)
 	{
 		TVec<T, 4> out;
 		const auto& m = *this;
@@ -1359,25 +1337,25 @@ public:
 	}
 
 	/// If we suppose this matrix represents a transformation, return the inverted transformation
-	TMat getInverseTransformation() const requires(kSize == 16)
+	[[nodiscard]] TMat invertTransformation() const requires(kSize == 16)
 	{
-		const TMat<T, 3, 3> invertedRot = getRotationPart().getTransposed();
+		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
 		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
 		invertedTsl = -(invertedRot * invertedTsl);
 		return TMat(invertedTsl.xyz0(), invertedRot);
 	}
 
 	/// If we suppose this matrix represents a transformation, return the inverted transformation
-	TMat getInverseTransformation() const requires(kSize == 12)
+	[[nodiscard]] TMat invertTransformation() const requires(kSize == 12)
 	{
-		const TMat<T, 3, 3> invertedRot = getRotationPart().getTransposed();
+		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
 		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
 		invertedTsl = -(invertedRot * invertedTsl);
 		return TMat(invertedTsl.xyz(), invertedRot);
 	}
 
 	/// @note 9 muls, 9 adds
-	TVec<T, 3> transform(const TVec<T, 3>& v) const requires(kSize == 16)
+	[[nodiscard]] TVec<T, 3> transform(const TVec<T, 3>& v) const requires(kSize == 16)
 	{
 		const auto& m = *this;
 		return TVec<T, 3>(m(0, 0) * v.x() + m(0, 1) * v.y() + m(0, 2) * v.z() + m(0, 3),
@@ -1387,11 +1365,11 @@ public:
 
 	/// Create a new transform matrix position at eye and looking at refPoint.
 	template<U kVecDimensions>
-	static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
-					   const TVec<T, kVecDimensions>& up) requires(kTRowCount == 3 && kTColumnCount == 4 && kVecDimensions >= 3)
+	[[nodiscard]] static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
+									 const TVec<T, kVecDimensions>& up) requires(kTRowCount == 3 && kTColumnCount == 4 && kVecDimensions >= 3)
 	{
-		const TVec<T, 3> vdir = (refPoint.xyz() - eye.xyz()).getNormalized();
-		const TVec<T, 3> vup = (up.xyz() - vdir * up.xyz().dot(vdir)).getNormalized();
+		const TVec<T, 3> vdir = (refPoint.xyz() - eye.xyz()).normalize();
+		const TVec<T, 3> vup = (up.xyz() - vdir * up.xyz().dot(vdir)).normalize();
 		const TVec<T, 3> vside = vdir.cross(vup);
 		TMat out;
 		out.setColumns(vside, vup, -vdir, eye.xyz());
@@ -1400,11 +1378,11 @@ public:
 
 	/// Create a new transform matrix position at eye and looking at refPoint.
 	template<U kVecDimensions>
-	static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
-					   const TVec<T, kVecDimensions>& up) requires(kTRowCount == 4 && kTColumnCount == 4 && kVecDimensions >= 3)
+	[[nodiscard]] static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
+									 const TVec<T, kVecDimensions>& up) requires(kTRowCount == 4 && kTColumnCount == 4 && kVecDimensions >= 3)
 	{
-		const TVec<T, 4> vdir = (refPoint.xyz0() - eye.xyz0()).getNormalized();
-		const TVec<T, 4> vup = (up.xyz0() - vdir * up.xyz0().dot(vdir)).getNormalized();
+		const TVec<T, 4> vdir = (refPoint.xyz0() - eye.xyz0()).normalize();
+		const TVec<T, 4> vup = (up.xyz0() - vdir * up.xyz0().dot(vdir)).normalize();
 		const TVec<T, 4> vside = vdir.cross(vup);
 		TMat out;
 		out.setColumns(vside, vup, -vdir, eye.xyz1());
@@ -1412,7 +1390,7 @@ public:
 	}
 
 	/// Create a rotation matrix from some direction. http://jcgt.org/published/0006/01/01/
-	static TMat rotationFromDirection(const TVec<T, 3>& zAxis) requires(kSize == 9)
+	[[nodiscard]] static TMat rotationFromDirection(const TVec<T, 3>& zAxis) requires(kSize == 9)
 	{
 		const TVec<T, 3> z = zAxis;
 		const T sign = (z.z() >= T(0)) ? T(1) : -T(1);
@@ -1427,7 +1405,7 @@ public:
 		return out;
 	}
 
-	TMat lerp(const TMat& b, T t) const
+	[[nodiscard]] TMat lerp(const TMat& b, T t) const
 	{
 		return ((*this) * (T(1) - t)) + (b * t);
 	}
@@ -1467,7 +1445,7 @@ public:
 		return U8(kTColumnCount * kTRowCount);
 	}
 
-	String toString() const requires(std::is_floating_point<T>::value)
+	[[nodiscard]] String toString() const requires(std::is_floating_point<T>::value)
 	{
 		String str;
 		for(U j = 0; j < kTRowCount; ++j)

+ 49 - 34
AnKi/Math/Quat.h

@@ -19,6 +19,7 @@ class alignas(16) TQuat
 {
 public:
 	static constexpr Bool kSimdEnabled = std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
+	static constexpr Bool kSseEnabled = kSimdEnabled && ANKI_SIMD_SSE;
 
 	/// @name Constructors
 	/// @{
@@ -124,7 +125,7 @@ public:
 
 	explicit TQuat(const TAxisang<T>& axisang)
 	{
-		const T lengthsq = axisang.getAxis().getLengthSquared();
+		const T lengthsq = axisang.getAxis().lengthSquared();
 		if(isZero<T>(lengthsq))
 		{
 			(*this) = getIdentity();
@@ -138,10 +139,7 @@ public:
 
 		const T scalefactor = sintheta / sqrt(lengthsq);
 
-		x() = scalefactor * axisang.getAxis().x();
-		y() = scalefactor * axisang.getAxis().y();
-		z() = scalefactor * axisang.getAxis().z();
-		w() = costheta;
+		m_value = TVec<T, 4>(axisang.getAxis(), costheta) * TVec<T, 4>(scalefactor, scalefactor, scalefactor, T(1));
 	}
 	/// @}
 
@@ -165,8 +163,9 @@ public:
 		return m_value != b.m_value;
 	}
 
+#if ANKI_SIMD_SSE
 	/// Combine rotations (SIMD version)
-	TQuat operator*(const TQuat& b) requires(kSimdEnabled&& ANKI_SIMD_SSE)
+	TQuat operator*(const TQuat& b) requires(kSseEnabled)
 	{
 		// Taken from: http://momchil-velikov.blogspot.nl/2013/10/fast-sse-quternion-multiplication.html
 		const __m128 abcd = m_value.getSimd();
@@ -213,9 +212,10 @@ public:
 		// [dw-ax-by-cz,dz+ay-bx+cw,dy-az+bw+cx,dx+aw+bz-cy]
 		return TQuat(TVec<T, 4>(_mm_shuffle_ps(e, e, _MM_SHUFFLE(2, 3, 1, 0))));
 	}
+#endif
 
 	/// Combine rotations (non-SIMD version)
-	TQuat operator*(const TQuat& b) requires(!(kSimdEnabled && ANKI_SIMD_SSE))
+	TQuat operator*(const TQuat& b) requires(!kSseEnabled)
 	{
 		const T lx = m_value.x();
 		const T ly = m_value.y();
@@ -260,7 +260,7 @@ public:
 	{
 		// Rotating a vector by a quaternion is done by: p' = q * p * q^-1 (q^-1 = conjugated(q) for a unit quaternion)
 		ANKI_ASSERT(isZero<T>(T(1) - m_value.getLength()));
-		return TVec<T, 3>((*this * TQuat(TVec<T, 4>(inValue, T(0))) * getConjugated()).m_value.xyz());
+		return TVec<T, 3>((*this * TQuat(TVec<T, 4>(inValue, T(0))) * conjugated()).m_value.xyz());
 	}
 	/// @}
 
@@ -310,57 +310,49 @@ public:
 	/// @name Other
 	/// @{
 
-	T getLength() const
+	[[nodiscard]] T length() const
 	{
-		return m_value.getLength();
+		return m_value.length();
 	}
 
 	/// Calculates the rotation from vector "from" to "to".
-	void setFromPointToPoint(const TVec<T, 3>& from, const TVec<T, 3>& to)
+	static TQuat fromPointToPoint(const TVec<T, 3>& from, const TVec<T, 3>& to)
 	{
 		const TVec<T, 3> axis(from.cross(to));
-		*this = TQuat(axis.x(), axis.y(), axis.z(), from.dot(to));
-		m_value.normalize();
-		w() += T(1);
+		TVec<T, 4> quat = TVec4<T, 4>(axis.x(), axis.y(), axis.z(), from.dot(to));
+		quat = quat.normalize();
+		quat.w() += T(1);
 
-		if(w() <= T(0.0001))
+		if(quat.w() <= T(0.0001))
 		{
 			if(from.z() * from.z() > from.x() * from.x())
 			{
-				*this = TQuat(T(0), from.z(), -from.y(), T(0));
+				quat = TVec<T, 4>(T(0), from.z(), -from.y(), T(0));
 			}
 			else
 			{
-				*this = TQuat(from.y(), -from.x(), T(0), T(0));
+				quat = TVec<T, 4>(from.y(), -from.x(), T(0), T(0));
 			}
 		}
-		m_value.normalize();
-	}
-
-	TQuat getInverted() const
-	{
-		const T len = m_value.getLength();
-		ANKI_ASSERT(!isZero<T>(len));
-		return getConjugated() / len;
-	}
 
-	void invert()
-	{
-		*this = getInverted();
+		quat = quat.normalize();
+		return TQuat(quat);
 	}
 
-	void conjugate()
+	[[nodiscard]] TQuat invert() const
 	{
-		*this = getConjugated();
+		const T len = m_value.length();
+		ANKI_ASSERT(!isZero<T>(len));
+		return conjugated() / len;
 	}
 
-	TQuat getConjugated() const
+	[[nodiscard]] TQuat conjugated() const
 	{
 		return TQuat(m_value * TVec<T, 4>(T(-1), T(-1), T(-1), T(1)));
 	}
 
 	/// Returns slerp(this, q1, t)
-	TQuat slerp(const TQuat& destination, const T t) const
+	[[nodiscard]] TQuat slerp(const TQuat& destination, const T t) const
 	{
 		// Difference at which to LERP instead of SLERP
 		const T delta = T(0.0001);
@@ -395,7 +387,30 @@ public:
 
 		// Interpolate between the two quaternions
 		const TVec<T, 4> v = TVec<T, 4>(scale0) * m_value + TVec<T, 4>(scale1) * destination.m_value;
-		return TQuat(v.getNormalized());
+		return TQuat(v.normalize());
+	}
+
+	[[nodiscard]] TQuat rotateXAxis(const T rad) const
+	{
+		const TQuat r(Axisang<T>(rad, TVec<T, 3>(T(1), T(0), T(0))));
+		return r * (*this);
+	}
+
+	[[nodiscard]] TQuat rotateYAxis(const T rad) const
+	{
+		const TQuat r(Axisang<T>(rad, TVec<T, 3>(T(0), T(1), T(0))));
+		return r * (*this);
+	}
+
+	[[nodiscard]] TQuat rotateZAxis(const T rad) const
+	{
+		const TQuat r(Axisang<T>(rad, TVec<T, 3>(T(0), T(0), T(1))));
+		return r * (*this);
+	}
+
+	[[nodiscard]] TQuat normalize() const
+	{
+		return TQuat(m_value.normalize());
 	}
 
 	void setIdentity()

+ 14 - 18
AnKi/Math/Transform.h

@@ -40,7 +40,7 @@ public:
 		const TVec<T, 3> s1 = m4.getColumn(1).xyz();
 		const TVec<T, 3> s2 = m4.getColumn(2).xyz();
 
-		m_scale = TVec<T, 4>(s0.getLength(), s1.getLength(), s2.getLength(), T(0));
+		m_scale = TVec<T, 4>(s0.length(), s1.length(), s2.length(), T(0));
 
 		m_rotation.setColumns(s0 / m_scale.x(), s1 / m_scale.x(), s2 / m_scale.x(), TVec<T, 3>(T(0)));
 		m_origin = m4.getTranslationPart().xyz0();
@@ -145,7 +145,7 @@ public:
 
 	[[nodiscard]] static TTransform getIdentity()
 	{
-		return TTransform(TVec<T, 4>(T(0)), TMat<T, 3, 4>::getIdentity(), TVec<T, 4>(T(1), T(1), T(1), T(0)));
+		return TTransform();
 	}
 
 	/// @copybrief combineTTransformations
@@ -164,24 +164,17 @@ public:
 	}
 
 	/// Get the inverse transformation. Its faster that inverting a Mat4
-	[[nodiscard]] TTransform getInverse() const
+	[[nodiscard]] TTransform invert() const
 	{
 		TTransform o;
 		o.m_rotation = m_rotation;
 		o.m_rotation.transposeRotationPart();
-		o.m_scale = T(1) / m_scale.xyz1();
+		o.m_scale = TVec<T, 4>(T(1), T(1), T(1), T(0)) / m_scale.xyz1();
 		o.m_origin = -(o.m_rotation * (o.m_scale * m_origin)).xyz0();
-		check();
+		o.check();
 		return o;
 	}
 
-	void invert()
-	{
-		m_rotation.transposeRotationPart();
-		m_scale = T(1) / m_scale;
-		m_origin = -(m_rotation * (m_scale * m_origin));
-	}
-
 	/// Transform a TVec3
 	[[nodiscard]] TVec<T, 3> transform(const TVec<T, 3>& b) const
 	{
@@ -198,14 +191,17 @@ public:
 	}
 
 	template<U kVecComponentCount>
-	TTransform& lookAt(const TVec<T, kVecComponentCount>& refPoint, const TVec<T, kVecComponentCount>& up)
+	[[nodiscard]] TTransform lookAt(const TVec<T, kVecComponentCount>& refPoint, const TVec<T, kVecComponentCount>& up) const
 	{
 		const TVec<T, 4> j = up.xyz0();
-		const TVec<T, 4> vdir = (refPoint.xyz0() - m_origin).getNormalized();
-		const TVec<T, 4> vup = (j - vdir * j.dot(vdir)).getNormalized();
+		const TVec<T, 4> vdir = (refPoint.xyz0() - m_origin).normalize();
+		const TVec<T, 4> vup = (j - vdir * j.dot(vdir)).normalize();
 		const TVec<T, 4> vside = vdir.cross(vup);
-		m_rotation.setColumns(vside.xyz(), vup.xyz(), (-vdir).xyz());
-		return *this;
+		TTransform out;
+		out.m_origin = m_origin;
+		out.m_scale = m_scale;
+		out.m_rotation.setColumns(vside.xyz(), vup.xyz(), (-vdir).xyz());
+		return out;
 	}
 
 	[[nodiscard]] String toString() const requires(std::is_floating_point<T>::value)
@@ -225,7 +221,7 @@ public:
 		return str;
 	}
 
-	Bool hasUniformScale() const
+	[[nodiscard]] Bool hasUniformScale() const
 	{
 		return m_scale.x() == m_scale.y() && m_scale.x() == m_scale.z();
 	}

+ 140 - 157
AnKi/Math/Vec.h

@@ -20,6 +20,10 @@ template<typename T, U kTComponentCount>
 class alignas(MathSimd<T, kTComponentCount>::kAlignment) TVec
 {
 public:
+	friend class TVec<T, 2>;
+	friend class TVec<T, 3>;
+	friend class TVec<T, 4>;
+
 	using Scalar = T;
 	using Simd = typename MathSimd<T, kTComponentCount>::Type;
 	static constexpr U kComponentCount = kTComponentCount;
@@ -112,25 +116,19 @@ public:
 
 	// Vec3 specific
 
-	TVec(const T x_, const T y_, const T z_) requires(kTComponentCount == 3)
+	constexpr TVec(const T x_, const T y_, const T z_) requires(kTComponentCount == 3)
+		: m_arr{x_, y_, z_}
 	{
-		x() = x_;
-		y() = y_;
-		z() = z_;
 	}
 
-	TVec(const TVec<T, 2>& a, const T z_) requires(kTComponentCount == 3)
+	constexpr TVec(const TVec<T, 2>& a, const T z_) requires(kTComponentCount == 3)
+		: m_arr{a.m_arr[0], a.m_arr[1], z_}
 	{
-		x() = a.x();
-		y() = a.y();
-		z() = z_;
 	}
 
-	TVec(const T x_, const TVec<T, 2>& a) requires(kTComponentCount == 3)
+	constexpr TVec(const T x_, const TVec<T, 2>& a) requires(kTComponentCount == 3)
+		: m_arr{x_, a.m_arr[0], a.m_arr[1]}
 	{
-		x() = x_;
-		y() = a.x();
-		z() = a.y();
 	}
 
 	// Vec4 specific
@@ -151,52 +149,34 @@ public:
 	}
 #endif
 
-	TVec(const TVec<T, 3>& a, const T w_) requires(kTComponentCount == 4)
+	constexpr TVec(const TVec<T, 3>& a, const T w_) requires(kTComponentCount == 4)
+		: m_arr{a.m_arr[0], a.m_arr[1], a.m_arr[2], w_}
 	{
-		x() = a.x();
-		y() = a.y();
-		z() = a.z();
-		w() = w_;
 	}
 
-	TVec(const T x_, const TVec<T, 3>& a) requires(kTComponentCount == 4)
+	constexpr TVec(const T x_, const TVec<T, 3>& a) requires(kTComponentCount == 4)
+		: m_arr{x_, a.m_arr[0], a.m_arr[1], a.m_arr[2]}
 	{
-		x() = x_;
-		y() = a.x();
-		z() = a.y();
-		w() = a.z();
 	}
 
-	TVec(const TVec<T, 2>& a, const T z_, const T w_) requires(kTComponentCount == 4)
+	constexpr TVec(const TVec<T, 2>& a, const T z_, const T w_) requires(kTComponentCount == 4)
+		: m_arr{a.m_arr[0], a.m_arr[1], z_, w_}
 	{
-		x() = a.x();
-		y() = a.y();
-		z() = z_;
-		w() = w_;
 	}
 
-	TVec(const T x_, const TVec<T, 2>& a, const T w_) requires(kTComponentCount == 4)
+	constexpr TVec(const T x_, const TVec<T, 2>& a, const T w_) requires(kTComponentCount == 4)
+		: m_arr{x_, a.m_arr[0], a.m_arr[1], w_}
 	{
-		x() = x_;
-		y() = a.x();
-		z() = a.y();
-		w() = w_;
 	}
 
-	TVec(const T x_, const T y_, const TVec<T, 2>& a) requires(kTComponentCount == 4)
+	constexpr TVec(const T x_, const T y_, const TVec<T, 2>& a) requires(kTComponentCount == 4)
+		: m_arr{x_, y_, a.m_arr[0], a.m_arr[1]}
 	{
-		x() = x_;
-		y() = y_;
-		z() = a.x();
-		w() = a.y();
 	}
 
-	TVec(const TVec<T, 2>& a, const TVec<T, 2>& b) requires(kTComponentCount == 4)
+	constexpr TVec(const TVec<T, 2>& a, const TVec<T, 2>& b) requires(kTComponentCount == 4)
+		: m_arr{a.m_arr[0], a.m_arr[1], b.m_arr[0], b.m_arr[1]}
 	{
-		x() = a.x();
-		y() = a.y();
-		z() = b.x();
-		w() = b.y();
 	}
 	/// @}
 
@@ -1975,7 +1955,7 @@ public:
 		return *this;
 	}
 
-	TVec operator+(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec operator+(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -1986,7 +1966,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec operator+(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec operator+(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_add_ps(m_simd, b.m_simd));
@@ -2017,7 +1997,7 @@ public:
 	}
 #endif
 
-	TVec operator-(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec operator-(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2028,7 +2008,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec operator-(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec operator-(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_sub_ps(m_simd, b.m_simd));
@@ -2059,7 +2039,7 @@ public:
 	}
 #endif
 
-	TVec operator*(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec operator*(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2070,7 +2050,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec operator*(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec operator*(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_mul_ps(m_simd, b.m_simd));
@@ -2101,7 +2081,7 @@ public:
 	}
 #endif
 
-	TVec operator/(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec operator/(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2113,7 +2093,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec operator/(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec operator/(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_div_ps(m_simd, b.m_simd));
@@ -2145,7 +2125,7 @@ public:
 	}
 #endif
 
-	TVec operator-() const requires(!kVec4Simd)
+	[[nodiscard]] TVec operator-() const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2156,7 +2136,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec operator-() const requires(kVec4Simd)
+	[[nodiscard]] TVec operator-() const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_xor_ps(m_simd, _mm_set1_ps(-0.0)));
@@ -2166,7 +2146,7 @@ public:
 	}
 #endif
 
-	TVec operator<<(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator<<(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2185,7 +2165,7 @@ public:
 		return *this;
 	}
 
-	TVec operator>>(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator>>(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2204,7 +2184,7 @@ public:
 		return *this;
 	}
 
-	TVec operator&(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator&(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2223,7 +2203,7 @@ public:
 		return *this;
 	}
 
-	TVec operator|(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator|(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2242,7 +2222,7 @@ public:
 		return *this;
 	}
 
-	TVec operator^(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator^(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2261,7 +2241,7 @@ public:
 		return *this;
 	}
 
-	TVec operator%(const TVec& b) const requires(kIsInteger)
+	[[nodiscard]] TVec operator%(const TVec& b) const requires(kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2280,7 +2260,7 @@ public:
 		return *this;
 	}
 
-	Bool operator==(const TVec& b) const
+	[[nodiscard]] Bool operator==(const TVec& b) const
 	{
 		for(U i = 0; i < kTComponentCount; i++)
 		{
@@ -2292,12 +2272,12 @@ public:
 		return true;
 	}
 
-	Bool operator!=(const TVec& b) const
+	[[nodiscard]] Bool operator!=(const TVec& b) const
 	{
 		return !operator==(b);
 	}
 
-	Bool operator<(const TVec& b) const
+	[[nodiscard]] Bool operator<(const TVec& b) const
 	{
 		for(U i = 0; i < kTComponentCount; i++)
 		{
@@ -2309,7 +2289,7 @@ public:
 		return true;
 	}
 
-	Bool operator<=(const TVec& b) const
+	[[nodiscard]] Bool operator<=(const TVec& b) const
 	{
 		for(U i = 0; i < kTComponentCount; i++)
 		{
@@ -2321,7 +2301,7 @@ public:
 		return true;
 	}
 
-	Bool operator>(const TVec& b) const
+	[[nodiscard]] Bool operator>(const TVec& b) const
 	{
 		for(U i = 0; i < kTComponentCount; i++)
 		{
@@ -2333,7 +2313,7 @@ public:
 		return true;
 	}
 
-	Bool operator>=(const TVec& b) const
+	[[nodiscard]] Bool operator>=(const TVec& b) const
 	{
 		for(U i = 0; i < kTComponentCount; i++)
 		{
@@ -2348,7 +2328,7 @@ public:
 
 	/// @name Operators with T
 	/// @{
-	TVec operator+(const T f) const
+	[[nodiscard]] TVec operator+(const T f) const
 	{
 		return (*this) + TVec(f);
 	}
@@ -2359,7 +2339,7 @@ public:
 		return *this;
 	}
 
-	TVec operator-(const T f) const
+	[[nodiscard]] TVec operator-(const T f) const
 	{
 		return (*this) - TVec(f);
 	}
@@ -2370,7 +2350,7 @@ public:
 		return *this;
 	}
 
-	TVec operator*(const T f) const
+	[[nodiscard]] TVec operator*(const T f) const
 	{
 		return (*this) * TVec(f);
 	}
@@ -2381,7 +2361,7 @@ public:
 		return *this;
 	}
 
-	TVec operator/(const T f) const
+	[[nodiscard]] TVec operator/(const T f) const
 	{
 		return (*this) / TVec(f);
 	}
@@ -2392,7 +2372,7 @@ public:
 		return *this;
 	}
 
-	TVec operator<<(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator<<(const T f) const requires(kIsInteger)
 	{
 		return (*this) << TVec(f);
 	}
@@ -2403,7 +2383,7 @@ public:
 		return *this;
 	}
 
-	TVec operator>>(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator>>(const T f) const requires(kIsInteger)
 	{
 		return (*this) >> TVec(f);
 	}
@@ -2414,7 +2394,7 @@ public:
 		return *this;
 	}
 
-	TVec operator&(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator&(const T f) const requires(kIsInteger)
 	{
 		return (*this) & TVec(f);
 	}
@@ -2425,7 +2405,7 @@ public:
 		return *this;
 	}
 
-	TVec operator|(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator|(const T f) const requires(kIsInteger)
 	{
 		return (*this) | TVec(f);
 	}
@@ -2436,7 +2416,7 @@ public:
 		return *this;
 	}
 
-	TVec operator^(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator^(const T f) const requires(kIsInteger)
 	{
 		return (*this) ^ TVec(f);
 	}
@@ -2447,7 +2427,7 @@ public:
 		return *this;
 	}
 
-	TVec operator%(const T f) const requires(kIsInteger)
+	[[nodiscard]] TVec operator%(const T f) const requires(kIsInteger)
 	{
 		return (*this) % TVec(f);
 	}
@@ -2458,32 +2438,32 @@ public:
 		return *this;
 	}
 
-	Bool operator==(const T f) const
+	[[nodiscard]] Bool operator==(const T f) const
 	{
 		return *this == TVec(f);
 	}
 
-	Bool operator!=(const T f) const
+	[[nodiscard]] Bool operator!=(const T f) const
 	{
 		return *this != TVec(f);
 	}
 
-	Bool operator<(const T f) const
+	[[nodiscard]] Bool operator<(const T f) const
 	{
 		return *this < TVec(f);
 	}
 
-	Bool operator<=(const T f) const
+	[[nodiscard]] Bool operator<=(const T f) const
 	{
 		return *this <= TVec(f);
 	}
 
-	Bool operator>(const T f) const
+	[[nodiscard]] Bool operator>(const T f) const
 	{
 		return *this > TVec(f);
 	}
 
-	Bool operator>=(const T f) const
+	[[nodiscard]] Bool operator>=(const T f) const
 	{
 		return *this >= TVec(f);
 	}
@@ -2493,7 +2473,7 @@ public:
 	/// @{
 
 	/// @note 16 muls 12 adds
-	TVec operator*(const TMat<T, 4, 4>& m4) const requires(kTComponentCount == 4)
+	[[nodiscard]] TVec operator*(const TMat<T, 4, 4>& m4) const requires(kTComponentCount == 4)
 	{
 		TVec out;
 		out.x() = x() * m4(0, 0) + y() * m4(1, 0) + z() * m4(2, 0) + w() * m4(3, 0);
@@ -2506,7 +2486,7 @@ public:
 
 	/// @name Other
 	/// @{
-	T dot(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] T dot(const TVec& b) const requires(!kVec4Simd)
 	{
 		T out = T(0);
 		for(U i = 0; i < kTComponentCount; i++)
@@ -2517,7 +2497,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	T dot(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] T dot(const TVec& b) const requires(kVec4Simd)
 	{
 		T o;
 #	if ANKI_SIMD_SSE
@@ -2533,13 +2513,13 @@ public:
 #endif
 
 	/// 6 muls, 3 adds
-	TVec cross(const TVec& b) const requires(kTComponentCount == 3)
+	[[nodiscard]] TVec cross(const TVec& b) const requires(kTComponentCount == 3)
 	{
 		return TVec(y() * b.z() - z() * b.y(), z() * b.x() - x() * b.z(), x() * b.y() - y() * b.x());
 	}
 
 	/// It's like calculating the cross of a 3 component TVec.
-	TVec cross(const TVec& b) const requires(kTComponentCount == 4 && !kVec4Simd)
+	[[nodiscard]] TVec cross(const TVec& b) const requires(kTComponentCount == 4 && !kVec4Simd)
 	{
 		ANKI_ASSERT(w() == T(0));
 		ANKI_ASSERT(b.w() == T(0));
@@ -2547,7 +2527,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec cross(const TVec& b) const requires(kTComponentCount == 4 && kVec4Simd)
+	[[nodiscard]] TVec cross(const TVec& b) const requires(kTComponentCount == 4 && kVec4Simd)
 	{
 		ANKI_ASSERT(w() == T(0));
 		ANKI_ASSERT(b.w() == T(0));
@@ -2575,24 +2555,24 @@ public:
 	}
 #endif
 
-	TVec projectTo(const TVec& toThis) const requires(kTComponentCount == 3)
+	[[nodiscard]] TVec projectTo(const TVec& toThis) const requires(kTComponentCount == 3)
 	{
 		return toThis * ((*this).dot(toThis) / (toThis.dot(toThis)));
 	}
 
-	TVec projectTo(const TVec& toThis) const requires(kTComponentCount == 4)
+	[[nodiscard]] TVec projectTo(const TVec& toThis) const requires(kTComponentCount == 4)
 	{
 		ANKI_ASSERT(w() == T(0));
 		return (toThis * ((*this).dot(toThis) / (toThis.dot(toThis)))).xyz0();
 	}
 
-	TVec projectTo(const TVec& rayOrigin, const TVec& rayDir) const requires(kTComponentCount == 3)
+	[[nodiscard]] TVec projectTo(const TVec& rayOrigin, const TVec& rayDir) const requires(kTComponentCount == 3)
 	{
 		const auto& a = *this;
 		return rayOrigin + rayDir * ((a - rayOrigin).dot(rayDir));
 	}
 
-	TVec projectTo(const TVec& rayOrigin, const TVec& rayDir) const requires(kTComponentCount == 4)
+	[[nodiscard]] TVec projectTo(const TVec& rayOrigin, const TVec& rayDir) const requires(kTComponentCount == 4)
 	{
 		ANKI_ASSERT(w() == T(0));
 		ANKI_ASSERT(rayOrigin.w() == T(0));
@@ -2602,7 +2582,7 @@ public:
 	}
 
 	/// Perspective divide. Divide the xyzw of this to the w of this. This method will handle some edge cases.
-	TVec perspectiveDivide() const requires(kTComponentCount == 4)
+	[[nodiscard]] TVec perspectiveDivide() const requires(kTComponentCount == 4)
 	{
 		auto invw = T(1) / w(); // This may become (+-)inf
 		invw = (invw > 1e+11) ? 1e+11 : invw; // Clamp
@@ -2610,70 +2590,33 @@ public:
 		return (*this) * invw;
 	}
 
-	T getLengthSquared() const requires(!kVec4Simd)
-	{
-		T out = T(0);
-		for(U i = 0; i < kTComponentCount; i++)
-		{
-			out += m_arr[i] * m_arr[i];
-		}
-		return out;
-	}
-
-	T getLengthSquared() const requires(kVec4Simd)
+	[[nodiscard]] T lengthSquared() const
 	{
 		return dot(*this);
 	}
 
-	T getLength() const
+	[[nodiscard]] T length() const
 	{
-		return sqrt<T>(getLengthSquared());
+		return sqrt<T>(lengthSquared());
 	}
 
-	T getDistanceSquared(const TVec& b) const
+	[[nodiscard]] T distanceSquared(const TVec& b) const
 	{
-		return ((*this) - b).getLengthSquared();
+		return ((*this) - b).lengthSquared();
 	}
 
-	T getDistance(const TVec& b) const
+	[[nodiscard]] T distance(const TVec& b) const
 	{
-		return sqrt<T>(getDistance(b));
-	}
-
-	void normalize() requires(!kVec4Simd)
-	{
-		(*this) /= getLength();
-	}
-
-#if ANKI_ENABLE_SIMD
-	void normalize() requires(kVec4Simd)
-	{
-#	if ANKI_SIMD_SSE
-		const __m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
-		m_simd = _mm_mul_ps(m_simd, inverseNorm);
-#	else
-		// Dot (len squared)
-		float32x4_t tmp = m_simd * m_simd;
-		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
-		sum = vpadd_f32(sum, sum);
-		float32x4_t lensq = vdupq_lane_f32(sum, 0);
-
-		// 1/sqrt(lensq)
-		float32x4_t mul = vrsqrteq_f32(lensq);
-
-		// Multiply
-		m_simd *= mul;
-#	endif
+		return sqrt<T>(distance(b));
 	}
-#endif
 
-	TVec getNormalized() const requires(!kVec4Simd)
+	[[nodiscard]] TVec normalize() const requires(!kVec4Simd)
 	{
-		return (*this) / getLength();
+		return (*this) / length();
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec getNormalized() const requires(kVec4Simd)
+	[[nodiscard]] TVec normalize() const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		const __m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
@@ -2695,7 +2638,7 @@ public:
 #endif
 
 	/// Return lerp(this, v1, t)
-	TVec lerp(const TVec& v1, const TVec& t) const
+	[[nodiscard]] TVec lerp(const TVec& v1, const TVec& t) const
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2706,12 +2649,12 @@ public:
 	}
 
 	/// Return lerp(this, v1, t)
-	TVec lerp(const TVec& v1, T t) const
+	[[nodiscard]] TVec lerp(const TVec& v1, T t) const
 	{
 		return ((*this) * (T(1) - t)) + (v1 * t);
 	}
 
-	TVec abs() const requires(!kVec4Simd)
+	[[nodiscard]] TVec abs() const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2722,7 +2665,7 @@ public:
 	}
 
 #if ANKI_ENABLE_SIMD
-	TVec abs() const requires(kVec4Simd)
+	[[nodiscard]] TVec abs() const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		const __m128 signMask = _mm_set1_ps(-0.0f);
@@ -2734,19 +2677,19 @@ public:
 #endif
 
 	/// Get clamped between two values.
-	TVec clamp(const T minv, const T maxv) const
+	[[nodiscard]] TVec clamp(const T minv, const T maxv) const
 	{
 		return max(TVec(minv)).min(TVec(maxv));
 	}
 
 	/// Get clamped between two vectors.
-	TVec clamp(const TVec& minv, const TVec& maxv) const
+	[[nodiscard]] TVec clamp(const TVec& minv, const TVec& maxv) const
 	{
 		return max(minv).min(maxv);
 	}
 
 	/// Get the min of all components.
-	TVec min(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec min(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2758,7 +2701,7 @@ public:
 
 #if ANKI_ENABLE_SIMD
 	/// Get the min of all components.
-	TVec min(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec min(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_min_ps(m_simd, b.m_simd));
@@ -2769,13 +2712,13 @@ public:
 #endif
 
 	/// Get the min of all components.
-	TVec min(const T b) const
+	[[nodiscard]] TVec min(const T b) const
 	{
 		return min(TVec(b));
 	}
 
 	/// Get the max of all components.
-	TVec max(const TVec& b) const requires(!kVec4Simd)
+	[[nodiscard]] TVec max(const TVec& b) const requires(!kVec4Simd)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2787,7 +2730,7 @@ public:
 
 #if ANKI_ENABLE_SIMD
 	/// Get the max of all components.
-	TVec max(const TVec& b) const requires(kVec4Simd)
+	[[nodiscard]] TVec max(const TVec& b) const requires(kVec4Simd)
 	{
 #	if ANKI_SIMD_SSE
 		return TVec(_mm_max_ps(m_simd, b.m_simd));
@@ -2798,12 +2741,12 @@ public:
 #endif
 
 	/// Get the max of all components.
-	TVec max(const T b) const
+	[[nodiscard]] TVec max(const T b) const
 	{
 		return max(TVec(b));
 	}
 
-	TVec round() const requires(!kIsInteger)
+	[[nodiscard]] TVec round() const requires(!kIsInteger)
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2814,7 +2757,7 @@ public:
 	}
 
 	/// Get a safe 1 / (*this)
-	TVec reciprocal() const
+	[[nodiscard]] TVec reciprocal() const
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2825,7 +2768,7 @@ public:
 	}
 
 	/// Power
-	TVec pow(const TVec& b) const
+	[[nodiscard]] TVec pow(const TVec& b) const
 	{
 		TVec out;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2836,11 +2779,51 @@ public:
 	}
 
 	/// Power
-	TVec pow(T b) const
+	[[nodiscard]] TVec pow(T b) const
 	{
 		return pow(TVec(b));
 	}
 
+	static TVec xAxis() requires(kTComponentCount == 2)
+	{
+		return TVec(T(1), T(0));
+	}
+
+	static TVec xAxis() requires(kTComponentCount == 3)
+	{
+		return TVec(T(1), T(0), T(0));
+	}
+
+	static TVec xAxis() requires(kTComponentCount == 4)
+	{
+		return TVec(T(1), T(0), T(0), T(0));
+	}
+
+	static TVec yAxis() requires(kTComponentCount == 2)
+	{
+		return TVec(T(0), T(1));
+	}
+
+	static TVec yAxis() requires(kTComponentCount == 3)
+	{
+		return TVec(T(0), T(1), T(0));
+	}
+
+	static TVec yAxis() requires(kTComponentCount == 4)
+	{
+		return TVec(T(0), T(1), T(0), T(0));
+	}
+
+	static TVec zAxis() requires(kTComponentCount == 3)
+	{
+		return TVec(T(0), T(0), T(1));
+	}
+
+	static TVec zAxis() requires(kTComponentCount == 4)
+	{
+		return TVec(T(0), T(0), T(1), T(0));
+	}
+
 	/// Serialize the structure.
 	void serialize(void* data, PtrSize& size) const
 	{
@@ -2863,7 +2846,7 @@ public:
 		return U8(kTComponentCount);
 	}
 
-	String toString() const requires(std::is_floating_point<T>::value)
+	[[nodiscard]] String toString() const requires(std::is_floating_point<T>::value)
 	{
 		String str;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2874,7 +2857,7 @@ public:
 	}
 
 	static constexpr Bool kClangWorkaround = std::is_integral<T>::value && std::is_unsigned<T>::value;
-	String toString() const requires(kClangWorkaround)
+	[[nodiscard]] String toString() const requires(kClangWorkaround)
 	{
 		String str;
 		for(U i = 0; i < kTComponentCount; ++i)
@@ -2885,7 +2868,7 @@ public:
 	}
 
 	static constexpr Bool kClangWorkaround2 = std::is_integral<T>::value && std::is_signed<T>::value;
-	String toString() const requires(kClangWorkaround2)
+	[[nodiscard]] String toString() const requires(kClangWorkaround2)
 	{
 		String str;
 		for(U i = 0; i < kTComponentCount; ++i)

+ 1 - 1
AnKi/Physics/PhysicsBody.cpp

@@ -56,7 +56,7 @@ void PhysicsBody::init(const PhysicsBodyInitInfo& init)
 	const Quat rot = Quat(init.m_transform.getRotation());
 
 	// Create a scale shape
-	const Bool hasScale = (init.m_transform.getScale().xyz() - 1.0).getLengthSquared() > kEpsilonf * 10.0;
+	const Bool hasScale = (init.m_transform.getScale().xyz() - 1.0).lengthSquared() > kEpsilonf * 10.0;
 	PhysicsCollisionShapePtr scaledShape;
 	if(hasScale)
 	{

+ 3 - 3
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -250,7 +250,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 					RenderableDrawerArguments args;
 					args.m_viewMatrix = viewMat;
-					args.m_cameraTransform = args.m_viewMatrix.getInverseTransformation();
+					args.m_cameraTransform = args.m_viewMatrix.invertTransformation();
 					args.m_viewProjectionMatrix = viewProjMat;
 					args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 					args.m_renderingTechinuqe = RenderingTechnique::kGBuffer;
@@ -319,7 +319,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 					RenderableDrawerArguments args;
 					args.m_viewMatrix = cascadeViewMat;
-					args.m_cameraTransform = cascadeViewMat.getInverseTransformation();
+					args.m_cameraTransform = cascadeViewMat.invertTransformation();
 					args.m_viewProjectionMatrix = cascadeViewProjMat;
 					args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 					args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
@@ -392,7 +392,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 					// Draw light shading
 					TraditionalDeferredLightShadingDrawInfo dsInfo;
 					dsInfo.m_viewProjectionMatrix = viewProjMat;
-					dsInfo.m_invViewProjectionMatrix = viewProjMat.getInverse();
+					dsInfo.m_invViewProjectionMatrix = viewProjMat.invert();
 					dsInfo.m_cameraPosWSpace = cellCenter.xyz1();
 					dsInfo.m_viewport = UVec4(0, 0, m_tileSize, m_tileSize);
 					dsInfo.m_effectiveShadowDistance = (doShadows) ? probeToRefresh->getShadowsRenderRadius() : -1.0f;

+ 3 - 3
AnKi/Renderer/ProbeReflections.cpp

@@ -187,7 +187,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 					RenderableDrawerArguments args;
 					args.m_viewMatrix = viewMat;
-					args.m_cameraTransform = viewMat.getInverseTransformation();
+					args.m_cameraTransform = viewMat.invertTransformation();
 					args.m_viewProjectionMatrix = viewProjMat;
 					args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care about prev mats
 					args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
@@ -254,7 +254,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 				RenderableDrawerArguments args;
 				args.m_viewMatrix = cascadeViewMat;
-				args.m_cameraTransform = cascadeViewMat.getInverseTransformation();
+				args.m_cameraTransform = cascadeViewMat.invertTransformation();
 				args.m_viewProjectionMatrix = cascadeViewProjMat;
 				args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 				args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAniso.get();
@@ -319,7 +319,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 
 				TraditionalDeferredLightShadingDrawInfo dsInfo;
 				dsInfo.m_viewProjectionMatrix = viewProjMat;
-				dsInfo.m_invViewProjectionMatrix = viewProjMat.getInverse();
+				dsInfo.m_invViewProjectionMatrix = viewProjMat.invert();
 				dsInfo.m_cameraPosWSpace = probeToRefresh->getWorldPosition().xyz1();
 				dsInfo.m_viewport = UVec4(0, 0, m_lightShading.m_tileSize, m_lightShading.m_tileSize);
 				dsInfo.m_effectiveShadowDistance = probeToRefresh->getShadowsRenderRadius();

+ 3 - 3
AnKi/Renderer/Renderer.cpp

@@ -755,9 +755,9 @@ Error Renderer::render(Texture* presentTex)
 
 	ctx.m_matrices.m_projectionJitter = ctx.m_matrices.m_jitter * ctx.m_matrices.m_projection;
 	ctx.m_matrices.m_viewProjectionJitter = ctx.m_matrices.m_projectionJitter * Mat4(ctx.m_matrices.m_view, Vec4(0.0f, 0.0f, 0.0f, 1.0f));
-	ctx.m_matrices.m_invertedViewProjectionJitter = ctx.m_matrices.m_viewProjectionJitter.getInverse();
-	ctx.m_matrices.m_invertedViewProjection = ctx.m_matrices.m_viewProjection.getInverse();
-	ctx.m_matrices.m_invertedProjectionJitter = ctx.m_matrices.m_projectionJitter.getInverse();
+	ctx.m_matrices.m_invertedViewProjectionJitter = ctx.m_matrices.m_viewProjectionJitter.invert();
+	ctx.m_matrices.m_invertedViewProjection = ctx.m_matrices.m_viewProjection.invert();
+	ctx.m_matrices.m_invertedProjectionJitter = ctx.m_matrices.m_projectionJitter.invert();
 
 	ctx.m_matrices.m_reprojection = ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_invertedViewProjection;
 

+ 2 - 2
AnKi/Renderer/RtShadows.cpp

@@ -388,7 +388,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindUav(0, 0, m_runCtx.m_intermediateShadowsRts[1]);
 			rgraphCtx.bindUav(1, 0, m_runCtx.m_varianceRts[1]);
 
-			const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
+			const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.invert();
 			cmdb.setFastConstants(&invProjMat, sizeof(invProjMat));
 
 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
@@ -453,7 +453,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 					rgraphCtx.bindUav(0, 0, m_runCtx.m_historyRt);
 				}
 
-				const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
+				const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.invert();
 				cmdb.setFastConstants(&invProjMat, sizeof(invProjMat));
 
 				dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);

+ 2 - 2
AnKi/Renderer/ShadowMapping.cpp

@@ -147,7 +147,7 @@ void ShadowMapping::chooseDetail(const Vec3& cameraOrigin, const LightComponent&
 {
 	if(lightc.getLightComponentType() == LightComponentType::kPoint)
 	{
-		const F32 distFromTheCamera = (cameraOrigin - lightc.getWorldPosition()).getLength() - lightc.getRadius();
+		const F32 distFromTheCamera = (cameraOrigin - lightc.getWorldPosition()).length() - lightc.getRadius();
 		if(distFromTheCamera < lodDistances[0])
 		{
 			tileAllocatorHierarchy = kPointLightMaxTileAllocHierarchy;
@@ -643,7 +643,7 @@ void ShadowMapping::createDrawShadowsPass(ConstWeakArray<ShadowSubpassInfo> subp
 			RenderableDrawerArguments args;
 			args.m_renderingTechinuqe = RenderingTechnique::kDepth;
 			args.m_viewMatrix = spass.m_viewMat;
-			args.m_cameraTransform = spass.m_viewMat.getInverseTransformation();
+			args.m_cameraTransform = spass.m_viewMat.invertTransformation();
 			args.m_viewProjectionMatrix = spass.m_viewProjMat;
 			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
 			args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();

+ 23 - 2
AnKi/Resource/MaterialResource.cpp

@@ -203,7 +203,17 @@ Error MaterialResource::parseShaderProgram(XmlElement shaderProgramEl, Bool asyn
 				m_techniquesMask |= RenderingTechniqueBit::kRtShadow;
 			}
 		}
-		else if(t.m_name.getBegin() == CString("Forward"))
+		else if(t.m_name.getBegin() == CString("ForwardSwMeshletRendering"))
+		{
+			m_techniquesMask |= RenderingTechniqueBit::kForward;
+			m_shaderTechniques |= ShaderTechniqueBit::kSwMeshletRendering;
+		}
+		else if(t.m_name.getBegin() == CString("ForwardMeshShaders"))
+		{
+			m_techniquesMask |= RenderingTechniqueBit::kForward;
+			m_shaderTechniques |= ShaderTechniqueBit::kMeshSaders;
+		}
+		else if(t.m_name.getBegin() == CString("ForwardLegacy"))
 		{
 			m_techniquesMask |= RenderingTechniqueBit::kForward;
 			m_shaderTechniques |= ShaderTechniqueBit::kLegacy;
@@ -605,7 +615,18 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 		}
 		break;
 	case RenderingTechnique::kForward:
-		initInfo.requestTechniqueAndTypes(ShaderTypeBit::kVertex | ShaderTypeBit::kPixel, "Forward");
+		if(key.getMeshletRendering() && meshShadersSupported)
+		{
+			initInfo.requestTechniqueAndTypes(ShaderTypeBit::kMesh | ShaderTypeBit::kPixel, "ForwardMeshShaders");
+		}
+		else if(key.getMeshletRendering())
+		{
+			initInfo.requestTechniqueAndTypes(ShaderTypeBit::kVertex | ShaderTypeBit::kPixel, "ForwardSwMeshletRendering");
+		}
+		else
+		{
+			initInfo.requestTechniqueAndTypes(ShaderTypeBit::kVertex | ShaderTypeBit::kPixel, "ForwardLegacy");
+		}
 		break;
 	case RenderingTechnique::kRtShadow:
 		initInfo.requestTechniqueAndTypes(ShaderTypeBit::kAllHit, "RtShadows");

+ 1 - 2
AnKi/Resource/MeshResource.cpp

@@ -357,8 +357,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 					packSnorm4x8(Vec4(inMeshlet.m_coneDirection, cos(inMeshlet.m_coneAngle / 2.0f)));
 				outMeshletBoundingVolume.m_coneApex = inMeshlet.m_coneApex;
 				outMeshletBoundingVolume.m_sphereRadius =
-					((outMeshletBoundingVolume.m_aabbMin + outMeshletBoundingVolume.m_aabbMax) / 2.0f - outMeshletBoundingVolume.m_aabbMax)
-						.getLength();
+					((outMeshletBoundingVolume.m_aabbMin + outMeshletBoundingVolume.m_aabbMax) / 2.0f - outMeshletBoundingVolume.m_aabbMax).length();
 				outMeshletBoundingVolume.m_primitiveCount = inMeshlet.m_primitiveCount;
 			}
 

+ 1 - 1
AnKi/Scene/Components/BodyComponent.cpp

@@ -112,7 +112,7 @@ Error BodyComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		info.m_node->setLocalTransform(m_body->getTransform());
 	}
 
-	if(m_force.getLengthSquared() > 0.0f)
+	if(m_force.lengthSquared() > 0.0f)
 	{
 		if(m_forcePosition != 0.0f)
 		{

+ 2 - 2
AnKi/Scene/Components/DecalComponent.cpp

@@ -54,7 +54,7 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		// Calculate the texture matrix
 		const Mat4 worldTransform(info.m_node->getWorldTransform());
 
-		const Mat4 viewMat = worldTransform.getInverse();
+		const Mat4 viewMat = worldTransform.invert();
 
 		const Mat4 projMat = Mat4::calculateOrthographicProjectionMatrix(halfBoxSize.x(), -halfBoxSize.x(), halfBoxSize.y(), -halfBoxSize.y(),
 																		 kClusterObjectFrustumNearPlane, m_boxSize.z());
@@ -77,7 +77,7 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuDecal.m_roughnessMetalnessFactor = m_layers[LayerType::kRoughnessMetalness].m_blendFactor;
 		gpuDecal.m_textureMatrix = m_biasProjViewMat;
 		gpuDecal.m_sphereCenter = obbW.getCenter().xyz();
-		gpuDecal.m_sphereRadius = obbW.getExtend().getLength();
+		gpuDecal.m_sphereRadius = obbW.getExtend().length();
 
 		m_gpuSceneDecal.uploadToGpuScene(gpuDecal);
 	}

+ 5 - 5
AnKi/Scene/Components/LightComponent.cpp

@@ -177,7 +177,7 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			const Mat4 uvToAtlas(m_shadowAtlasUvViewports[0].z(), 0.0f, 0.0f, m_shadowAtlasUvViewports[0].x(), 0.0f, m_shadowAtlasUvViewports[0].w(),
 								 0.0f, m_shadowAtlasUvViewports[0].y(), 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f);
 
-			m_spot.m_viewMat = Mat3x4(m_worldTransform.getInverse());
+			m_spot.m_viewMat = Mat3x4(m_worldTransform.invert());
 			m_spot.m_viewProjMat = proj * Mat4(m_spot.m_viewMat, Vec4(0.0f, 0.0f, 0.0f, 1.0f));
 			const Mat4 texMat = uvToAtlas * biasMat4 * m_spot.m_viewProjMat;
 
@@ -280,8 +280,8 @@ void LightComponent::computeCascadeFrustums(const Frustum& primaryFrustum, Const
 
 			// View
 			const Vec3 zAxis = m_worldTransform.getRotation().getZAxis();
-			const Vec3 xAxis = Vec3(0.0f, 1.0f, 0.0f).cross(zAxis).getNormalized();
-			const Vec3 yAxis = zAxis.cross(xAxis).getNormalized();
+			const Vec3 xAxis = Vec3(0.0f, 1.0f, 0.0f).cross(zAxis).normalize();
+			const Vec3 yAxis = zAxis.cross(xAxis).normalize();
 			Mat3x4 rot;
 			rot.setXAxis(xAxis);
 			rot.setYAxis(yAxis);
@@ -289,10 +289,10 @@ void LightComponent::computeCascadeFrustums(const Frustum& primaryFrustum, Const
 			rot.setTranslationPart(Vec3(0.0f));
 
 			const Transform cascadeTransform(eye.xyz0(), rot, Vec4(1.0f, 1.0f, 1.0f, 0.0f));
-			const Mat4 cascadeViewMat = Mat4(cascadeTransform.getInverse());
+			const Mat4 cascadeViewMat = Mat4(cascadeTransform.invert());
 
 			// Projection
-			const F32 far = (eye - sphereCenter).getLength() + sphereRadius;
+			const F32 far = (eye - sphereCenter).length() + sphereRadius;
 			Mat4 cascadeProjMat = Mat4::calculateOrthographicProjectionMatrix(sphereRadius, -sphereRadius, sphereRadius, -sphereRadius,
 																			  kClusterObjectFrustumNearPlane, far);
 

+ 1 - 2
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -159,8 +159,7 @@ public:
 		// force
 		if(forceFlag)
 		{
-			Vec3 forceDir = getRandom(props.m_particle.m_minForceDirection, props.m_particle.m_maxForceDirection);
-			forceDir.normalize();
+			Vec3 forceDir = getRandom(props.m_particle.m_minForceDirection, props.m_particle.m_maxForceDirection).normalize();
 
 			// The forceDir depends on the particle emitter rotation
 			forceDir = trf.getRotation().getRotationPart() * forceDir;

+ 1 - 1
AnKi/Scene/Frustum.cpp

@@ -71,7 +71,7 @@ Bool Frustum::update()
 	if(m_worldTransformDirty)
 	{
 		updated = true;
-		m_viewMat = Mat3x4(m_worldTransform.getInverse());
+		m_viewMat = Mat3x4(m_worldTransform.invert());
 	}
 
 	// Updates that are affected by transform & shape updates

+ 1 - 1
AnKi/Scene/SceneNode.h

@@ -370,7 +370,7 @@ public:
 
 	void lookAtPoint(const Vec4& point)
 	{
-		m_ltrf.lookAt(point, Vec4(0.0f, 1.0f, 0.0f, 0.0f));
+		m_ltrf = m_ltrf.lookAt(point, Vec4::yAxis());
 		m_localTransformDirty = true;
 	}
 	/// @}

+ 4 - 4
AnKi/Scene/SoftwareRasterizer.cpp

@@ -84,14 +84,14 @@ void SoftwareRasterizer::clipTriangle(const Vec4* inVerts, Vec4* outVerts, U& ou
 
 		// Find first intersection
 		Vec4 rayOrigin = inVerts[i].xyz0();
-		Vec4 rayDir = (inVerts[next].xyz0() - rayOrigin).getNormalized();
+		Vec4 rayDir = (inVerts[next].xyz0() - rayOrigin).normalize();
 
 		Vec4 intersection0;
 		[[maybe_unused]] Bool intersects = testCollision(plane, Ray(rayOrigin, rayDir), intersection0);
 		ANKI_ASSERT(intersects);
 
 		// Find second intersection
-		rayDir = (inVerts[prev].xyz0() - rayOrigin).getNormalized();
+		rayDir = (inVerts[prev].xyz0() - rayOrigin).normalize();
 
 		Vec4 intersection1;
 		intersects = testCollision(plane, Ray(rayOrigin, rayDir), intersection1);
@@ -130,7 +130,7 @@ void SoftwareRasterizer::clipTriangle(const Vec4* inVerts, Vec4* outVerts, U& ou
 
 		// Find first intersection
 		Vec4 rayOrigin = inVerts[in1].xyz0();
-		Vec4 rayDir = (inVerts[out].xyz0() - rayOrigin).getNormalized();
+		Vec4 rayDir = (inVerts[out].xyz0() - rayOrigin).normalize();
 
 		Vec4 intersection0;
 		[[maybe_unused]] Bool intersects = testCollision(plane, Ray(rayOrigin, rayDir), intersection0);
@@ -138,7 +138,7 @@ void SoftwareRasterizer::clipTriangle(const Vec4* inVerts, Vec4* outVerts, U& ou
 
 		// Find second intersection
 		rayOrigin = inVerts[in0].xyz0();
-		rayDir = (inVerts[out].xyz0() - rayOrigin).getNormalized();
+		rayDir = (inVerts[out].xyz0() - rayOrigin).normalize();
 
 		Vec4 intersection1;
 		intersects = testCollision(plane, Ray(rayOrigin, rayDir), intersection1);

+ 36 - 156
AnKi/Script/Math.cpp

@@ -26,7 +26,7 @@ static void deserializeVec2(const void* data, LuaUserData& self)
 	obj->deserialize(data);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoVec2 = {7803499094187981619, "Vec2", LuaUserData::computeSizeForGarbageCollected<Vec2>(), serializeVec2,
+LuaUserDataTypeInfo luaUserDataTypeInfoVec2 = {7646200881744055672, "Vec2", LuaUserData::computeSizeForGarbageCollected<Vec2>(), serializeVec2,
 											   deserializeVec2};
 
 template<>
@@ -812,8 +812,8 @@ static int wrapVec2__eq(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec2::getLength.
-static inline int pwrapVec2getLength(lua_State* l)
+/// Pre-wrap method Vec2::length.
+static inline int pwrapVec2length(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -833,7 +833,7 @@ static inline int pwrapVec2getLength(lua_State* l)
 	Vec2* self = ud->getData<Vec2>();
 
 	// Call the method
-	F32 ret = self->getLength();
+	F32 ret = self->length();
 
 	// Push return value
 	lua_pushnumber(l, lua_Number(ret));
@@ -841,10 +841,10 @@ static inline int pwrapVec2getLength(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec2::getLength.
-static int wrapVec2getLength(lua_State* l)
+/// Wrap method Vec2::length.
+static int wrapVec2length(lua_State* l)
 {
-	int res = pwrapVec2getLength(l);
+	int res = pwrapVec2length(l);
 	if(res >= 0)
 	{
 		return res;
@@ -854,8 +854,8 @@ static int wrapVec2getLength(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec2::getNormalized.
-static inline int pwrapVec2getNormalized(lua_State* l)
+/// Pre-wrap method Vec2::normalize.
+static inline int pwrapVec2normalize(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -875,7 +875,7 @@ static inline int pwrapVec2getNormalized(lua_State* l)
 	Vec2* self = ud->getData<Vec2>();
 
 	// Call the method
-	Vec2 ret = self->getNormalized();
+	Vec2 ret = self->normalize();
 
 	// Push return value
 	size = LuaUserData::computeSizeForGarbageCollected<Vec2>();
@@ -889,45 +889,6 @@ static inline int pwrapVec2getNormalized(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec2::getNormalized.
-static int wrapVec2getNormalized(lua_State* l)
-{
-	int res = pwrapVec2getNormalized(l);
-	if(res >= 0)
-	{
-		return res;
-	}
-
-	lua_error(l);
-	return 0;
-}
-
-/// Pre-wrap method Vec2::normalize.
-static inline int pwrapVec2normalize(lua_State* l)
-{
-	[[maybe_unused]] LuaUserData* ud;
-	[[maybe_unused]] void* voidp;
-	[[maybe_unused]] PtrSize size;
-
-	if(LuaBinder::checkArgsCount(l, 1)) [[unlikely]]
-	{
-		return -1;
-	}
-
-	// Get "this" as "self"
-	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoVec2, ud))
-	{
-		return -1;
-	}
-
-	Vec2* self = ud->getData<Vec2>();
-
-	// Call the method
-	self->normalize();
-
-	return 0;
-}
-
 /// Wrap method Vec2::normalize.
 static int wrapVec2normalize(lua_State* l)
 {
@@ -1012,8 +973,7 @@ static inline void wrapVec2(lua_State* l)
 	LuaBinder::pushLuaCFuncMethod(l, "__mul", wrapVec2__mul);
 	LuaBinder::pushLuaCFuncMethod(l, "__div", wrapVec2__div);
 	LuaBinder::pushLuaCFuncMethod(l, "__eq", wrapVec2__eq);
-	LuaBinder::pushLuaCFuncMethod(l, "getLength", wrapVec2getLength);
-	LuaBinder::pushLuaCFuncMethod(l, "getNormalized", wrapVec2getNormalized);
+	LuaBinder::pushLuaCFuncMethod(l, "length", wrapVec2length);
 	LuaBinder::pushLuaCFuncMethod(l, "normalize", wrapVec2normalize);
 	LuaBinder::pushLuaCFuncMethod(l, "dot", wrapVec2dot);
 	lua_settop(l, 0);
@@ -1035,7 +995,7 @@ static void deserializeVec3(const void* data, LuaUserData& self)
 	obj->deserialize(data);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoVec3 = {6911238639175218670, "Vec3", LuaUserData::computeSizeForGarbageCollected<Vec3>(), serializeVec3,
+LuaUserDataTypeInfo luaUserDataTypeInfoVec3 = {7123097970105283442, "Vec3", LuaUserData::computeSizeForGarbageCollected<Vec3>(), serializeVec3,
 											   deserializeVec3};
 
 template<>
@@ -1955,8 +1915,8 @@ static int wrapVec3__eq(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec3::getLength.
-static inline int pwrapVec3getLength(lua_State* l)
+/// Pre-wrap method Vec3::length.
+static inline int pwrapVec3length(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -1976,7 +1936,7 @@ static inline int pwrapVec3getLength(lua_State* l)
 	Vec3* self = ud->getData<Vec3>();
 
 	// Call the method
-	F32 ret = self->getLength();
+	F32 ret = self->length();
 
 	// Push return value
 	lua_pushnumber(l, lua_Number(ret));
@@ -1984,10 +1944,10 @@ static inline int pwrapVec3getLength(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec3::getLength.
-static int wrapVec3getLength(lua_State* l)
+/// Wrap method Vec3::length.
+static int wrapVec3length(lua_State* l)
 {
-	int res = pwrapVec3getLength(l);
+	int res = pwrapVec3length(l);
 	if(res >= 0)
 	{
 		return res;
@@ -1997,8 +1957,8 @@ static int wrapVec3getLength(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec3::getNormalized.
-static inline int pwrapVec3getNormalized(lua_State* l)
+/// Pre-wrap method Vec3::normalize.
+static inline int pwrapVec3normalize(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -2018,7 +1978,7 @@ static inline int pwrapVec3getNormalized(lua_State* l)
 	Vec3* self = ud->getData<Vec3>();
 
 	// Call the method
-	Vec3 ret = self->getNormalized();
+	Vec3 ret = self->normalize();
 
 	// Push return value
 	size = LuaUserData::computeSizeForGarbageCollected<Vec3>();
@@ -2032,45 +1992,6 @@ static inline int pwrapVec3getNormalized(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec3::getNormalized.
-static int wrapVec3getNormalized(lua_State* l)
-{
-	int res = pwrapVec3getNormalized(l);
-	if(res >= 0)
-	{
-		return res;
-	}
-
-	lua_error(l);
-	return 0;
-}
-
-/// Pre-wrap method Vec3::normalize.
-static inline int pwrapVec3normalize(lua_State* l)
-{
-	[[maybe_unused]] LuaUserData* ud;
-	[[maybe_unused]] void* voidp;
-	[[maybe_unused]] PtrSize size;
-
-	if(LuaBinder::checkArgsCount(l, 1)) [[unlikely]]
-	{
-		return -1;
-	}
-
-	// Get "this" as "self"
-	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoVec3, ud))
-	{
-		return -1;
-	}
-
-	Vec3* self = ud->getData<Vec3>();
-
-	// Call the method
-	self->normalize();
-
-	return 0;
-}
-
 /// Wrap method Vec3::normalize.
 static int wrapVec3normalize(lua_State* l)
 {
@@ -2157,8 +2078,7 @@ static inline void wrapVec3(lua_State* l)
 	LuaBinder::pushLuaCFuncMethod(l, "__mul", wrapVec3__mul);
 	LuaBinder::pushLuaCFuncMethod(l, "__div", wrapVec3__div);
 	LuaBinder::pushLuaCFuncMethod(l, "__eq", wrapVec3__eq);
-	LuaBinder::pushLuaCFuncMethod(l, "getLength", wrapVec3getLength);
-	LuaBinder::pushLuaCFuncMethod(l, "getNormalized", wrapVec3getNormalized);
+	LuaBinder::pushLuaCFuncMethod(l, "length", wrapVec3length);
 	LuaBinder::pushLuaCFuncMethod(l, "normalize", wrapVec3normalize);
 	LuaBinder::pushLuaCFuncMethod(l, "dot", wrapVec3dot);
 	lua_settop(l, 0);
@@ -2180,7 +2100,7 @@ static void deserializeVec4(const void* data, LuaUserData& self)
 	obj->deserialize(data);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoVec4 = {-5133042424981777357, "Vec4", LuaUserData::computeSizeForGarbageCollected<Vec4>(), serializeVec4,
+LuaUserDataTypeInfo luaUserDataTypeInfoVec4 = {-5145388461665443836, "Vec4", LuaUserData::computeSizeForGarbageCollected<Vec4>(), serializeVec4,
 											   deserializeVec4};
 
 template<>
@@ -3200,8 +3120,8 @@ static int wrapVec4__eq(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec4::getLength.
-static inline int pwrapVec4getLength(lua_State* l)
+/// Pre-wrap method Vec4::length.
+static inline int pwrapVec4length(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -3221,7 +3141,7 @@ static inline int pwrapVec4getLength(lua_State* l)
 	Vec4* self = ud->getData<Vec4>();
 
 	// Call the method
-	F32 ret = self->getLength();
+	F32 ret = self->length();
 
 	// Push return value
 	lua_pushnumber(l, lua_Number(ret));
@@ -3229,10 +3149,10 @@ static inline int pwrapVec4getLength(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec4::getLength.
-static int wrapVec4getLength(lua_State* l)
+/// Wrap method Vec4::length.
+static int wrapVec4length(lua_State* l)
 {
-	int res = pwrapVec4getLength(l);
+	int res = pwrapVec4length(l);
 	if(res >= 0)
 	{
 		return res;
@@ -3242,8 +3162,8 @@ static int wrapVec4getLength(lua_State* l)
 	return 0;
 }
 
-/// Pre-wrap method Vec4::getNormalized.
-static inline int pwrapVec4getNormalized(lua_State* l)
+/// Pre-wrap method Vec4::normalize.
+static inline int pwrapVec4normalize(lua_State* l)
 {
 	[[maybe_unused]] LuaUserData* ud;
 	[[maybe_unused]] void* voidp;
@@ -3263,7 +3183,7 @@ static inline int pwrapVec4getNormalized(lua_State* l)
 	Vec4* self = ud->getData<Vec4>();
 
 	// Call the method
-	Vec4 ret = self->getNormalized();
+	Vec4 ret = self->normalize();
 
 	// Push return value
 	size = LuaUserData::computeSizeForGarbageCollected<Vec4>();
@@ -3277,45 +3197,6 @@ static inline int pwrapVec4getNormalized(lua_State* l)
 	return 1;
 }
 
-/// Wrap method Vec4::getNormalized.
-static int wrapVec4getNormalized(lua_State* l)
-{
-	int res = pwrapVec4getNormalized(l);
-	if(res >= 0)
-	{
-		return res;
-	}
-
-	lua_error(l);
-	return 0;
-}
-
-/// Pre-wrap method Vec4::normalize.
-static inline int pwrapVec4normalize(lua_State* l)
-{
-	[[maybe_unused]] LuaUserData* ud;
-	[[maybe_unused]] void* voidp;
-	[[maybe_unused]] PtrSize size;
-
-	if(LuaBinder::checkArgsCount(l, 1)) [[unlikely]]
-	{
-		return -1;
-	}
-
-	// Get "this" as "self"
-	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoVec4, ud))
-	{
-		return -1;
-	}
-
-	Vec4* self = ud->getData<Vec4>();
-
-	// Call the method
-	self->normalize();
-
-	return 0;
-}
-
 /// Wrap method Vec4::normalize.
 static int wrapVec4normalize(lua_State* l)
 {
@@ -3404,14 +3285,13 @@ static inline void wrapVec4(lua_State* l)
 	LuaBinder::pushLuaCFuncMethod(l, "__mul", wrapVec4__mul);
 	LuaBinder::pushLuaCFuncMethod(l, "__div", wrapVec4__div);
 	LuaBinder::pushLuaCFuncMethod(l, "__eq", wrapVec4__eq);
-	LuaBinder::pushLuaCFuncMethod(l, "getLength", wrapVec4getLength);
-	LuaBinder::pushLuaCFuncMethod(l, "getNormalized", wrapVec4getNormalized);
+	LuaBinder::pushLuaCFuncMethod(l, "length", wrapVec4length);
 	LuaBinder::pushLuaCFuncMethod(l, "normalize", wrapVec4normalize);
 	LuaBinder::pushLuaCFuncMethod(l, "dot", wrapVec4dot);
 	lua_settop(l, 0);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoMat3 = {6777119210890395568, "Mat3", LuaUserData::computeSizeForGarbageCollected<Mat3>(), nullptr, nullptr};
+LuaUserDataTypeInfo luaUserDataTypeInfoMat3 = {288806843489649530, "Mat3", LuaUserData::computeSizeForGarbageCollected<Mat3>(), nullptr, nullptr};
 
 template<>
 const LuaUserDataTypeInfo& LuaUserData::getDataTypeInfoFor<Mat3>()
@@ -3797,7 +3677,7 @@ static inline void wrapMat3(lua_State* l)
 	lua_settop(l, 0);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoMat3x4 = {8812777471101110980, "Mat3x4", LuaUserData::computeSizeForGarbageCollected<Mat3x4>(), nullptr,
+LuaUserDataTypeInfo luaUserDataTypeInfoMat3x4 = {-5331181224552445128, "Mat3x4", LuaUserData::computeSizeForGarbageCollected<Mat3x4>(), nullptr,
 												 nullptr};
 
 template<>
@@ -4202,7 +4082,7 @@ static inline void wrapMat3x4(lua_State* l)
 	lua_settop(l, 0);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoTransform = {5321497660535424691, "Transform", LuaUserData::computeSizeForGarbageCollected<Transform>(),
+LuaUserDataTypeInfo luaUserDataTypeInfoTransform = {432149697915855870, "Transform", LuaUserData::computeSizeForGarbageCollected<Transform>(),
 													nullptr, nullptr};
 
 template<>

+ 6 - 9
AnKi/Script/Math.xml

@@ -99,13 +99,12 @@ namespace anki {]]></head>
 					</args>
 					<return>Bool</return>
 				</method>
-				<method name="getLength">
+				<method name="length">
 					<return>F32</return>
 				</method>
-				<method name="getNormalized">
+				<method name="normalize">
 					<return>Vec2</return>
 				</method>
-				<method name="normalize"></method>
 				<method name="dot">
 					<args>
 						<arg>const Vec2&amp;</arg>
@@ -218,13 +217,12 @@ namespace anki {]]></head>
 					</args>
 					<return>Bool</return>
 				</method>
-				<method name="getLength">
+				<method name="length">
 					<return>F32</return>
 				</method>
-				<method name="getNormalized">
+				<method name="normalize">
 					<return>Vec3</return>
 				</method>
-				<method name="normalize"></method>
 				<method name="dot">
 					<args>
 						<arg>const Vec3&amp;</arg>
@@ -350,13 +348,12 @@ namespace anki {]]></head>
 					</args>
 					<return>Bool</return>
 				</method>
-				<method name="getLength">
+				<method name="length">
 					<return>F32</return>
 				</method>
-				<method name="getNormalized">
+				<method name="normalize">
 					<return>Vec4</return>
 				</method>
-				<method name="normalize"></method>
 				<method name="dot">
 					<args>
 						<arg>const Vec4&amp;</arg>

+ 1 - 1
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -3,7 +3,7 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique Forward vert pixel
+#pragma anki technique ForwardLegacy vert pixel
 
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>

+ 106 - 6
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -6,10 +6,14 @@
 #pragma anki mutator TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 
-#pragma anki technique Forward vert pixel
+#pragma anki technique ForwardLegacy vert pixel
+#pragma anki technique ForwardSwMeshletRendering vert pixel
+#pragma anki technique ForwardMeshShaders mesh pixel
 
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
 
+#define SW_MESHLETS ANKI_TECHNIQUE_ForwardSwMeshletRendering
+
 #pragma anki struct AnKiLocalConstants
 #pragma anki member U32 m_texture
 #pragma anki member RVec4 m_colorScale
@@ -18,7 +22,7 @@
 
 struct VertIn
 {
-	UVec4 m_gpuSceneRenderable : MISC0;
+	UVec4 m_instanceData : MISC0;
 	U32 m_svVertexId : SV_VERTEXID;
 };
 
@@ -30,27 +34,123 @@ struct VertOut
 	nointerpolation U32 m_constantsOffset : CONSTANTS_OFFSET;
 };
 
+struct MeshPerVertOut
+{
+	Vec4 m_svPosition : SV_POSITION;
+	Vec2 m_uv : TEXCOORD;
+};
+
+struct MeshPerPrimitiveOut
+{
+	ANKI_PER_PRIMITIVE_MEMBER U32 m_constantsOffset : UNIS_OFFSET;
+};
+
+// ===========================================================================
+// Vert                                                                      =
+// ===========================================================================
 #if ANKI_VERTEX_SHADER
 VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
-	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
-	const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
+#	if SW_MESHLETS
+	const GpuSceneMeshletInstance instance = unpackGpuSceneMeshletInstance(input.m_instanceData);
+
+	if(input.m_svVertexId >= (instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit & ((1u << 7u) - 1u)) * 3u)
+	{
+		// Discard the primitive
+		output = (VertOut)0;
+		output.m_svPosition = kNaN;
+		return output;
+	}
+
+	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
+	const UnpackedMeshVertex vertex = loadVertex(meshlet, input.m_svVertexId, false);
+	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
+#	else // SW_MESHLETS
+	const GpuSceneRenderableInstance instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
+	const GpuSceneMeshLod mesh = g_meshLods[instance.m_meshLodIndex];
+	const U32 worldTransformsIndex = instance.m_worldTransformsIndex;
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
+#	endif // SW_MESHLETS
+
+	const U32 constantsOffset = instance.m_constantsOffset;
+
+	const Mat3x4 worldTransform = g_transforms[worldTransformsIndex];
 
 	output.m_worldPosition = mul(worldTransform, Vec4(vertex.m_position, 1.0));
 
 	output.m_svPosition = mul(g_globalConstants.m_viewProjectionMatrix, Vec4(output.m_worldPosition, 1.0));
 
 	output.m_uv = vertex.m_uv;
-	output.m_constantsOffset = renderable.m_constantsOffset;
+	output.m_constantsOffset = constantsOffset;
 
 	return output;
 }
 #endif // ANKI_VERTEX_SHADER
 
+// ===========================================================================
+// Mesh                                                                      =
+// ===========================================================================
+#if ANKI_MESH_SHADER
+[numthreads(ANKI_MESH_SHADER_THREADGROUP_SIZE, 1, 1)] [outputtopology("triangle")] void
+main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices MeshPerVertOut verts[kMaxVerticesPerMeshlet],
+	 out primitives MeshPerPrimitiveOut primitives[kMaxPrimitivesPerMeshlet], out indices UVec3 indices[kMaxPrimitivesPerMeshlet])
+{
+	const U32 instanceIdx = g_firstMeshlet[g_consts.m_bucketIndex] + svGroupId;
+
+	const GpuSceneMeshletInstance instance = g_meshletInstances[instanceIdx];
+	const U32 constantsOffset = instance.m_constantsOffset;
+	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
+
+	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
+	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
+	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
+
+	SetMeshOutputCounts(vertCount, primCount);
+
+	// Write the verts
+	const U32 vertLoopCount = kMaxVerticesPerMeshlet / ANKI_MESH_SHADER_THREADGROUP_SIZE;
+	[unroll] for(U32 l = 0; l < vertLoopCount; ++l)
+	{
+		const U32 idx = l * ANKI_MESH_SHADER_THREADGROUP_SIZE + svGroupIndex;
+
+		if(idx < vertCount)
+		{
+			MeshPerVertOut output;
+
+			const UnpackedMeshVertex vert = loadVertexLocalIndex(meshlet, idx, false);
+
+			const Mat3x4 worldTransform = g_transforms[worldTransformsIndex];
+			const Vec3 worldPos = mul(worldTransform, Vec4(vert.m_position, 1.0));
+			output.m_svPosition = mul(g_globalConstants.m_viewProjectionMatrix, Vec4(worldPos, 1.0f));
+
+			output.m_uv = vert.m_uv;
+
+			verts[idx] = output;
+		}
+	}
+
+	// Write the primitive data
+	const U32 primLoopCount = (kMaxPrimitivesPerMeshlet + ANKI_MESH_SHADER_THREADGROUP_SIZE - 1u) / ANKI_MESH_SHADER_THREADGROUP_SIZE;
+	[unroll] for(U32 l = 0; l < primLoopCount; ++l)
+	{
+		const U32 idx = l * ANKI_MESH_SHADER_THREADGROUP_SIZE + svGroupIndex;
+
+		if(idx < primCount)
+		{
+			const UVec3 prim = g_unifiedGeom_R8G8B8A8_Uint[meshlet.m_firstPrimitive + idx].xyz;
+			indices[idx] = prim;
+
+			primitives[idx].m_constantsOffset = constantsOffset;
+		}
+	}
+}
+#endif // ANKI_MESH_SHADER
+
+// ===========================================================================
+// Pixel                                                                     =
+// ===========================================================================
 #if ANKI_PIXEL_SHADER
 PixelOut main(VertOut input)
 {

+ 1 - 1
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -6,7 +6,7 @@
 #pragma anki mutator ANIMATED_TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 
-#pragma anki technique Forward vert pixel
+#pragma anki technique ForwardLegacy vert pixel
 
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
 

+ 3 - 10
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -264,14 +264,7 @@ VertOut main(VertIn input)
 	}
 
 	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
-
-	// Indices are stored in R8G8B8A8_Uint per primitive. Last component is not used. Instead of reading 4 bytes use the code bellow to read just 1.
-	// Find prev version in an older commit
-	const U32 componentsPerPrimitive = 4u;
-	const F32 offset = floor(F32(input.m_svVertexId) / 3.0f) * F32(componentsPerPrimitive) + fmod(F32(input.m_svVertexId), 3.0f);
-	const U32 localIdx = g_unifiedGeom_R8_Uint[meshlet.m_firstPrimitive * componentsPerPrimitive + U32(offset)];
-
-	UnpackedMeshVertex vert = loadVertex(meshlet, localIdx, ANKI_BONES);
+	UnpackedMeshVertex vert = loadVertex(meshlet, input.m_svVertexId, ANKI_BONES);
 
 	const U32 constantsOffset = instance.m_constantsOffset;
 	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
@@ -366,7 +359,7 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices
 		{
 			MeshPerVertOut output;
 
-			UnpackedMeshVertex vert = loadVertex(meshlet, idx, ANKI_BONES);
+			UnpackedMeshVertex vert = loadVertexLocalIndex(meshlet, idx, ANKI_BONES);
 
 			const Mat3x4 worldTransform = g_transforms[worldTransformsIndex];
 			const Mat3x4 prevWorldTransform = g_transforms[worldTransformsIndex + 1u];
@@ -460,7 +453,7 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices
 #endif // ANKI_MESH_SHADER
 
 // ===========================================================================
-// Frag                                                                      =
+// Pixel                                                                     =
 // ===========================================================================
 #if ANKI_PIXEL_SHADER
 

+ 1 - 1
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -38,7 +38,7 @@ inline GpuSceneRenderableBoundingVolume initGpuSceneRenderableBoundingVolume(Vec
 	const Vec3 sphereCenter = (aabbMin + aabbMax) * 0.5f;
 	const Vec3 aabbExtend = aabbMax - sphereCenter;
 #if defined(__cplusplus)
-	gpuVolume.m_sphereRadius = aabbExtend.getLength();
+	gpuVolume.m_sphereRadius = aabbExtend.length();
 #else
 	gpuVolume.m_sphereRadius = length(aabbExtend);
 #endif

+ 17 - 6
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -87,24 +87,35 @@ UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 	return v;
 }
 
-UnpackedMeshVertex loadVertex(MeshletGeometryDescriptor meshlet, U32 vertexIndex, Bool bones)
+UnpackedMeshVertex loadVertexLocalIndex(MeshletGeometryDescriptor meshlet, U32 localIdx, Bool bones)
 {
 	UnpackedMeshVertex v;
-	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kPosition] + vertexIndex];
+	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kPosition] + localIdx];
 	v.m_position = v.m_position * meshlet.m_positionScale + meshlet.m_positionTranslation;
 
-	v.m_normal = g_unifiedGeom_R8G8B8A8_Snorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kNormal] + vertexIndex].xyz;
-	v.m_uv = g_unifiedGeom_R32G32_Sfloat[meshlet.m_vertexOffsets[(U32)VertexStreamId::kUv] + vertexIndex];
+	v.m_normal = g_unifiedGeom_R8G8B8A8_Snorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kNormal] + localIdx].xyz;
+	v.m_uv = g_unifiedGeom_R32G32_Sfloat[meshlet.m_vertexOffsets[(U32)VertexStreamId::kUv] + localIdx];
 
 	if(bones)
 	{
-		v.m_boneIndices = g_unifiedGeom_R8G8B8A8_Uint[meshlet.m_vertexOffsets[(U32)VertexStreamId::kBoneIds] + vertexIndex];
-		v.m_boneWeights = g_unifiedGeom_R8G8B8A8_Snorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kBoneWeights] + vertexIndex];
+		v.m_boneIndices = g_unifiedGeom_R8G8B8A8_Uint[meshlet.m_vertexOffsets[(U32)VertexStreamId::kBoneIds] + localIdx];
+		v.m_boneWeights = g_unifiedGeom_R8G8B8A8_Snorm[meshlet.m_vertexOffsets[(U32)VertexStreamId::kBoneWeights] + localIdx];
 	}
 
 	return v;
 }
 
+UnpackedMeshVertex loadVertex(MeshletGeometryDescriptor meshlet, U32 svVertexId, Bool bones)
+{
+	// Indices are stored in R8G8B8A8_Uint per primitive. Last component is not used. Instead of reading 4 bytes use the code bellow to read just 1.
+	// Find prev version in an older commit
+	const U32 componentsPerPrimitive = 4u;
+	const F32 offset = floor(F32(svVertexId) / 3.0f) * F32(componentsPerPrimitive) + fmod(F32(svVertexId), 3.0f);
+	const U32 localIdx = g_unifiedGeom_R8_Uint[meshlet.m_firstPrimitive * componentsPerPrimitive + U32(offset)];
+
+	return loadVertexLocalIndex(meshlet, localIdx, bones);
+}
+
 Bool cullBackfaceMeshlet(MeshletBoundingVolume meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
 {
 	const Vec4 coneDirAndAng = unpackSnorm4x8<F32>(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);

+ 4 - 4
Samples/PhysicsPlayground/Main.cpp

@@ -296,11 +296,11 @@ Error MyApp::userMainLoop(Bool& quit, [[maybe_unused]] Second elapsedTime)
 
 			rot = player.getLocalRotation().combineTransformations(rot);
 
-			Vec3 newz = rot.getColumn(2).getNormalized();
+			Vec3 newz = rot.getColumn(2).normalize();
 			Vec3 newx = Vec3(0.0, 1.0, 0.0).cross(newz);
 			Vec3 newy = newz.cross(newx);
 			rot.setColumns(newx, newy, newz, Vec3(0.0));
-			rot.reorthogonalize();
+			rot = rot.reorthogonalize();
 
 			// Update move
 			player.setLocalRotation(rot);
@@ -349,7 +349,7 @@ Error MyApp::userMainLoop(Bool& quit, [[maybe_unused]] Second elapsedTime)
 			{
 				dir = -(player.getLocalRotation() * moveVec.xyz0());
 				dir.y() = 0.0f;
-				dir.normalize();
+				dir = dir.normalize();
 			}
 
 			F32 speed1 = speed;
@@ -403,7 +403,7 @@ Error MyApp::userMainLoop(Bool& quit, [[maybe_unused]] Second elapsedTime)
 			// Create rotation
 			const Vec3& zAxis = result.m_normal;
 			Vec3 yAxis = Vec3(0, 1, 0.5);
-			Vec3 xAxis = yAxis.cross(zAxis).getNormalized();
+			Vec3 xAxis = yAxis.cross(zAxis).normalize();
 			yAxis = zAxis.cross(xAxis);
 
 			Mat3x4 rot = Mat3x4::getIdentity();

+ 1 - 1
Tests/Gr/GrAsyncCompute.cpp

@@ -297,7 +297,7 @@ float4 main(VertOut input) : SV_Target0
 
 				constexpr F32 orthoHalfSize = 10.0f;
 				constexpr F32 orthoSize = orthoHalfSize * 2.0f;
-				const Mat4 viewMat = Mat4::getIdentity().getInverse();
+				const Mat4 viewMat = Mat4::getIdentity().invert();
 				const Mat4 projMat =
 					Mat4::calculateOrthographicProjectionMatrix(orthoHalfSize, -orthoHalfSize, orthoHalfSize, -orthoHalfSize, 0.1f, 200.0f);
 				consts.m_viewProjMat = projMat * viewMat;

+ 4 - 4
Tests/Math/Math.cpp

@@ -97,16 +97,16 @@ void length()
 	}
 
 	res = T(sqrt(F32(res)));
-	ANKI_TEST_EXPECT_EQ(vec.getLength(), res);
+	ANKI_TEST_EXPECT_EQ(vec.length(), res);
 
 	if(Vec::kIsInteger)
 	{
-		ANKI_TEST_EXPECT_EQ(vec.getNormalized(), vec / res);
+		ANKI_TEST_EXPECT_EQ(vec.normalize(), vec / res);
 	}
 	else
 	{
 		auto a = vec / res;
-		auto b = vec.getNormalized();
+		auto b = vec.normalize();
 		for(U i = 0; i < size; i++)
 		{
 			ANKI_TEST_EXPECT_NEAR(a[i], b[i], 0.0001);
@@ -251,7 +251,7 @@ template<typename Mat>
 void transpose()
 {
 	Mat a = getNonEmptyMat<Mat>();
-	Mat b = a.getTransposed();
+	Mat b = a.transpose();
 
 	for(U j = 0; j < Mat::kRowCount; j++)
 	{