Browse Source

Math submodule refactoring

Panagiotis Christopoulos Charitos 6 years ago
parent
commit
cf76f3ac43

+ 2 - 2
src/anki/math/Forward.h

@@ -12,7 +12,7 @@ namespace anki
 
 class F16;
 
-template<typename T, U N, typename TSimd, typename TV>
+template<typename T, U N, typename TV>
 class TVec;
 template<typename T>
 class TVec2;
@@ -21,7 +21,7 @@ class TVec3;
 template<typename T>
 class TVec4;
 
-template<typename T, U J, U I, typename TSimd, typename TM, typename TVJ, typename TVI>
+template<typename T, U J, U I, typename TM, typename TVJ, typename TVI>
 class TMat;
 template<typename T>
 class TMat3;

+ 481 - 6
src/anki/math/Mat.h

@@ -21,14 +21,20 @@ namespace anki
 /// @tparam TM The type of the derived class. Eg TMat3.
 /// @tparam TVJ The vector type of the row.
 /// @tparam TVI The vector type of the column.
-template<typename T, U J, U I, typename TSimd, typename TM, typename TVJ, typename TVI>
+template<typename T, U J, U I, typename TM, typename TVJ, typename TVI>
 class TMat
 {
 public:
 	using Scalar = T;
+	using Simd = typename MathSimd<T, I>::Type;
+	using SimdArray = Array<Simd, J>;
 	static constexpr U ROW_SIZE = J; ///< Number of rows
 	static constexpr U COLUMN_SIZE = I; ///< Number of columns
 	static constexpr U SIZE = J * I; ///< Number of total elements
+	static constexpr Bool HAS_SIMD = I == 4 && std::is_same<T, F32>::value && ANKI_SIMD == ANKI_SIMD_SSE;
+	static constexpr Bool HAS_MAT4_SIMD = J == 4 && I == 4 && std::is_same<T, F32>::value && ANKI_SIMD == ANKI_SIMD_SSE;
+	static constexpr Bool HAS_MAT3X4_SIMD =
+		J == 3 && I == 4 && std::is_same<T, F32>::value && ANKI_SIMD == ANKI_SIMD_SSE;
 
 	/// @name Constructors
 	/// @{
@@ -36,6 +42,7 @@ public:
 	{
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TMat(const TMat& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -44,6 +51,16 @@ public:
 		}
 	}
 
+	/// Construct from another matrix when the rows are stored as SIMD elements: copies all J entries of b.m_simd.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TMat(const TMat& b)
+	{
+		const SimdArray& src = b.m_simd;
+		for(U row = 0; row < J; ++row)
+		{
+			m_simd[row] = src[row];
+		}
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	explicit TMat(const T f)
 	{
 		for(T& x : m_arr1)
@@ -52,6 +69,15 @@ public:
 		}
 	}
 
+	/// Construct with every element set to f (SIMD layout): each of the J rows is a broadcast of f.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	explicit TMat(const T f)
+	{
+		// Broadcast once and reuse the same value for every row
+		const auto splat = _mm_set1_ps(f);
+		for(U row = 0; row < J; ++row)
+		{
+			m_simd[row] = splat;
+		}
+	}
+
 	explicit TMat(const T arr[])
 	{
 		for(U i = 0; i < N; i++)
@@ -59,6 +85,85 @@ public:
 			m_arr1[i] = arr[i];
 		}
 	}
+
+	// 3x3 specific constructors
+
+	/// 3x3 only: build the matrix from its nine elements in row-major order; argument mRC is stored at (R, C).
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 3)
+	TMat(T m00, T m01, T m02, T m10, T m11, T m12, T m20, T m21, T m22)
+	{
+		const T rows[3][3] = {{m00, m01, m02}, {m10, m11, m12}, {m20, m21, m22}};
+		for(U j = 0; j < 3; ++j)
+		{
+			for(U i = 0; i < 3; ++i)
+			{
+				(*this)(j, i) = rows[j][i];
+			}
+		}
+	}
+
+	/// 3x3 only: construct from a quaternion by forwarding it to setRotationPart().
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 3)
+	explicit TMat(const TQuat<T>& q)
+	{
+		this->setRotationPart(q);
+	}
+
+	/// 3x3 only: construct from Euler angles by forwarding them to setRotationPart().
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 3)
+	explicit TMat(const TEuler<T>& e)
+	{
+		this->setRotationPart(e);
+	}
+
+	/// 3x3 only: construct from an axis-angle by forwarding it to setRotationPart().
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 3)
+	explicit TMat(const TAxisang<T>& axisang)
+	{
+		this->setRotationPart(axisang);
+	}
+
+	// 4x4 specific constructors
+
+	/// 4x4 only: build the matrix from its sixteen elements in row-major order; argument mRC is stored at (R, C).
+	ANKI_ENABLE_IF_EXPRESSION(J == 4 && I == 4)
+	TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23, T m30, T m31, T m32, T m33)
+	{
+		const T rows[4][4] = {
+			{m00, m01, m02, m03}, {m10, m11, m12, m13}, {m20, m21, m22, m23}, {m30, m31, m32, m33}};
+		for(U j = 0; j < 4; ++j)
+		{
+			for(U i = 0; i < 4; ++i)
+			{
+				(*this)(j, i) = rows[j][i];
+			}
+		}
+	}
+
+	// 3x4 specific constructors
+
+	/// 3x4 only: build the matrix from its twelve elements in row-major order; argument mRC is stored at (R, C).
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 4)
+	TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23)
+	{
+		const T rows[3][4] = {{m00, m01, m02, m03}, {m10, m11, m12, m13}, {m20, m21, m22, m23}};
+		for(U j = 0; j < 3; ++j)
+		{
+			for(U i = 0; i < 4; ++i)
+			{
+				(*this)(j, i) = rows[j][i];
+			}
+		}
+	}
 	/// @}
 
 	/// @name Accessors
@@ -86,6 +191,7 @@ public:
 
 	/// @name Operators with same type
 	/// @{
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TM& operator=(const TM& b)
 	{
 		for(U n = 0; n < N; n++)
@@ -95,6 +201,17 @@ public:
 		return static_cast<TM&>(*this);
 	}
 
+	/// Assign from b (SIMD layout) by copying its J SIMD rows. Returns *this cast to the derived matrix type.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TM& operator=(const TM& b)
+	{
+		const SimdArray& src = b.m_simd;
+		for(U row = 0; row < J; ++row)
+		{
+			m_simd[row] = src[row];
+		}
+
+		TM& self = static_cast<TM&>(*this);
+		return self;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TM operator+(const TM& b) const
 	{
 		TM c;
@@ -105,6 +222,18 @@ public:
 		return c;
 	}
 
+	/// Element-wise sum (SIMD layout): row r of the result is _mm_add_ps(this row r, b row r).
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TM operator+(const TM& b) const
+	{
+		TM sum;
+		for(U row = 0; row < J; ++row)
+		{
+			const Simd& lhs = m_simd[row];
+			const Simd& rhs = b.m_simd[row];
+			sum.m_simd[row] = _mm_add_ps(lhs, rhs);
+		}
+		return sum;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TM& operator+=(const TM& b)
 	{
 		for(U n = 0; n < N; n++)
@@ -114,6 +243,17 @@ public:
 		return static_cast<TM&>(*this);
 	}
 
+	/// In-place element-wise sum (SIMD layout). Returns *this cast to the derived matrix type.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TM& operator+=(const TM& b)
+	{
+		for(U row = 0; row < J; ++row)
+		{
+			Simd& dst = m_simd[row];
+			dst = _mm_add_ps(dst, b.m_simd[row]);
+		}
+
+		TM& self = static_cast<TM&>(*this);
+		return self;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TM operator-(const TM& b) const
 	{
 		TM c;
@@ -124,6 +264,18 @@ public:
 		return c;
 	}
 
+	/// Element-wise difference (SIMD layout): row r of the result is _mm_sub_ps(this row r, b row r).
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TM operator-(const TM& b) const
+	{
+		TM diff;
+		for(U row = 0; row < J; ++row)
+		{
+			const Simd& lhs = m_simd[row];
+			const Simd& rhs = b.m_simd[row];
+			diff.m_simd[row] = _mm_sub_ps(lhs, rhs);
+		}
+		return diff;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	TM& operator-=(const TM& b)
 	{
 		for(U n = 0; n < N; n++)
@@ -133,9 +285,19 @@ public:
 		return static_cast<TM&>(*this);
 	}
 
+	/// In-place element-wise difference (SIMD layout). Returns *this cast to the derived matrix type.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD)
+	TM& operator-=(const TM& b)
+	{
+		for(U row = 0; row < J; ++row)
+		{
+			Simd& dst = m_simd[row];
+			dst = _mm_sub_ps(dst, b.m_simd[row]);
+		}
+
+		TM& self = static_cast<TM&>(*this);
+		return self;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(J == I && !HAS_MAT4_SIMD)
 	TM operator*(const TM& b) const
 	{
-		static_assert(I == J, "Only for square matrices");
 		TM out;
 		const TMat& a = *this;
 		for(U j = 0; j < J; j++)
@@ -152,6 +314,31 @@ public:
 		return out;
 	}
 
+	/// 4x4 multiplication for the SSE layout. Row r of the result is the sum over k of this(r, k) * (row k of b).
+	ANKI_ENABLE_IF_EXPRESSION(HAS_MAT4_SIMD)
+	TM operator*(const TM& b) const
+	{
+		const auto& a = *this;
+		TM result;
+
+		for(U row = 0; row < 4; ++row)
+		{
+			// Start with the k == 0 term, then fold in k = 1..3 in increasing order
+			__m128 acc = _mm_mul_ps(b.m_simd[0], _mm_set1_ps(a(row, 0)));
+			for(U k = 1; k < 4; ++k)
+			{
+				acc = _mm_add_ps(_mm_mul_ps(b.m_simd[k], _mm_set1_ps(a(row, k))), acc);
+			}
+
+			result.m_simd[row] = acc;
+		}
+
+		return result;
+	}
+
 	TM& operator*=(const TM& b)
 	{
 		(*this) = (*this) * b;
@@ -285,6 +472,7 @@ public:
 
 	/// @name Other
 	/// @{
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_SIMD)
 	void setRow(const U j, const TVJ& v)
 	{
 		for(U i = 0; i < I; i++)
@@ -293,6 +481,12 @@ public:
 		}
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(HAS_SIMD) // SIMD layout: a whole row is a single m_simd element
+	void setRow(const U j, const TVJ& v) // Overwrites row j with the SIMD value of v
+	{
+		m_simd[j] = v.getSimd(); // No bounds check on j is visible here
+	}
+
 	void setRows(const TVJ& a, const TVJ& b, const TVJ& c)
 	{
 		setRow(0, a);
@@ -300,9 +494,9 @@ public:
 		setRow(2, c);
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(J > 3)
 	void setRows(const TVJ& a, const TVJ& b, const TVJ& c, const TVJ& d)
 	{
-		static_assert(J > 3, "Wrong matrix");
 		setRows(a, b, c);
 		setRow(3, d);
 	}
@@ -687,9 +881,9 @@ public:
 		setColumns(xAxis, yAxis, zAxis);
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(J == I && !HAS_SIMD)
 	void transpose()
 	{
-		static_assert(I == J, "Only for square matrices");
 		for(U j = 0; j < J; j++)
 		{
 			for(U i = j + 1; i < I; i++)
@@ -701,6 +895,12 @@ public:
 		}
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(J == I && HAS_SIMD) // HAS_SIMD requires I == 4, so this is the 4x4 SIMD case
+	void transpose() // Transposes this matrix in place
+	{
+		_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]); // Rewrites the four SIMD rows directly
+	}
+
 	void transposeRotationPart()
 	{
 		for(U j = 0; j < 3; j++)
@@ -714,9 +914,9 @@ public:
 		}
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(I == J)
 	TM getTransposed() const
 	{
-		static_assert(I == J, "Only for square matrices");
 		TM out;
 		for(U j = 0; j < J; j++)
 		{
@@ -728,6 +928,255 @@ public:
 		return out;
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4) // 4x4 matrices only
+	T getDet() const // Determinant, written out as a sum of 24 signed products of four elements each
+	{
+		const auto& t = *this;
+		return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0)
+			   - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0) + t(0, 1) * t(1, 3) * t(2, 2) * t(3, 0)
+			   + t(0, 2) * t(1, 1) * t(2, 3) * t(3, 0) - t(0, 1) * t(1, 2) * t(2, 3) * t(3, 0)
+			   - t(0, 3) * t(1, 2) * t(2, 0) * t(3, 1) + t(0, 2) * t(1, 3) * t(2, 0) * t(3, 1)
+			   + t(0, 3) * t(1, 0) * t(2, 2) * t(3, 1) - t(0, 0) * t(1, 3) * t(2, 2) * t(3, 1)
+			   - t(0, 2) * t(1, 0) * t(2, 3) * t(3, 1) + t(0, 0) * t(1, 2) * t(2, 3) * t(3, 1)
+			   + t(0, 3) * t(1, 1) * t(2, 0) * t(3, 2) - t(0, 1) * t(1, 3) * t(2, 0) * t(3, 2)
+			   - t(0, 3) * t(1, 0) * t(2, 1) * t(3, 2) + t(0, 0) * t(1, 3) * t(2, 1) * t(3, 2)
+			   + t(0, 1) * t(1, 0) * t(2, 3) * t(3, 2) - t(0, 0) * t(1, 1) * t(2, 3) * t(3, 2)
+			   - t(0, 2) * t(1, 1) * t(2, 0) * t(3, 3) + t(0, 1) * t(1, 2) * t(2, 0) * t(3, 3)
+			   + t(0, 2) * t(1, 0) * t(2, 1) * t(3, 3) - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3)
+			   - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
+	}
+
+	/// Invert using Cramer's rule. Returns the inverse as a new matrix; asserts that the determinant is not zero.
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	TM getInverse() const
+	{
+		Array<T, 12> tmp; // Scratch: 12 pairwise element products, filled twice (once per half of the result)
+		const auto& in = (*this);
+		TM m4;
+
+		tmp[0] = in(2, 2) * in(3, 3); // First pass: products of one column-2 and one column-3 element
+		tmp[1] = in(3, 2) * in(2, 3);
+		tmp[2] = in(1, 2) * in(3, 3);
+		tmp[3] = in(3, 2) * in(1, 3);
+		tmp[4] = in(1, 2) * in(2, 3);
+		tmp[5] = in(2, 2) * in(1, 3);
+		tmp[6] = in(0, 2) * in(3, 3);
+		tmp[7] = in(3, 2) * in(0, 3);
+		tmp[8] = in(0, 2) * in(2, 3);
+		tmp[9] = in(2, 2) * in(0, 3);
+		tmp[10] = in(0, 2) * in(1, 3);
+		tmp[11] = in(1, 2) * in(0, 3);
+
+		m4(0, 0) = tmp[0] * in(1, 1) + tmp[3] * in(2, 1) + tmp[4] * in(3, 1);
+		m4(0, 0) -= tmp[1] * in(1, 1) + tmp[2] * in(2, 1) + tmp[5] * in(3, 1);
+		m4(0, 1) = tmp[1] * in(0, 1) + tmp[6] * in(2, 1) + tmp[9] * in(3, 1);
+		m4(0, 1) -= tmp[0] * in(0, 1) + tmp[7] * in(2, 1) + tmp[8] * in(3, 1);
+		m4(0, 2) = tmp[2] * in(0, 1) + tmp[7] * in(1, 1) + tmp[10] * in(3, 1);
+		m4(0, 2) -= tmp[3] * in(0, 1) + tmp[6] * in(1, 1) + tmp[11] * in(3, 1);
+		m4(0, 3) = tmp[5] * in(0, 1) + tmp[8] * in(1, 1) + tmp[11] * in(2, 1);
+		m4(0, 3) -= tmp[4] * in(0, 1) + tmp[9] * in(1, 1) + tmp[10] * in(2, 1);
+		m4(1, 0) = tmp[1] * in(1, 0) + tmp[2] * in(2, 0) + tmp[5] * in(3, 0);
+		m4(1, 0) -= tmp[0] * in(1, 0) + tmp[3] * in(2, 0) + tmp[4] * in(3, 0);
+		m4(1, 1) = tmp[0] * in(0, 0) + tmp[7] * in(2, 0) + tmp[8] * in(3, 0);
+		m4(1, 1) -= tmp[1] * in(0, 0) + tmp[6] * in(2, 0) + tmp[9] * in(3, 0);
+		m4(1, 2) = tmp[3] * in(0, 0) + tmp[6] * in(1, 0) + tmp[11] * in(3, 0);
+		m4(1, 2) -= tmp[2] * in(0, 0) + tmp[7] * in(1, 0) + tmp[10] * in(3, 0);
+		m4(1, 3) = tmp[4] * in(0, 0) + tmp[9] * in(1, 0) + tmp[10] * in(2, 0);
+		m4(1, 3) -= tmp[5] * in(0, 0) + tmp[8] * in(1, 0) + tmp[11] * in(2, 0);
+
+		tmp[0] = in(2, 0) * in(3, 1); // Second pass: products of one column-0 and one column-1 element
+		tmp[1] = in(3, 0) * in(2, 1);
+		tmp[2] = in(1, 0) * in(3, 1);
+		tmp[3] = in(3, 0) * in(1, 1);
+		tmp[4] = in(1, 0) * in(2, 1);
+		tmp[5] = in(2, 0) * in(1, 1);
+		tmp[6] = in(0, 0) * in(3, 1);
+		tmp[7] = in(3, 0) * in(0, 1);
+		tmp[8] = in(0, 0) * in(2, 1);
+		tmp[9] = in(2, 0) * in(0, 1);
+		tmp[10] = in(0, 0) * in(1, 1);
+		tmp[11] = in(1, 0) * in(0, 1);
+
+		m4(2, 0) = tmp[0] * in(1, 3) + tmp[3] * in(2, 3) + tmp[4] * in(3, 3);
+		m4(2, 0) -= tmp[1] * in(1, 3) + tmp[2] * in(2, 3) + tmp[5] * in(3, 3);
+		m4(2, 1) = tmp[1] * in(0, 3) + tmp[6] * in(2, 3) + tmp[9] * in(3, 3);
+		m4(2, 1) -= tmp[0] * in(0, 3) + tmp[7] * in(2, 3) + tmp[8] * in(3, 3);
+		m4(2, 2) = tmp[2] * in(0, 3) + tmp[7] * in(1, 3) + tmp[10] * in(3, 3);
+		m4(2, 2) -= tmp[3] * in(0, 3) + tmp[6] * in(1, 3) + tmp[11] * in(3, 3);
+		m4(2, 3) = tmp[5] * in(0, 3) + tmp[8] * in(1, 3) + tmp[11] * in(2, 3);
+		m4(2, 3) -= tmp[4] * in(0, 3) + tmp[9] * in(1, 3) + tmp[10] * in(2, 3);
+		m4(3, 0) = tmp[2] * in(2, 2) + tmp[5] * in(3, 2) + tmp[1] * in(1, 2);
+		m4(3, 0) -= tmp[4] * in(3, 2) + tmp[0] * in(1, 2) + tmp[3] * in(2, 2);
+		m4(3, 1) = tmp[8] * in(3, 2) + tmp[0] * in(0, 2) + tmp[7] * in(2, 2);
+		m4(3, 1) -= tmp[6] * in(2, 2) + tmp[9] * in(3, 2) + tmp[1] * in(0, 2);
+		m4(3, 2) = tmp[6] * in(1, 2) + tmp[11] * in(3, 2) + tmp[3] * in(0, 2);
+		m4(3, 2) -= tmp[10] * in(3, 2) + tmp[2] * in(0, 2) + tmp[7] * in(1, 2);
+		m4(3, 3) = tmp[10] * in(2, 2) + tmp[4] * in(0, 2) + tmp[9] * in(1, 2);
+		m4(3, 3) -= tmp[8] * in(1, 2) + tmp[11] * in(2, 2) + tmp[5] * in(0, 2);
+
+		T det = in(0, 0) * m4(0, 0) + in(1, 0) * m4(0, 1) + in(2, 0) * m4(0, 2) + in(3, 0) * m4(0, 3); // First column of the input dotted with the first row of m4
+
+		ANKI_ASSERT(!isZero<T>(det)); // Cannot invert, det == 0
+		det = 1.0 / det; // From here on det holds the reciprocal
+		m4 *= det; // Scale every element by 1/det
+		return m4;
+	}
+
+	/// In-place inversion: replaces this matrix with the result of getInverse(). See getInverse
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	void invert()
+	{
+		(*this) = getInverse();
+	}
+
+	/// Something like m4 = m0 * m1, but a lot cheaper: only the first three rows are computed and the 4th row of
+	/// the result is written directly as (0, 0, 0, 1).
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	static TM combineTransformations(const TMat& m0, const TMat& m1)
+	{
+		// Debug check that both inputs look like transformations: the elements of each 4th row must sum to 1
+		ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - 1.0)
+					&& isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - 1.0));
+
+		TM out;
+
+		for(U j = 0; j < 3; ++j)
+		{
+			// Upper-left 3x3 block: plain 3x3 product of the two upper-left blocks
+			for(U i = 0; i < 3; ++i)
+			{
+				out(j, i) = m0(j, 0) * m1(0, i) + m0(j, 1) * m1(1, i) + m0(j, 2) * m1(2, i);
+			}
+
+			// 4th column: same dot product against m1's 4th column, plus m0's own 4th column element
+			out(j, 3) = m0(j, 0) * m1(0, 3) + m0(j, 1) * m1(1, 3) + m0(j, 2) * m1(2, 3) + m0(j, 3);
+		}
+
+		// The last row is a constant
+		out(3, 0) = 0.0;
+		out(3, 1) = 0.0;
+		out(3, 2) = 0.0;
+		out(3, 3) = 1.0;
+
+		return out;
+	}
+
+	/// Create a new matrix that is equivalent to Mat4(this)*Mat4(b). Scalar (non-SIMD) version for 3x4 matrices.
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 4 && !HAS_SIMD)
+	TM combineTransformations(const TM& b) const
+	{
+		const auto& a = *this;
+		TM out;
+
+		for(U j = 0; j < 3; ++j)
+		{
+			// First three columns: 3x3 product of the left 3x3 blocks of a and b
+			for(U i = 0; i < 3; ++i)
+			{
+				out(j, i) = a(j, 0) * b(0, i) + a(j, 1) * b(1, i) + a(j, 2) * b(2, i);
+			}
+
+			// 4th column: same dot product against b's 4th column, plus a's own 4th column element
+			out(j, 3) = a(j, 0) * b(0, 3) + a(j, 1) * b(1, 3) + a(j, 2) * b(2, 3) + a(j, 3);
+		}
+
+		return out;
+	}
+
+	/// SIMD version of the 3x4 combine: row r of the result is a(r, 0..2) weighting the three rows of b, with
+	/// a(r, 3) added to the last component only.
+	ANKI_ENABLE_IF_EXPRESSION(J == 3 && I == 4 && HAS_SIMD)
+	TM combineTransformations(const TM& b) const
+	{
+		const auto& a = *this;
+		TM out;
+
+		for(U row = 0; row < 3; ++row)
+		{
+			// Accumulate the three weighted rows of b, k = 0 first
+			__m128 acc = _mm_mul_ps(b.m_simd[0], _mm_set1_ps(a(row, 0)));
+			acc = _mm_add_ps(_mm_mul_ps(b.m_simd[1], _mm_set1_ps(a(row, 1))), acc);
+			acc = _mm_add_ps(_mm_mul_ps(b.m_simd[2], _mm_set1_ps(a(row, 2))), acc);
+
+			// Only the last component receives a(row, 3)
+			TVec4<F32> lastTerm(0.0, 0.0, 0.0, a(row, 3));
+			out.m_simd[row] = _mm_add_ps(lastTerm.getSimd(), acc);
+		}
+
+		return out;
+	}
+
+	/// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
+	/// @param fovX Horizontal field of view, must be > 0.
+	/// @param fovY Vertical field of view, must be > 0. It is halved and passed to tan().
+	/// @param near Distance of the near plane, must be > 0.
+	/// @param far Distance of the far plane, must be > 0 and different from @a near.
+	/// @return The projection matrix. Row 3 is (0, 0, -1, 0).
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	static ANKI_USE_RESULT TM calculatePerspectiveProjectionMatrix(T fovX, T fovY, T near, T far)
+	{
+		ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
+		ANKI_ASSERT(near != far); // Otherwise g is zero and both divisions by g below are divisions by zero
+		const T g = near - far;
+		const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
+
+		TM proj;
+		// NOTE(review): fovY / fovX stands in for 1/aspectRatio; that matches tan(fovY/2) / tan(fovX/2) only
+		// approximately - confirm this is the intended convention
+		proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
+		proj(0, 1) = T(0);
+		proj(0, 2) = T(0);
+		proj(0, 3) = T(0);
+		proj(1, 0) = T(0);
+		proj(1, 1) = f;
+		proj(1, 2) = T(0);
+		proj(1, 3) = T(0);
+		proj(2, 0) = T(0);
+		proj(2, 1) = T(0);
+		proj(2, 2) = far / g;
+		proj(2, 3) = (far * near) / g;
+		proj(3, 0) = T(0);
+		proj(3, 1) = T(0);
+		proj(3, 2) = T(-1);
+		proj(3, 3) = T(0);
+
+		return proj;
+	}
+
+	/// Calculate an orthographic projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
+	/// @param right  Right edge of the view volume, must differ from @a left.
+	/// @param left   Left edge of the view volume.
+	/// @param top    Top edge of the view volume, must differ from @a bottom.
+	/// @param bottom Bottom edge of the view volume.
+	/// @param near   Near plane, must differ from @a far.
+	/// @param far    Far plane.
+	/// @return The projection matrix. Row 3 is (0, 0, 0, 1).
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	static ANKI_USE_RESULT TM calculateOrthographicProjectionMatrix(T right, T left, T top, T bottom, T near, T far)
+	{
+		// The three extents below are used as divisors, so they are what must not be zero. Individual planes may
+		// legitimately be zero (e.g. left == 0 and bottom == 0)
+		ANKI_ASSERT(right != left && top != bottom && far != near);
+		const T difx = right - left;
+		const T dify = top - bottom;
+		const T difz = far - near;
+		const T tx = -(right + left) / difx;
+		const T ty = -(top + bottom) / dify;
+		const T tz = -near / difz;
+		TM m;
+
+		m(0, 0) = T(2) / difx;
+		m(0, 1) = T(0);
+		m(0, 2) = T(0);
+		m(0, 3) = tx;
+		m(1, 0) = T(0);
+		m(1, 1) = T(2) / dify;
+		m(1, 2) = T(0);
+		m(1, 3) = ty;
+		m(2, 0) = T(0);
+		m(2, 1) = T(0);
+		m(2, 2) = T(-1) / difz;
+		m(2, 3) = tz;
+		m(3, 0) = T(0);
+		m(3, 1) = T(0);
+		m(3, 2) = T(0);
+		m(3, 3) = T(1);
+
+		return m;
+	}
+
 	TMat lerp(const TMat& b, T t) const
 	{
 		return ((*this) * (1.0 - t)) + (b * t);
@@ -744,6 +1193,32 @@ public:
 		*this = getZero();
 	}
 
+	/// 3x3 only: reference to a function-local static identity matrix.
+	ANKI_ENABLE_IF_EXPRESSION(I == 3 && J == 3)
+	static const TM& getIdentity()
+	{
+		static const TM identity(
+			1.0, 0.0, 0.0,
+			0.0, 1.0, 0.0,
+			0.0, 0.0, 1.0);
+		return identity;
+	}
+
+	/// 4x4 only: reference to a function-local static identity matrix.
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 4)
+	static const TM& getIdentity()
+	{
+		static const TM identity(
+			1.0, 0.0, 0.0, 0.0,
+			0.0, 1.0, 0.0, 0.0,
+			0.0, 0.0, 1.0, 0.0,
+			0.0, 0.0, 0.0, 1.0);
+		return identity;
+	}
+
+	/// 3x4 only: reference to a function-local static matrix whose left 3x3 block is the identity and whose 4th
+	/// column is zero.
+	ANKI_ENABLE_IF_EXPRESSION(I == 4 && J == 3)
+	static const TM& getIdentity()
+	{
+		static const TM identity(
+			1.0, 0.0, 0.0, 0.0,
+			0.0, 1.0, 0.0, 0.0,
+			0.0, 0.0, 1.0, 0.0);
+		return identity;
+	}
+
+	/// Overwrite this matrix with the matrix returned by getIdentity().
+	void setIdentity()
+	{
+		const TM& identity = getIdentity();
+		(*this) = identity;
+	}
+
 	template<typename TAlloc>
 	String toString(TAlloc alloc) const
 	{
@@ -764,7 +1239,7 @@ protected:
 		Array2d<T, J, I> m_arr2;
 		T m_carr1[N]; ///< For easier debugging with gdb
 		T m_carr2[J][I]; ///< For easier debugging with gdb
-		TSimd m_simd;
+		SimdArray m_simd;
 	};
 	/// @}
 };

+ 3 - 71
src/anki/math/Mat3.h

@@ -17,7 +17,7 @@ namespace anki
 /// 3x3 Matrix. Mainly used for rotations. It includes many helpful member functions. Its row major. The columns are
 /// the x,y,z axis @note TMat3*TMat3: 27 muls 18 adds
 template<typename T>
-class TMat3 : public TMat<T, 3, 3, Array<T, 9>, TMat3<T>, TVec3<T>, TVec3<T>>
+class TMat3 : public TMat<T, 3, 3, TMat3<T>, TVec3<T>, TVec3<T>>
 {
 	/// @name Friends
 	/// @{
@@ -32,61 +32,10 @@ class TMat3 : public TMat<T, 3, 3, Array<T, 9>, TMat3<T>, TVec3<T>, TVec3<T>>
 	/// @}
 
 public:
-	using Base = TMat<T, 3, 3, Array<T, 9>, TMat3<T>, TVec3<T>, TVec3<T>>;
+	using Base = TMat<T, 3, 3, TMat3<T>, TVec3<T>, TVec3<T>>;
 
 	using Base::setRotationPart;
-
-	/// @name Constructors
-	/// @{
-	TMat3()
-		: Base()
-	{
-	}
-
-	TMat3(const TMat3& b)
-		: Base(b)
-	{
-	}
-
-	TMat3(T m00, T m01, T m02, T m10, T m11, T m12, T m20, T m21, T m22)
-	{
-		TMat3& m = *this;
-		m(0, 0) = m00;
-		m(0, 1) = m01;
-		m(0, 2) = m02;
-		m(1, 0) = m10;
-		m(1, 1) = m11;
-		m(1, 2) = m12;
-		m(2, 0) = m20;
-		m(2, 1) = m21;
-		m(2, 2) = m22;
-	}
-
-	explicit TMat3(const T f)
-		: Base(f)
-	{
-	}
-
-	explicit TMat3(const T arr[])
-		: Base(arr)
-	{
-	}
-
-	explicit TMat3(const TQuat<T>& q)
-	{
-		setRotationPart(q);
-	}
-
-	explicit TMat3(const TEuler<T>& e)
-	{
-		setRotationPart(e);
-	}
-
-	explicit TMat3(const TAxisang<T>& axisang)
-	{
-		setRotationPart(axisang);
-	}
-	/// @}
+	using Base::Base;
 
 	/// @name Other
 	/// @{
@@ -133,23 +82,6 @@ public:
 	{
 		(*this) = getInverse();
 	}
-
-	void setIdentity()
-	{
-		(*this) = getIdentity();
-	}
-
-	static const TMat3& getZero()
-	{
-		static const TMat3 zero(0.0);
-		return zero;
-	}
-
-	static const TMat3& getIdentity()
-	{
-		static const TMat3 ident(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0);
-		return ident;
-	}
 	/// @}
 };
 

+ 2 - 101
src/anki/math/Mat3x4.h

@@ -14,68 +14,18 @@ namespace anki
 /// @addtogroup math
 /// @{
 
-/// Template struct that gives the type of the TMat4 SIMD
-template<typename T>
-class TMat3x4Simd
-{
-public:
-	using Type = Array<T, 12>;
-};
-
-#if ANKI_SIMD == ANKI_SIMD_SSE
-// Specialize for F32
-template<>
-class TMat3x4Simd<F32>
-{
-public:
-	using Type = Array<__m128, 3>;
-};
-#endif
-
 /// 3x4 Matrix. Mainly used for transformations. It includes many helpful member functions. Its row major. The columns
 /// are the x,y,z axis
 template<typename T>
-class alignas(16) TMat3x4 : public TMat<T, 3, 4, typename TMat3x4Simd<T>::Type, TMat3x4<T>, TVec4<T>, TVec3<T>>
+class alignas(16) TMat3x4 : public TMat<T, 3, 4, TMat3x4<T>, TVec4<T>, TVec3<T>>
 {
 public:
-	using Base = TMat<T, 3, 4, typename TMat3x4Simd<T>::Type, TMat3x4<T>, TVec4<T>, TVec3<T>>;
+	using Base = TMat<T, 3, 4, TMat3x4<T>, TVec4<T>, TVec3<T>>;
 
 	using Base::Base;
 
 	/// @name Constructors
 	/// @{
-	TMat3x4()
-		: Base()
-	{
-	}
-
-	TMat3x4(const TMat3x4& b)
-		: Base(b)
-	{
-	}
-
-	TMat3x4(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23)
-	{
-		TMat3x4& m = *this;
-		m(0, 0) = m00;
-		m(0, 1) = m01;
-		m(0, 2) = m02;
-		m(0, 3) = m03;
-		m(1, 0) = m10;
-		m(1, 1) = m11;
-		m(1, 2) = m12;
-		m(1, 3) = m13;
-		m(2, 0) = m20;
-		m(2, 1) = m21;
-		m(2, 2) = m22;
-		m(2, 3) = m23;
-	}
-
-	explicit TMat3x4(const T f)
-		: Base(f)
-	{
-	}
-
 	explicit TMat3x4(const TMat3<T>& m3)
 	{
 		TMat3x4& m = *this;
@@ -170,46 +120,6 @@ public:
 		(*this) = TMat3x4(t.getOrigin(), t.getRotation(), t.getScale());
 	}
 	/// @}
-
-	/// @name Other
-	/// @{
-
-	/// Create a new matrix that is equivalent to Mat4(this)*Mat4(b)
-	TMat3x4 combineTransformations(const TMat3x4& b) const
-	{
-		const TMat3x4& a = *static_cast<const TMat3x4*>(this);
-		TMat3x4 c;
-
-		c(0, 0) = a(0, 0) * b(0, 0) + a(0, 1) * b(1, 0) + a(0, 2) * b(2, 0);
-		c(0, 1) = a(0, 0) * b(0, 1) + a(0, 1) * b(1, 1) + a(0, 2) * b(2, 1);
-		c(0, 2) = a(0, 0) * b(0, 2) + a(0, 1) * b(1, 2) + a(0, 2) * b(2, 2);
-		c(1, 0) = a(1, 0) * b(0, 0) + a(1, 1) * b(1, 0) + a(1, 2) * b(2, 0);
-		c(1, 1) = a(1, 0) * b(0, 1) + a(1, 1) * b(1, 1) + a(1, 2) * b(2, 1);
-		c(1, 2) = a(1, 0) * b(0, 2) + a(1, 1) * b(1, 2) + a(1, 2) * b(2, 2);
-		c(2, 0) = a(2, 0) * b(0, 0) + a(2, 1) * b(1, 0) + a(2, 2) * b(2, 0);
-		c(2, 1) = a(2, 0) * b(0, 1) + a(2, 1) * b(1, 1) + a(2, 2) * b(2, 1);
-		c(2, 2) = a(2, 0) * b(0, 2) + a(2, 1) * b(1, 2) + a(2, 2) * b(2, 2);
-
-		c(0, 3) = a(0, 0) * b(0, 3) + a(0, 1) * b(1, 3) + a(0, 2) * b(2, 3) + a(0, 3);
-
-		c(1, 3) = a(1, 0) * b(0, 3) + a(1, 1) * b(1, 3) + a(1, 2) * b(2, 3) + a(1, 3);
-
-		c(2, 3) = a(2, 0) * b(0, 3) + a(2, 1) * b(1, 3) + a(2, 2) * b(2, 3) + a(2, 3);
-
-		return c;
-	}
-
-	void setIdentity()
-	{
-		(*this) = getIdentity();
-	}
-
-	static const TMat3x4& getIdentity()
-	{
-		static const TMat3x4 ident(1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f);
-		return ident;
-	}
-	/// @}
 };
 
 #if ANKI_SIMD == ANKI_SIMD_SSE
@@ -218,18 +128,9 @@ public:
 
 using TMat3x4F32Base = TMat3x4<F32>::Base;
 
-template<>
-TMat3x4F32Base::TMat(const TMat3x4F32Base& b);
-
-template<>
-TMat3x4F32Base::TMat(const F32 f);
-
 template<>
 inline TVec3<F32> TMat3x4F32Base::operator*(const TVec4<F32>& b) const;
 
-template<>
-TMat3x4<F32> TMat3x4<F32>::combineTransformations(const TMat3x4<F32>& b) const;
-
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 #	error "TODO"

+ 0 - 44
src/anki/math/Mat3x4.inl.h

@@ -10,24 +10,6 @@ namespace anki
 
 #if ANKI_SIMD == ANKI_SIMD_SSE
 
-template<>
-inline TMat3x4F32Base::TMat(const TMat3x4F32Base& b)
-{
-	for(U i = 0; i < 3; i++)
-	{
-		m_simd[i] = b.m_simd[i];
-	}
-}
-
-template<>
-inline TMat3x4F32Base::TMat(const F32 f)
-{
-	for(U i = 0; i < 3; i++)
-	{
-		m_simd[i] = _mm_set1_ps(f);
-	}
-}
-
 template<>
 inline TVec3<F32> TMat3x4F32Base::operator*(const TVec4<F32>& b) const
 {
@@ -41,32 +23,6 @@ inline TVec3<F32> TMat3x4F32Base::operator*(const TVec4<F32>& b) const
 	return v;
 }
 
-template<>
-inline TMat3x4<F32> TMat3x4<F32>::combineTransformations(const TMat3x4<F32>& b) const
-{
-	TMat3x4<F32> c;
-	const TMat3x4<F32>& a = *static_cast<const TMat3x4<F32>*>(this);
-
-	for(U i = 0; i < 3; i++)
-	{
-		__m128 t1, t2;
-
-		t1 = _mm_set1_ps(a(i, 0));
-		t2 = _mm_mul_ps(b.m_simd[0], t1);
-		t1 = _mm_set1_ps(a(i, 1));
-		t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
-		t1 = _mm_set1_ps(a(i, 2));
-		t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
-
-		TVec4<F32> v4(0.0, 0.0, 0.0, a(i, 3));
-		t2 = _mm_add_ps(v4.getSimd(), t2);
-
-		c.m_simd[i] = t2;
-	}
-
-	return c;
-}
-
 #endif
 
 } // end namespace anki

+ 3 - 292
src/anki/math/Mat4.h

@@ -13,28 +13,10 @@ namespace anki
 /// @addtogroup math
 /// @{
 
-/// Template struct that gives the type of the TVec4 SIMD
-template<typename T>
-class TMat4Simd
-{
-public:
-	using Type = Array<T, 16>;
-};
-
-#if ANKI_SIMD == ANKI_SIMD_SSE
-// Specialize for F32
-template<>
-class TMat4Simd<F32>
-{
-public:
-	using Type = Array<__m128, 4>;
-};
-#endif
-
 /// 4x4 Matrix. Used mainly for transformations but not necessarily. It's row major. SSE optimized
 /// @note TMat4*TMat4: 64 muls 48 adds
 template<typename T>
-class alignas(16) TMat4 : public TMat<T, 4, 4, typename TMat4Simd<T>::Type, TMat4<T>, TVec4<T>, TVec4<T>>
+class alignas(16) TMat4 : public TMat<T, 4, 4, TMat4<T>, TVec4<T>, TVec4<T>>
 {
 	/// @name Friends
 	/// @{
@@ -49,52 +31,16 @@ class alignas(16) TMat4 : public TMat<T, 4, 4, typename TMat4Simd<T>::Type, TMat
 	/// @}
 
 public:
-	using Base = TMat<T, 4, 4, typename TMat4Simd<T>::Type, TMat4<T>, TVec4<T>, TVec4<T>>;
+	using Base = TMat<T, 4, 4, TMat4<T>, TVec4<T>, TVec4<T>>;
 
 	using Base::getTranslationPart;
 	using Base::setTranslationPart;
 	using Base::getRotationPart;
 	using Base::setRotationPart;
+	using Base::Base;
 
 	/// @name Constructors
 	/// @{
-	TMat4()
-		: Base()
-	{
-	}
-
-	TMat4(const TMat4& b)
-		: Base(b)
-	{
-	}
-
-	TMat4(
-		T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23, T m30, T m31, T m32, T m33)
-	{
-		TMat4& m = *this;
-		m(0, 0) = m00;
-		m(0, 1) = m01;
-		m(0, 2) = m02;
-		m(0, 3) = m03;
-		m(1, 0) = m10;
-		m(1, 1) = m11;
-		m(1, 2) = m12;
-		m(1, 3) = m13;
-		m(2, 0) = m20;
-		m(2, 1) = m21;
-		m(2, 2) = m22;
-		m(2, 3) = m23;
-		m(3, 0) = m30;
-		m(3, 1) = m31;
-		m(3, 2) = m32;
-		m(3, 3) = m33;
-	}
-
-	explicit TMat4(const T f)
-		: Base(f)
-	{
-	}
-
 	explicit TMat4(const TMat3<T>& m3)
 	{
 		TMat4& m = *this;
@@ -191,103 +137,6 @@ public:
 
 	/// @name Other
 	/// @{
-	T getDet() const
-	{
-		const TMat4& t = *this;
-		return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0)
-			   - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0) + t(0, 1) * t(1, 3) * t(2, 2) * t(3, 0)
-			   + t(0, 2) * t(1, 1) * t(2, 3) * t(3, 0) - t(0, 1) * t(1, 2) * t(2, 3) * t(3, 0)
-			   - t(0, 3) * t(1, 2) * t(2, 0) * t(3, 1) + t(0, 2) * t(1, 3) * t(2, 0) * t(3, 1)
-			   + t(0, 3) * t(1, 0) * t(2, 2) * t(3, 1) - t(0, 0) * t(1, 3) * t(2, 2) * t(3, 1)
-			   - t(0, 2) * t(1, 0) * t(2, 3) * t(3, 1) + t(0, 0) * t(1, 2) * t(2, 3) * t(3, 1)
-			   + t(0, 3) * t(1, 1) * t(2, 0) * t(3, 2) - t(0, 1) * t(1, 3) * t(2, 0) * t(3, 2)
-			   - t(0, 3) * t(1, 0) * t(2, 1) * t(3, 2) + t(0, 0) * t(1, 3) * t(2, 1) * t(3, 2)
-			   + t(0, 1) * t(1, 0) * t(2, 3) * t(3, 2) - t(0, 0) * t(1, 1) * t(2, 3) * t(3, 2)
-			   - t(0, 2) * t(1, 1) * t(2, 0) * t(3, 3) + t(0, 1) * t(1, 2) * t(2, 0) * t(3, 3)
-			   + t(0, 2) * t(1, 0) * t(2, 1) * t(3, 3) - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3)
-			   - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
-	}
-
-	/// Invert using Cramer's rule
-	TMat4 getInverse() const
-	{
-		Array<T, 12> tmp;
-		const TMat4& in = (*this);
-		TMat4 m4;
-
-		tmp[0] = in(2, 2) * in(3, 3);
-		tmp[1] = in(3, 2) * in(2, 3);
-		tmp[2] = in(1, 2) * in(3, 3);
-		tmp[3] = in(3, 2) * in(1, 3);
-		tmp[4] = in(1, 2) * in(2, 3);
-		tmp[5] = in(2, 2) * in(1, 3);
-		tmp[6] = in(0, 2) * in(3, 3);
-		tmp[7] = in(3, 2) * in(0, 3);
-		tmp[8] = in(0, 2) * in(2, 3);
-		tmp[9] = in(2, 2) * in(0, 3);
-		tmp[10] = in(0, 2) * in(1, 3);
-		tmp[11] = in(1, 2) * in(0, 3);
-
-		m4(0, 0) = tmp[0] * in(1, 1) + tmp[3] * in(2, 1) + tmp[4] * in(3, 1);
-		m4(0, 0) -= tmp[1] * in(1, 1) + tmp[2] * in(2, 1) + tmp[5] * in(3, 1);
-		m4(0, 1) = tmp[1] * in(0, 1) + tmp[6] * in(2, 1) + tmp[9] * in(3, 1);
-		m4(0, 1) -= tmp[0] * in(0, 1) + tmp[7] * in(2, 1) + tmp[8] * in(3, 1);
-		m4(0, 2) = tmp[2] * in(0, 1) + tmp[7] * in(1, 1) + tmp[10] * in(3, 1);
-		m4(0, 2) -= tmp[3] * in(0, 1) + tmp[6] * in(1, 1) + tmp[11] * in(3, 1);
-		m4(0, 3) = tmp[5] * in(0, 1) + tmp[8] * in(1, 1) + tmp[11] * in(2, 1);
-		m4(0, 3) -= tmp[4] * in(0, 1) + tmp[9] * in(1, 1) + tmp[10] * in(2, 1);
-		m4(1, 0) = tmp[1] * in(1, 0) + tmp[2] * in(2, 0) + tmp[5] * in(3, 0);
-		m4(1, 0) -= tmp[0] * in(1, 0) + tmp[3] * in(2, 0) + tmp[4] * in(3, 0);
-		m4(1, 1) = tmp[0] * in(0, 0) + tmp[7] * in(2, 0) + tmp[8] * in(3, 0);
-		m4(1, 1) -= tmp[1] * in(0, 0) + tmp[6] * in(2, 0) + tmp[9] * in(3, 0);
-		m4(1, 2) = tmp[3] * in(0, 0) + tmp[6] * in(1, 0) + tmp[11] * in(3, 0);
-		m4(1, 2) -= tmp[2] * in(0, 0) + tmp[7] * in(1, 0) + tmp[10] * in(3, 0);
-		m4(1, 3) = tmp[4] * in(0, 0) + tmp[9] * in(1, 0) + tmp[10] * in(2, 0);
-		m4(1, 3) -= tmp[5] * in(0, 0) + tmp[8] * in(1, 0) + tmp[11] * in(2, 0);
-
-		tmp[0] = in(2, 0) * in(3, 1);
-		tmp[1] = in(3, 0) * in(2, 1);
-		tmp[2] = in(1, 0) * in(3, 1);
-		tmp[3] = in(3, 0) * in(1, 1);
-		tmp[4] = in(1, 0) * in(2, 1);
-		tmp[5] = in(2, 0) * in(1, 1);
-		tmp[6] = in(0, 0) * in(3, 1);
-		tmp[7] = in(3, 0) * in(0, 1);
-		tmp[8] = in(0, 0) * in(2, 1);
-		tmp[9] = in(2, 0) * in(0, 1);
-		tmp[10] = in(0, 0) * in(1, 1);
-		tmp[11] = in(1, 0) * in(0, 1);
-
-		m4(2, 0) = tmp[0] * in(1, 3) + tmp[3] * in(2, 3) + tmp[4] * in(3, 3);
-		m4(2, 0) -= tmp[1] * in(1, 3) + tmp[2] * in(2, 3) + tmp[5] * in(3, 3);
-		m4(2, 1) = tmp[1] * in(0, 3) + tmp[6] * in(2, 3) + tmp[9] * in(3, 3);
-		m4(2, 1) -= tmp[0] * in(0, 3) + tmp[7] * in(2, 3) + tmp[8] * in(3, 3);
-		m4(2, 2) = tmp[2] * in(0, 3) + tmp[7] * in(1, 3) + tmp[10] * in(3, 3);
-		m4(2, 2) -= tmp[3] * in(0, 3) + tmp[6] * in(1, 3) + tmp[11] * in(3, 3);
-		m4(2, 3) = tmp[5] * in(0, 3) + tmp[8] * in(1, 3) + tmp[11] * in(2, 3);
-		m4(2, 3) -= tmp[4] * in(0, 3) + tmp[9] * in(1, 3) + tmp[10] * in(2, 3);
-		m4(3, 0) = tmp[2] * in(2, 2) + tmp[5] * in(3, 2) + tmp[1] * in(1, 2);
-		m4(3, 0) -= tmp[4] * in(3, 2) + tmp[0] * in(1, 2) + tmp[3] * in(2, 2);
-		m4(3, 1) = tmp[8] * in(3, 2) + tmp[0] * in(0, 2) + tmp[7] * in(2, 2);
-		m4(3, 1) -= tmp[6] * in(2, 2) + tmp[9] * in(3, 2) + tmp[1] * in(0, 2);
-		m4(3, 2) = tmp[6] * in(1, 2) + tmp[11] * in(3, 2) + tmp[3] * in(0, 2);
-		m4(3, 2) -= tmp[10] * in(3, 2) + tmp[2] * in(0, 2) + tmp[7] * in(1, 2);
-		m4(3, 3) = tmp[10] * in(2, 2) + tmp[4] * in(0, 2) + tmp[9] * in(1, 2);
-		m4(3, 3) -= tmp[8] * in(1, 2) + tmp[11] * in(2, 2) + tmp[5] * in(0, 2);
-
-		T det = in(0, 0) * m4(0, 0) + in(1, 0) * m4(0, 1) + in(2, 0) * m4(0, 2) + in(3, 0) * m4(0, 3);
-
-		ANKI_ASSERT(!isZero<T>(det)); // Cannot invert, det == 0
-		det = 1.0 / det;
-		m4 *= det;
-		return m4;
-	}
-
-	/// See getInverse
-	void invert()
-	{
-		(*this) = getInverse();
-	}
 
 	/// If we suppose this matrix represents a transformation, return the inverted transformation
 	TMat4 getInverseTransformation() const
@@ -298,50 +147,6 @@ public:
 		return TMat4(invertedTsl.xyz0(), invertedRot);
 	}
 
-	void setIdentity()
-	{
-		(*this) = getIdentity();
-	}
-
-	static const TMat4& getIdentity()
-	{
-		static const TMat4 ident(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
-		return ident;
-	}
-
-	/// 12 muls, 27 adds. Something like m4 = m0 * m1 but without touching the 4rth row and allot faster
-	static TMat4 combineTransformations(const TMat4& m0, const TMat4& m1)
-	{
-		// See the clean code in < r664
-
-		// one of the 2 mat4 doesnt represent transformation
-		ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - 1.0)
-					&& isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - 1.0));
-
-		TMat4 m4;
-
-		m4(0, 0) = m0(0, 0) * m1(0, 0) + m0(0, 1) * m1(1, 0) + m0(0, 2) * m1(2, 0);
-		m4(0, 1) = m0(0, 0) * m1(0, 1) + m0(0, 1) * m1(1, 1) + m0(0, 2) * m1(2, 1);
-		m4(0, 2) = m0(0, 0) * m1(0, 2) + m0(0, 1) * m1(1, 2) + m0(0, 2) * m1(2, 2);
-		m4(1, 0) = m0(1, 0) * m1(0, 0) + m0(1, 1) * m1(1, 0) + m0(1, 2) * m1(2, 0);
-		m4(1, 1) = m0(1, 0) * m1(0, 1) + m0(1, 1) * m1(1, 1) + m0(1, 2) * m1(2, 1);
-		m4(1, 2) = m0(1, 0) * m1(0, 2) + m0(1, 1) * m1(1, 2) + m0(1, 2) * m1(2, 2);
-		m4(2, 0) = m0(2, 0) * m1(0, 0) + m0(2, 1) * m1(1, 0) + m0(2, 2) * m1(2, 0);
-		m4(2, 1) = m0(2, 0) * m1(0, 1) + m0(2, 1) * m1(1, 1) + m0(2, 2) * m1(2, 1);
-		m4(2, 2) = m0(2, 0) * m1(0, 2) + m0(2, 1) * m1(1, 2) + m0(2, 2) * m1(2, 2);
-
-		m4(0, 3) = m0(0, 0) * m1(0, 3) + m0(0, 1) * m1(1, 3) + m0(0, 2) * m1(2, 3) + m0(0, 3);
-
-		m4(1, 3) = m0(1, 0) * m1(0, 3) + m0(1, 1) * m1(1, 3) + m0(1, 2) * m1(2, 3) + m0(1, 3);
-
-		m4(2, 3) = m0(2, 0) * m1(0, 3) + m0(2, 1) * m1(1, 3) + m0(2, 2) * m1(2, 3) + m0(2, 3);
-
-		m4(3, 0) = m4(3, 1) = m4(3, 2) = 0.0;
-		m4(3, 3) = 1.0;
-
-		return m4;
-	}
-
 	/// @note 9 muls, 9 adds
 	TVec3<T> transform(const TVec3<T>& v) const
 	{
@@ -352,67 +157,6 @@ public:
 			m(2, 0) * v.x() + m(2, 1) * v.y() + m(2, 2) * v.z() + m(2, 3));
 	}
 
-	/// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
-	static TMat4 calculatePerspectiveProjectionMatrix(T fovX, T fovY, T near, T far)
-	{
-		ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
-		T g = near - far;
-
-		T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
-
-		TMat4 proj;
-		proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
-		proj(0, 1) = T(0);
-		proj(0, 2) = T(0);
-		proj(0, 3) = T(0);
-		proj(1, 0) = T(0);
-		proj(1, 1) = f;
-		proj(1, 2) = T(0);
-		proj(1, 3) = T(0);
-		proj(2, 0) = T(0);
-		proj(2, 1) = T(0);
-		proj(2, 2) = far / g;
-		proj(2, 3) = (far * near) / g;
-		proj(3, 0) = T(0);
-		proj(3, 1) = T(0);
-		proj(3, 2) = T(-1);
-		proj(3, 3) = T(0);
-
-		return proj;
-	}
-
-	/// Calculate an orthographic projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
-	static TMat4 calculateOrthographicProjectionMatrix(T right, T left, T top, T bottom, T near, T far)
-	{
-		ANKI_ASSERT(right != T(0) && left != T(0) && top != T(0) && bottom != T(0) && near != T(0) && far != T(0));
-		T difx = right - left;
-		T dify = top - bottom;
-		T difz = far - near;
-		T tx = -(right + left) / difx;
-		T ty = -(top + bottom) / dify;
-		T tz = -near / difz;
-		TMat4 m;
-
-		m(0, 0) = T(2) / difx;
-		m(0, 1) = T(0);
-		m(0, 2) = T(0);
-		m(0, 3) = tx;
-		m(1, 0) = T(0);
-		m(1, 1) = T(2) / dify;
-		m(1, 2) = T(0);
-		m(1, 3) = ty;
-		m(2, 0) = T(0);
-		m(2, 1) = T(0);
-		m(2, 2) = T(-1) / difz;
-		m(2, 3) = tz;
-		m(3, 0) = T(0);
-		m(3, 1) = T(0);
-		m(3, 2) = T(0);
-		m(3, 3) = T(1);
-
-		return m;
-	}
-
 	/// Given the parameters that construct a projection matrix extract 4 values that can be used to unproject a point
 	/// from NDC to view space.
 	/// @code
@@ -472,42 +216,9 @@ public:
 
 using TMat4F32Base = TMat4<F32>::Base;
 
-template<>
-TMat4F32Base::TMat(const TMat4F32Base& b);
-
-template<>
-TMat4F32Base::TMat(const F32 f);
-
-template<>
-TMat4<F32>& TMat4F32Base::operator=(const TMat4<F32>& b);
-
-template<>
-TMat4<F32> TMat4F32Base::operator+(const TMat4<F32>& b) const;
-
-template<>
-TMat4<F32>& TMat4F32Base::operator+=(const TMat4<F32>& b);
-
-template<>
-TMat4<F32> TMat4F32Base::operator-(const TMat4<F32>& b) const;
-
-template<>
-TMat4<F32>& TMat4F32Base::operator-=(const TMat4<F32>& b);
-
-template<>
-TMat4<F32> TMat4F32Base::operator*(const TMat4<F32>& b) const;
-
 template<>
 TVec4<F32> TMat4F32Base::operator*(const TVec4<F32>& b) const;
 
-template<>
-void TMat4F32Base::setRows(const TVec4<F32>& a, const TVec4<F32>& b, const TVec4<F32>& c, const TVec4<F32>& d);
-
-template<>
-void TMat4F32Base::setRow(const U i, const TVec4<F32>& v);
-
-template<>
-void TMat4F32Base::transpose();
-
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 #	error "TODO"

+ 0 - 116
src/anki/math/Mat4.inl.h

@@ -48,101 +48,6 @@ TMat4<T> operator/(const T f, const TMat4<T>& m4)
 
 #if ANKI_SIMD == ANKI_SIMD_SSE
 
-template<>
-inline TMat4F32Base::TMat(const TMat4F32Base& b)
-{
-	for(U i = 0; i < 4; i++)
-	{
-		m_simd[i] = b.m_simd[i];
-	}
-}
-
-template<>
-inline TMat4F32Base::TMat(const F32 f)
-{
-	for(U i = 0; i < 4; i++)
-	{
-		m_simd[i] = _mm_set1_ps(f);
-	}
-}
-
-template<>
-inline TMat4<F32>& TMat4F32Base::operator=(const TMat4<F32>& b)
-{
-	for(U i = 0; i < 4; i++)
-	{
-		m_simd[i] = b.m_simd[i];
-	}
-	return static_cast<TMat4<F32>&>(*this);
-}
-
-template<>
-inline TMat4<F32> TMat4F32Base::operator+(const TMat4<F32>& b) const
-{
-	TMat4<F32> c;
-	for(U i = 0; i < 4; i++)
-	{
-		c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
-	}
-	return c;
-}
-
-template<>
-inline TMat4<F32>& TMat4F32Base::operator+=(const TMat4<F32>& b)
-{
-	for(U i = 0; i < 4; i++)
-	{
-		m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
-	}
-	return static_cast<TMat4<F32>&>(*this);
-}
-
-template<>
-inline TMat4<F32> TMat4F32Base::operator-(const TMat4<F32>& b) const
-{
-	TMat4<F32> c;
-	for(U i = 0; i < 4; i++)
-	{
-		c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
-	}
-	return c;
-}
-
-template<>
-inline TMat4<F32>& TMat4F32Base::operator-=(const TMat4<F32>& b)
-{
-	for(U i = 0; i < 4; i++)
-	{
-		m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
-	}
-	return static_cast<TMat4<F32>&>(*this);
-}
-
-template<>
-inline TMat4<F32> TMat4F32Base::operator*(const TMat4<F32>& b) const
-{
-	TMat4<F32> out;
-	const TMat4<F32>& m = *static_cast<const TMat4<F32>*>(this);
-
-	for(U i = 0; i < 4; i++)
-	{
-		__m128 t1, t2;
-
-		t1 = _mm_set1_ps(m(i, 0));
-		t2 = _mm_mul_ps(b.m_simd[0], t1);
-		t1 = _mm_set1_ps(m(i, 1));
-		t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
-		t1 = _mm_set1_ps(m(i, 2));
-		t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
-		t1 = _mm_set1_ps(m(i, 3));
-		t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
-
-		out.m_simd[i] = t2;
-	}
-
-	return out;
-}
-
 template<>
 inline TVec4<F32> TMat4F32Base::operator*(const TVec4<F32>& b) const
 {
@@ -156,27 +61,6 @@ inline TVec4<F32> TMat4F32Base::operator*(const TVec4<F32>& b) const
 	return v;
 }
 
-template<>
-inline void TMat4F32Base::setRows(const TVec4<F32>& a, const TVec4<F32>& b, const TVec4<F32>& c, const TVec4<F32>& d)
-{
-	m_simd[0] = a.getSimd();
-	m_simd[1] = b.getSimd();
-	m_simd[2] = c.getSimd();
-	m_simd[3] = d.getSimd();
-}
-
-template<>
-inline void TMat4F32Base::setRow(const U i, const TVec4<F32>& v)
-{
-	m_simd[i] = v.getSimd();
-}
-
-template<>
-inline void TMat4F32Base::transpose()
-{
-	_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
-}
-
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 #	error "TODO"

+ 2 - 2
src/anki/math/Quat.h

@@ -16,10 +16,10 @@ namespace anki
 
 /// Quaternion. Used in rotations
 template<typename T>
-class alignas(16) TQuat : public TVec<T, 4, typename TVec4Simd<T>::Type, TQuat<T>>
+class alignas(16) TQuat : public TVec<T, 4, TQuat<T>>
 {
 public:
-	using Base = TVec<T, 4, typename TVec4Simd<T>::Type, TQuat<T>>;
+	using Base = TVec<T, 4, TQuat<T>>;
 
 	using Base::x;
 	using Base::y;

+ 32 - 1
src/anki/math/Simd.h

@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include <anki/Config.h>
+#include <anki/util/StdTypes.h>
 
 #if ANKI_SIMD == ANKI_SIMD_SSE
 #	include <smmintrin.h>
@@ -16,3 +16,34 @@
 #else
 #	error "See file"
 #endif
+
+namespace anki
+{
+
+/// Maps a scalar type T and a component count N to the storage type that the math classes use for their
+/// `m_simd` member. This primary template is the fallback that applies when no native SIMD register type is
+/// specialized for the (T, N) pair: it resolves to a plain C array of N scalars.
+/// @tparam T The scalar type.
+/// @tparam N The number of scalars held by one Type instance.
+/// @note NOTE(review): a raw C array is not assignable, so generic code such as `m_simd = simd` can only be
+///       instantiated for (T, N) pairs that have a SIMD specialization. Confirm that this is intended.
+template<typename T, U N>
+class MathSimd
+{
+public:
+	using Type = T[N];
+};
+
+#if ANKI_SIMD == ANKI_SIMD_SSE
+/// SSE build: four F32 values are stored in a single __m128.
+template<>
+class MathSimd<F32, 4>
+{
+public:
+	using Type = __m128;
+};
+#elif ANKI_SIMD == ANKI_SIMD_NEON
+/// NEON build: four F32 values are stored in a single float32x4_t.
+template<>
+class MathSimd<F32, 4>
+{
+public:
+	using Type = float32x4_t;
+};
+#endif
+
+} // end namespace anki

+ 259 - 38
src/anki/math/Vec.h

@@ -6,7 +6,6 @@
 #pragma once
 
 #include <anki/math/CommonIncludes.h>
-#include <type_traits>
 
 namespace anki
 {
@@ -15,14 +14,15 @@ namespace anki
 /// @{
 
 /// Common code for all vectors
-template<typename T, U N, typename TSimd, typename TV>
+template<typename T, U N, typename TV>
 class TVec
 {
 public:
 	using Scalar = T;
-	using Simd = TSimd;
-	static constexpr U SIZE = N;
+	using Simd = typename MathSimd<T, N>::Type;
+	static constexpr U COMPONENT_COUNT = N;
 	static constexpr Bool IS_INTEGER = std::is_integral<T>::value;
+	static constexpr Bool HAS_VEC4_SIMD = N == 4 && std::is_same<T, F32>::value && ANKI_SIMD == ANKI_SIMD_SSE;
 
 	/// @name Constructors
 	/// @{
@@ -30,6 +30,7 @@ public:
 	{
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TVec(const TVec& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -38,30 +39,43 @@ public:
 		}
 	}
 
+	/// Copy the other vector by assigning its SIMD storage. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The vector to copy from.
+	/// @note NOTE(review): this is a constructor template and the language never treats a constructor template as
+	///       a copy constructor, so plain copies may still use the implicitly generated one. Confirm reachability.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TVec(const TVec& b)
+	{
+		m_simd = b.m_simd;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(N == 2)
 	TVec(const T x_, const T y_)
 	{
-		static_assert(N == 2, "Wrong vector");
 		x() = x_;
 		y() = y_;
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N == 3)
 	TVec(const T x_, const T y_, const T z_)
 	{
-		static_assert(N == 3, "Wrong vector");
 		x() = x_;
 		y() = y_;
 		z() = z_;
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N == 4 && !HAS_VEC4_SIMD)
 	TVec(const T x_, const T y_, const T z_, const T w_)
 	{
-		static_assert(N == 4, "Wrong vector");
 		x() = x_;
 		y() = y_;
 		z() = z_;
 		w() = w_;
 	}
 
+	/// Build the vector from four scalars using the _mm_set_ps intrinsic. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param x_ Value of the x component.
+	/// @param y_ Value of the y component.
+	/// @param z_ Value of the z component.
+	/// @param w_ Value of the w component.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TVec(const T x_, const T y_, const T z_, const T w_)
+	{
+		// _mm_set_ps takes its arguments from the highest lane down to the lowest, hence the reversed order.
+		m_simd = _mm_set_ps(w_, z_, y_, x_);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	explicit TVec(const T f)
 	{
 		for(U i = 0; i < N; ++i)
@@ -70,6 +84,13 @@ public:
 		}
 	}
 
+	/// Set every component to the same scalar using the _mm_set1_ps intrinsic. Enabled only when HAS_VEC4_SIMD
+	/// is true.
+	/// @param f The value that is broadcast to all four components.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	explicit TVec(const T f)
+	{
+		m_simd = _mm_set1_ps(f);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	explicit TVec(const T arr[])
 	{
 		for(U i = 0; i < N; ++i)
@@ -78,6 +99,12 @@ public:
 		}
 	}
 
+	/// Load the four components from a plain array. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param arr Pointer to at least four consecutive scalars. No particular alignment is required, which matches
+	///            the element-by-element overload used when SIMD is not available.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	explicit TVec(const T arr[])
+	{
+		// Unaligned load: the caller's array is not guaranteed to sit on a 16 byte boundary, which the aligned
+		// _mm_load_ps would require.
+		m_simd = _mm_loadu_ps(arr);
+	}
+
 	explicit TVec(const Simd& simd)
 	{
 		m_simd = simd;
@@ -106,27 +133,27 @@ public:
 		return m_arr[1];
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N > 2)
 	T& z()
 	{
-		static_assert(N > 2, "Wrong vector");
 		return m_arr[2];
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N > 2)
 	T z() const
 	{
-		static_assert(N > 2, "Wrong vector");
 		return m_arr[2];
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N > 3)
 	T& w()
 	{
-		static_assert(N > 3, "Wrong vector");
 		return m_arr[3];
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(N > 3)
 	T w() const
 	{
-		static_assert(N > 3, "Wrong vector");
 		return m_arr[3];
 	}
 
@@ -1876,12 +1903,12 @@ public:
 		return m_arr[i];
 	}
 
-	TSimd& getSimd()
+	Simd& getSimd()
 	{
 		return m_simd;
 	}
 
-	const TSimd& getSimd() const
+	const Simd& getSimd() const
 	{
 		return m_simd;
 	}
@@ -1889,13 +1916,21 @@ public:
 
 	/// @name Operators with same type
 	/// @{
-	TVec& operator=(const TVec& b)
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
+	TV& operator=(const TV& b)
 	{
 		for(U i = 0; i < N; i++)
 		{
 			m_arr[i] = b.m_arr[i];
 		}
-		return *this;
+		return static_cast<TV&>(*this);
+	}
+
+	/// Copy b into this vector by assigning its SIMD storage. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The vector to copy from.
+	/// @return A reference to this object, cast to the derived vector type TV.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV& operator=(const TV& b)
+	{
+		m_simd = b.m_simd;
+		return static_cast<TV&>(*this);
 	}
 
 	TV& operator=(const TV& b)
@@ -1907,6 +1942,7 @@ public:
 		return static_cast<TV&>(*this);
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV operator+(const TV& b) const
 	{
 		TV out;
@@ -1917,6 +1953,13 @@ public:
 		return out;
 	}
 
+	/// Component-wise addition done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The right hand side operand.
+	/// @return A new vector holding (*this) + b.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV operator+(const TV& b) const
+	{
+		const __m128 sum = _mm_add_ps(m_simd, b.m_simd);
+		return TV(sum);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV& operator+=(const TV& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -1926,6 +1969,14 @@ public:
 		return static_cast<TV&>(*this);
 	}
 
+	/// Component-wise in-place addition done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The vector to add to this one.
+	/// @return A reference to this object, cast to the derived vector type TV.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV& operator+=(const TV& b)
+	{
+		m_simd = _mm_add_ps(m_simd, b.m_simd);
+		// Cast to TV, not to a hard-coded TVec4<F32>: other 4 component F32 types derive from this class as well.
+		return static_cast<TV&>(*this);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV operator-(const TV& b) const
 	{
 		TV out;
@@ -1936,6 +1987,13 @@ public:
 		return out;
 	}
 
+	/// Component-wise subtraction done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The right hand side operand.
+	/// @return A new vector holding (*this) - b.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV operator-(const TV& b) const
+	{
+		const __m128 difference = _mm_sub_ps(m_simd, b.m_simd);
+		return TV(difference);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV& operator-=(const TV& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -1945,6 +2003,14 @@ public:
 		return static_cast<TV&>(*this);
 	}
 
+	/// Component-wise in-place subtraction done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The vector to subtract from this one.
+	/// @return A reference to this object, cast to the derived vector type TV.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV& operator-=(const TV& b)
+	{
+		const __m128 difference = _mm_sub_ps(m_simd, b.m_simd);
+		m_simd = difference;
+		return static_cast<TV&>(*this);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV operator*(const TV& b) const
 	{
 		TV out;
@@ -1955,6 +2021,13 @@ public:
 		return out;
 	}
 
+	/// Component-wise multiplication done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The right hand side operand.
+	/// @return A new vector holding the per-component product of (*this) and b.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV operator*(const TV& b) const
+	{
+		const __m128 product = _mm_mul_ps(m_simd, b.m_simd);
+		return TV(product);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV& operator*=(const TV& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -1964,6 +2037,14 @@ public:
 		return static_cast<TV&>(*this);
 	}
 
+	/// Component-wise in-place multiplication done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is
+	/// true.
+	/// @param b The vector to multiply this one with.
+	/// @return A reference to this object, cast to the derived vector type TV.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV& operator*=(const TV& b)
+	{
+		const __m128 product = _mm_mul_ps(m_simd, b.m_simd);
+		m_simd = product;
+		return static_cast<TV&>(*this);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV operator/(const TV& b) const
 	{
 		TV out;
@@ -1975,6 +2056,13 @@ public:
 		return out;
 	}
 
+	/// Component-wise division done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The divisor vector.
+	/// @return A new vector holding the per-component quotient of (*this) and b.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV operator/(const TV& b) const
+	{
+		const __m128 quotient = _mm_div_ps(m_simd, b.m_simd);
+		return TV(quotient);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV& operator/=(const TV& b)
 	{
 		for(U i = 0; i < N; i++)
@@ -1985,6 +2073,14 @@ public:
 		return static_cast<TV&>(*this);
 	}
 
+	/// Component-wise in-place division done on the whole SIMD value. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The divisor vector.
+	/// @return A reference to this object, cast to the derived vector type TV.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV& operator/=(const TV& b)
+	{
+		const __m128 quotient = _mm_div_ps(m_simd, b.m_simd);
+		m_simd = quotient;
+		return static_cast<TV&>(*this);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV operator-() const
 	{
 		TV out;
@@ -1995,6 +2091,12 @@ public:
 		return out;
 	}
 
+	/// Negate every component. Enabled only when HAS_VEC4_SIMD is true.
+	/// @return A new vector with the sign of each component flipped.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV operator-() const
+	{
+		// -0.0f has only the sign bit set, so the XOR flips the sign of each lane and nothing else. A float literal
+		// is used (as in getAbs) to avoid an implicit double to float conversion.
+		return TV(_mm_xor_ps(m_simd, _mm_set1_ps(-0.0f)));
+	}
+
 	Bool operator==(const TV& b) const
 	{
 		for(U i = 0; i < N; i++)
@@ -2110,9 +2212,10 @@ public:
 
 	/// @name Other
 	/// @{
-	T dot(const TV& b) const
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
+	ANKI_USE_RESULT T dot(const TV& b) const
 	{
-		T out = 0.0;
+		T out = T(0);
 		for(U i = 0; i < N; i++)
 		{
 			out += m_arr[i] * b.m_arr[i];
@@ -2120,9 +2223,89 @@ public:
 		return out;
 	}
 
-	T getLengthSquared() const
+	/// Dot product computed with the SSE4.1 dot product intrinsic. Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The other vector.
+	/// @return The sum of the four per-component products.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	ANKI_USE_RESULT T dot(const TV& b) const
 	{
-		T out = 0.0;
+		// Mask 0xF1: multiply all four lanes and write the sum to the lowest lane only.
+		const __m128 sum = _mm_dp_ps(m_simd, b.m_simd, 0xF1);
+		return _mm_cvtss_f32(sum);
+	}
+
+	/// Cross product of two 3 component vectors. Costs 6 multiplications and 3 subtractions.
+	/// @param b The right hand side operand.
+	/// @return (*this) x b.
+	ANKI_ENABLE_IF_EXPRESSION(N == 3)
+	ANKI_USE_RESULT TV cross(const TV& b) const
+	{
+		const T cx = y() * b.z() - z() * b.y();
+		const T cy = z() * b.x() - x() * b.z();
+		const T cz = x() * b.y() - y() * b.x();
+		return TV(cx, cy, cz);
+	}
+
+	/// Cross product of the xyz parts of two 4 component vectors; the w of the result is zero. Both operands
+	/// must have a zero w (asserted). This overload is used when HAS_VEC4_SIMD is false.
+	/// @param b The right hand side operand.
+	/// @return (xyz x b.xyz, 0).
+	ANKI_ENABLE_IF_EXPRESSION(N == 4 && !HAS_VEC4_SIMD)
+	ANKI_USE_RESULT TV cross(const TV& b) const
+	{
+		ANKI_ASSERT(w() == T(0));
+		ANKI_ASSERT(b.w() == T(0));
+		const auto xyzCross = xyz().cross(b.xyz());
+		return TV(xyzCross, T(0));
+	}
+
+	/// Cross product of the xyz parts of two 4 component vectors, computed with SSE shuffles. Both operands must
+	/// have a zero w (asserted). Enabled only when HAS_VEC4_SIMD is true.
+	/// @param b The right hand side operand.
+	/// @return The cross product, with w = this.w * b.w - this.w * b.w.
+	ANKI_ENABLE_IF_EXPRESSION(N == 4 && HAS_VEC4_SIMD)
+	ANKI_USE_RESULT TV cross(const TV& b) const
+	{
+		ANKI_ASSERT(w() == T(0));
+		ANKI_ASSERT(b.w() == T(0));
+
+		// Lane selectors: the first yields (y, z, x, w), the second yields (z, x, y, w).
+		const int yzxwMask = _MM_SHUFFLE(3, 0, 2, 1);
+		const int zxywMask = _MM_SHUFFLE(3, 1, 0, 2);
+
+		const __m128 thisYzx = _mm_shuffle_ps(m_simd, m_simd, yzxwMask);
+		const __m128 otherZxy = _mm_shuffle_ps(b.m_simd, b.m_simd, zxywMask);
+		const __m128 positiveTerms = _mm_mul_ps(thisYzx, otherZxy);
+
+		const __m128 thisZxy = _mm_shuffle_ps(m_simd, m_simd, zxywMask);
+		const __m128 otherYzx = _mm_shuffle_ps(b.m_simd, b.m_simd, yzxwMask);
+		const __m128 negativeTerms = _mm_mul_ps(thisZxy, otherYzx);
+
+		return TV(_mm_sub_ps(positiveTerms, negativeTerms));
+	}
+
+	/// Project this vector onto another vector.
+	/// @param toThis The vector to project onto. It is divided by its own squared length, so it must not be zero.
+	/// @return toThis scaled by dot(*this, toThis) / dot(toThis, toThis).
+	ANKI_ENABLE_IF_EXPRESSION(N == 3)
+	ANKI_USE_RESULT TV projectTo(const TV& toThis) const
+	{
+		const auto scale = dot(toThis) / toThis.dot(toThis);
+		return toThis * scale;
+	}
+
+	/// Project this 4 component vector onto another one. This vector must have a zero w (asserted) and the w of
+	/// the result is forced to zero through xyz0().
+	/// @param toThis The vector to project onto. It is divided by its own squared length, so it must not be zero.
+	/// @return toThis scaled by dot(*this, toThis) / dot(toThis, toThis), with w set to zero.
+	// Enabled for N == 4: the body needs w() and xyz0(), which a 2 component vector does not have.
+	ANKI_ENABLE_IF_EXPRESSION(N == 4)
+	ANKI_USE_RESULT TV projectTo(const TV& toThis) const
+	{
+		ANKI_ASSERT(w() == T(0));
+		return (toThis * ((*this).dot(toThis) / (toThis.dot(toThis)))).xyz0();
+	}
+
+	/// Project this point onto a ray.
+	/// @param rayOrigin The origin of the ray.
+	/// @param rayDir The direction of the ray. NOTE(review): presumably expected to be normalized because no
+	///               division by its length takes place; confirm against the callers.
+	/// @return rayOrigin + rayDir * dot(*this - rayOrigin, rayDir).
+	ANKI_ENABLE_IF_EXPRESSION(N == 3)
+	ANKI_USE_RESULT TV projectTo(const TV& rayOrigin, const TV& rayDir) const
+	{
+		const auto distanceAlongRay = ((*this) - rayOrigin).dot(rayDir);
+		return rayOrigin + rayDir * distanceAlongRay;
+	}
+
+	/// Project this point onto a ray. This vector and both parameters must have a zero w (asserted).
+	/// @param rayOrigin The origin of the ray.
+	/// @param rayDir The direction of the ray. NOTE(review): presumably expected to be normalized because no
+	///               division by its length takes place; confirm against the callers.
+	/// @return rayOrigin + rayDir * dot(*this - rayOrigin, rayDir).
+	ANKI_ENABLE_IF_EXPRESSION(N == 4)
+	ANKI_USE_RESULT TV projectTo(const TV& rayOrigin, const TV& rayDir) const
+	{
+		ANKI_ASSERT(w() == T(0));
+		ANKI_ASSERT(rayOrigin.w() == T(0));
+		ANKI_ASSERT(rayDir.w() == T(0));
+		const auto distanceAlongRay = ((*this) - rayOrigin).dot(rayDir);
+		return rayOrigin + rayDir * distanceAlongRay;
+	}
+
+	/// Perspective divide: multiply all four components by 1 / w. The reciprocal is clamped to
+	/// [-1e+11, 1e+11] so that an infinite reciprocal does not reach the multiplication.
+	/// @return (*this) multiplied by the clamped reciprocal of w.
+	ANKI_ENABLE_IF_EXPRESSION(N == 4)
+	ANKI_USE_RESULT TV perspectiveDivide() const
+	{
+		auto reciprocalW = T(1) / w(); // This may become (+-)inf
+		if(reciprocalW > 1e+11)
+		{
+			reciprocalW = 1e+11; // Clamp the upper end
+		}
+		if(reciprocalW < -1e+11)
+		{
+			reciprocalW = -1e+11; // Clamp the lower end
+		}
+		return (*this) * reciprocalW;
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
+	ANKI_USE_RESULT T getLengthSquared() const
+	{
+		T out = T(0);
 		for(U i = 0; i < N; i++)
 		{
 			out += m_arr[i] * m_arr[i];
@@ -2130,37 +2313,62 @@ public:
 		return out;
 	}
 
-	T getLength() const
+	/// Squared length computed with the SSE4.1 dot product intrinsic. Enabled only when HAS_VEC4_SIMD is true.
+	/// @return The dot product of this vector with itself.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	ANKI_USE_RESULT T getLengthSquared() const
+	{
+		// Mask 0xF1: multiply all four lanes and write the sum to the lowest lane only.
+		const __m128 sumOfSquares = _mm_dp_ps(m_simd, m_simd, 0xF1);
+		return _mm_cvtss_f32(sumOfSquares);
+	}
+
+	ANKI_USE_RESULT T getLength() const
 	{
 		return sqrt<T>(getLengthSquared());
 	}
 
-	T getDistanceSquared(const TV& b) const
+	ANKI_USE_RESULT T getDistanceSquared(const TV& b) const
 	{
 		return ((*this) - b).getLengthSquared();
 	}
 
-	T getDistance(const TV& b) const
+	/// Distance between this vector and b.
+	/// @param b The other vector.
+	/// @return The square root of getDistanceSquared(b).
+	ANKI_USE_RESULT T getDistance(const TV& b) const
 	{
-		return sqrt<T>(getDistance(b));
+		// Take the root of the squared distance. The previous code called getDistance() again and so recursed
+		// without end.
+		return sqrt<T>(getDistanceSquared(b));
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	void normalize()
 	{
 		(*this) /= getLength();
 	}
 
+	/// Normalize this vector in place using SSE. Enabled only when HAS_VEC4_SIMD is true.
+	/// @note _mm_rsqrt_ps returns an approximate reciprocal square root, so the result is less precise than
+	///       dividing by getLength().
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	void normalize()
+	{
+		// Mask 0xFF: the dot product is written to all four lanes.
+		const __m128 lengthSquared = _mm_dp_ps(m_simd, m_simd, 0xFF);
+		const __m128 inverseLength = _mm_rsqrt_ps(lengthSquared);
+		m_simd = _mm_mul_ps(m_simd, inverseLength);
+	}
+
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV getNormalized() const
 	{
 		return (*this) / getLength();
 	}
 
+	/// Return a normalized copy of this vector using SSE. Enabled only when HAS_VEC4_SIMD is true.
+	/// @note _mm_rsqrt_ps returns an approximate reciprocal square root, so the result is less precise than
+	///       dividing by getLength().
+	/// @return This vector multiplied by the approximate reciprocal of its length.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV getNormalized() const
+	{
+		// Mask 0xFF: the dot product is written to all four lanes.
+		const __m128 lengthSquared = _mm_dp_ps(m_simd, m_simd, 0xFF);
+		const __m128 inverseLength = _mm_rsqrt_ps(lengthSquared);
+		return TV(_mm_mul_ps(m_simd, inverseLength));
+	}
+
 	/// Return lerp(this, v1, t)
 	TV lerp(const TV& v1, T t) const
 	{
 		return ((*this) * (1.0 - t)) + (v1 * t);
 	}
 
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV getAbs() const
 	{
 		TV out;
@@ -2171,6 +2379,13 @@ public:
 		return out;
 	}
 
+	/// Return a vector with the absolute value of every component. Enabled only when HAS_VEC4_SIMD is true.
+	/// @return A new vector whose components have their sign bit cleared.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV getAbs() const
+	{
+		// -0.0f has only the sign bit set. _mm_andnot_ps computes (~signBits) & m_simd, which clears that bit in
+		// each lane.
+		const __m128 signBits = _mm_set1_ps(-0.0f);
+		const __m128 magnitudes = _mm_andnot_ps(signBits, m_simd);
+		return TV(magnitudes);
+	}
+
 	/// Clamp between two values.
 	void clamp(const T& minv, const T& maxv)
 	{
@@ -2196,28 +2411,32 @@ public:
 	}
 
 	/// Get the min of all components.
-	TV min(const T& b) const
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
+	TV min(const TV& b) const
 	{
 		TV out;
 		for(U i = 0; i < N; ++i)
 		{
-			out[i] = anki::min<T>(m_arr[i], b);
+			out[i] = anki::min<T>(m_arr[i], b[i]);
 		}
 		return out;
 	}
 
 	/// Get the min of all components.
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
 	TV min(const TV& b) const
 	{
-		TV out;
-		for(U i = 0; i < N; ++i)
-		{
-			out[i] = anki::min<T>(m_arr[i], b[i]);
-		}
-		return out;
+		return TV(_mm_min_ps(m_simd, b.m_simd));
+	}
+
+	/// Component-wise minimum of this vector and a scalar.
+	/// @param b The scalar that every component is compared against.
+	/// @return The result of min() with a vector that has b in every component.
+	TV min(const T b) const
+	{
+		const TV broadcast(b);
+		return min(broadcast);
 	}
 
 	/// Get the max of all components.
+	ANKI_ENABLE_IF_EXPRESSION(!HAS_VEC4_SIMD)
 	TV max(const TV& b) const
 	{
 		TV out;
@@ -2229,14 +2448,16 @@ public:
 	}
 
 	/// Get the max of all components.
-	TV max(const T& b) const
+	ANKI_ENABLE_IF_EXPRESSION(HAS_VEC4_SIMD)
+	TV max(const TV& b) const
 	{
-		TV out;
-		for(U i = 0; i < N; ++i)
-		{
-			out[i] = anki::max<T>(m_arr[i], b);
-		}
-		return out;
+		return TV(_mm_max_ps(m_simd, b.m_simd));
+	}
+
+	/// Component-wise maximum of this vector and a scalar.
+	/// @param b The scalar that every component is compared against.
+	/// @return The result of max() with a vector that has b in every component.
+	TV max(const T b) const
+	{
+		const TV broadcast(b);
+		return max(broadcast);
 	}
 
 	/// Get a safe 1 / (*this)
@@ -2281,7 +2502,7 @@ protected:
 	union
 	{
 		Array<T, N> m_arr;
-		TSimd m_simd;
+		Simd m_simd;
 	};
 	/// @}
 };

+ 2 - 2
src/anki/math/Vec2.h

@@ -16,7 +16,7 @@ namespace anki
 
 /// 2D vector
 template<typename T>
-class TVec2 : public TVec<T, 2, Array<T, 2>, TVec2<T>>
+class TVec2 : public TVec<T, 2, TVec2<T>>
 {
 	/// @name Friends
 	/// @{
@@ -31,7 +31,7 @@ class TVec2 : public TVec<T, 2, Array<T, 2>, TVec2<T>>
 	///@}
 
 public:
-	using Base = TVec<T, 2, Array<T, 2>, TVec2<T>>;
+	using Base = TVec<T, 2, TVec2<T>>;
 
 	/// @name Constructors
 	/// @{

+ 2 - 23
src/anki/math/Vec3.h

@@ -16,7 +16,7 @@ namespace anki
 
 /// 3D vector template. One of the most used classes
 template<typename T>
-class TVec3 : public TVec<T, 3, Array<T, 3>, TVec3<T>>
+class TVec3 : public TVec<T, 3, TVec3<T>>
 {
 	/// @name Friends
 	/// @{
@@ -31,7 +31,7 @@ class TVec3 : public TVec<T, 3, Array<T, 3>, TVec3<T>>
 	/// @}
 
 public:
-	using Base = TVec<T, 3, Array<T, 3>, TVec3<T>>;
+	using Base = TVec<T, 3, TVec3<T>>;
 
 	using Base::x;
 	using Base::y;
@@ -70,27 +70,6 @@ public:
 	{
 	}
 	/// @}
-
-	/// @name Other
-	/// @{
-
-	/// 6 muls, 3 adds
-	TVec3 cross(const TVec3& b) const
-	{
-		return TVec3(y() * b.z() - z() * b.y(), z() * b.x() - x() * b.z(), x() * b.y() - y() * b.x());
-	}
-
-	TVec3 projectTo(const TVec3& toThis) const
-	{
-		return toThis * ((*this).dot(toThis) / (toThis.dot(toThis)));
-	}
-
-	TVec3 projectTo(const TVec3& rayOrigin, const TVec3& rayDir) const
-	{
-		const auto& a = *this;
-		return rayOrigin + rayDir * ((a - rayOrigin).dot(rayDir));
-	}
-	/// @}
 };
 
 /// @memberof TVec3

+ 2 - 137
src/anki/math/Vec4.h

@@ -14,35 +14,9 @@ namespace anki
 /// @addtogroup math
 /// @{
 
-/// Template struct that gives the type of the TVec4 SIMD
-template<typename T>
-class TVec4Simd
-{
-public:
-	using Type = Array<T, 4>;
-};
-
-#if ANKI_SIMD == ANKI_SIMD_SSE
-// Specialize for F32
-template<>
-class TVec4Simd<F32>
-{
-public:
-	using Type = __m128;
-};
-#elif ANKI_SIMD == ANKI_SIMD_NEON
-// Specialize for F32
-template<>
-class TVec4Simd<F32>
-{
-public:
-	using Type = float32x4_t;
-};
-#endif
-
 /// 4D vector. SIMD optimized
 template<typename T>
-class alignas(16) TVec4 : public TVec<T, 4, typename TVec4Simd<T>::Type, TVec4<T>>
+class alignas(16) TVec4 : public TVec<T, 4, TVec4<T>>
 {
 	/// @name Friends
 	/// @{
@@ -57,7 +31,7 @@ class alignas(16) TVec4 : public TVec<T, 4, typename TVec4Simd<T>::Type, TVec4<T
 	/// @}
 
 public:
-	using Base = TVec<T, 4, typename TVec4Simd<T>::Type, TVec4<T>>;
+	using Base = TVec<T, 4, TVec4<T>>;
 
 	using Base::w;
 	using Base::x;
@@ -113,30 +87,6 @@ public:
 	}
 	/// @}
 
-	/// @name Operators with same
-	/// @{
-
-	/// It's like calculating the cross of a TVec3
-	ANKI_USE_RESULT TVec4 cross(const TVec4& b) const
-	{
-		ANKI_ASSERT(isZero<T>(Base::w()));
-		ANKI_ASSERT(isZero<T>(b.w()));
-		return TVec4(Base::xyz().cross(b.xyz()), static_cast<T>(0));
-	}
-
-	ANKI_USE_RESULT TVec4 projectTo(const TVec4& toThis) const
-	{
-		ANKI_ASSERT(w() == T(0));
-		return (toThis * ((*this).dot(toThis) / (toThis.dot(toThis)))).xyz0();
-	}
-
-	ANKI_USE_RESULT TVec4 projectTo(const TVec4& rayOrigin, const TVec4& rayDir) const
-	{
-		const auto& a = *this;
-		return rayOrigin + rayDir * ((a - rayOrigin).dot(rayDir));
-	}
-	/// @{
-
 	/// @name Operators with other
 	/// @{
 
@@ -148,94 +98,9 @@ public:
 			x() * m4(0, 2) + y() * m4(1, 2) + z() * m4(2, 2) + w() * m4(3, 2),
 			x() * m4(0, 3) + y() * m4(1, 3) + z() * m4(2, 3) + w() * m4(3, 3));
 	}
-
-	/// Perspective divide. Divide the xyzw of this to the w of this. This method will handle some edge cases.
-	ANKI_USE_RESULT TVec4 perspectiveDivide() const
-	{
-		auto invw = T(1) / w(); // This may become (+-)inf
-		invw = (invw > 1e+11) ? 1e+11 : invw; // Clamp
-		invw = (invw < -1e+11) ? -1e+11 : invw; // Clamp
-		return (*this) * invw;
-	}
 	/// @}
 };
 
-#if ANKI_SIMD == ANKI_SIMD_SSE
-
-// Forward declare specializations
-
-template<>
-TVec4<F32>::TVec4(F32 f);
-
-template<>
-TVec4<F32>::TVec4(const F32 arr_[]);
-
-template<>
-TVec4<F32>::TVec4(const F32 x_, const F32 y_, const F32 z_, const F32 w_);
-
-template<>
-TVec4<F32>::TVec4(const TVec4<F32>& b);
-
-template<>
-TVec4<F32>& TVec4<F32>::Base::operator=(const TVec4<F32>& b);
-
-template<>
-TVec4<F32> TVec4<F32>::Base::operator+(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32>& TVec4<F32>::Base::operator+=(const TVec4<F32>& b);
-
-template<>
-TVec4<F32> TVec4<F32>::Base::operator-(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32>& TVec4<F32>::Base::operator-=(const TVec4<F32>& b);
-
-template<>
-TVec4<F32> TVec4<F32>::Base::operator*(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32>& TVec4<F32>::Base::operator*=(const TVec4<F32>& b);
-
-template<>
-TVec4<F32> TVec4<F32>::Base::operator/(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32>& TVec4<F32>::Base::operator/=(const TVec4<F32>& b);
-
-template<>
-F32 TVec4<F32>::Base::dot(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32> TVec4<F32>::Base::getNormalized() const;
-
-template<>
-void TVec4<F32>::Base::normalize();
-
-template<>
-TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const;
-
-template<>
-TVec4<F32> TVec4<F32>::Base::getAbs() const;
-
-template<>
-F32 TVec4<F32>::Base::getLengthSquared() const;
-
-template<>
-TVec4<F32> TVec4<F32>::Base::operator-() const;
-
-#elif ANKI_SIMD == ANKI_SIMD_NEON
-
-#	error "TODO"
-
-#endif
-
 /// F32 4D vector
 using Vec4 = TVec4<F32>;
 static_assert(sizeof(Vec4) == sizeof(F32) * 4, "Incorrect size");

+ 0 - 167
src/anki/math/Vec4.inl.h

@@ -36,171 +36,4 @@ TVec4<T> operator/(const T f, const TVec4<T>& v4)
 	return TVec4<T>(f) / v4;
 }
 
-#if ANKI_SIMD == ANKI_SIMD_SSE
-
-template<>
-inline TVec4<F32>::TVec4(F32 f)
-{
-	m_simd = _mm_set1_ps(f);
-}
-
-template<>
-inline TVec4<F32>::TVec4(const F32 arr[])
-{
-	m_simd = _mm_load_ps(arr);
-}
-
-template<>
-inline TVec4<F32>::TVec4(const F32 x_, const F32 y_, const F32 z_, const F32 w_)
-{
-	m_simd = _mm_set_ps(w_, z_, y_, x_);
-}
-
-template<>
-inline TVec4<F32>::TVec4(const TVec4<F32>& b)
-	: Base()
-{
-	m_simd = b.m_simd;
-}
-
-template<>
-inline TVec4<F32>& TVec4<F32>::Base::operator=(const TVec4<F32>& b)
-{
-	m_simd = b.m_simd;
-	return static_cast<TVec4<F32>&>(*this);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::operator+(const TVec4<F32>& b) const
-{
-	return TVec4<F32>(_mm_add_ps(m_simd, b.m_simd));
-}
-
-template<>
-inline TVec4<F32>& TVec4<F32>::Base::operator+=(const TVec4<F32>& b)
-{
-	m_simd = _mm_add_ps(m_simd, b.m_simd);
-	return static_cast<TVec4<F32>&>(*this);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::operator-(const TVec4<F32>& b) const
-{
-	return TVec4<F32>(_mm_sub_ps(m_simd, b.m_simd));
-}
-
-template<>
-inline TVec4<F32>& TVec4<F32>::Base::operator-=(const TVec4<F32>& b)
-{
-	m_simd = _mm_sub_ps(m_simd, b.m_simd);
-	return static_cast<TVec4<F32>&>(*this);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::operator*(const TVec4<F32>& b) const
-{
-	return TVec4<F32>(_mm_mul_ps(m_simd, b.m_simd));
-}
-
-template<>
-inline TVec4<F32>& TVec4<F32>::Base::operator*=(const TVec4<F32>& b)
-{
-	m_simd = _mm_mul_ps(m_simd, b.m_simd);
-	return static_cast<TVec4<F32>&>(*this);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::operator/(const TVec4<F32>& b) const
-{
-	return TVec4<F32>(_mm_div_ps(m_simd, b.m_simd));
-}
-
-template<>
-inline TVec4<F32>& TVec4<F32>::Base::operator/=(const TVec4<F32>& b)
-{
-	m_simd = _mm_div_ps(m_simd, b.m_simd);
-	return static_cast<TVec4<F32>&>(*this);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const
-{
-	ANKI_ASSERT(Base::w() == 0.0);
-	ANKI_ASSERT(b.w() == 0.0);
-
-	const auto& a = *this;
-	const int mask0 = _MM_SHUFFLE(3, 0, 2, 1);
-	const int mask1 = _MM_SHUFFLE(3, 1, 0, 2);
-
-	__m128 tmp0 = _mm_mul_ps(_mm_shuffle_ps(a.m_simd, a.m_simd, mask0), _mm_shuffle_ps(b.m_simd, b.m_simd, mask1));
-	__m128 tmp1 = _mm_mul_ps(_mm_shuffle_ps(a.m_simd, a.m_simd, mask1), _mm_shuffle_ps(b.m_simd, b.m_simd, mask0));
-
-	return TVec4<F32>(_mm_sub_ps(tmp0, tmp1));
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const
-{
-	__m128 out = _mm_min_ps(m_simd, b.m_simd);
-	return TVec4<F32>(out);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const
-{
-	__m128 out = _mm_max_ps(m_simd, b.m_simd);
-	return TVec4<F32>(out);
-}
-
-template<>
-inline F32 TVec4<F32>::Base::dot(const TVec4<F32>& b) const
-{
-	F32 o;
-	_mm_store_ss(&o, _mm_dp_ps(m_simd, b.m_simd, 0xF1));
-	return o;
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::getNormalized() const
-{
-	__m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
-	return TVec4<F32>(_mm_mul_ps(m_simd, inverse_norm));
-}
-
-template<>
-inline void TVec4<F32>::Base::normalize()
-{
-	__m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
-	m_simd = _mm_mul_ps(m_simd, inverseNorm);
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::getAbs() const
-{
-	static const __m128 signMask = _mm_set1_ps(-0.0f);
-	return TVec4<F32>(_mm_andnot_ps(signMask, m_simd));
-}
-
-template<>
-inline F32 TVec4<F32>::Base::getLengthSquared() const
-{
-	F32 o;
-	_mm_store_ss(&o, _mm_dp_ps(m_simd, m_simd, 0xF1));
-	return o;
-}
-
-template<>
-inline TVec4<F32> TVec4<F32>::Base::operator-() const
-{
-	TVec4<F32> o;
-	o.getSimd() = _mm_sub_ps(_mm_setzero_ps(), getSimd());
-	return o;
-}
-
-#elif ANKI_SIMD == ANKI_SIMD_NEON
-
-#	error "TODO"
-
-#endif
-
 } // end namespace anki

+ 24 - 0
src/anki/util/Functions.h

@@ -247,6 +247,30 @@ inline void splitThreadedProblem(
 
 /// Make a preprocessor token a string.
 #define ANKI_STRINGIZE(a) _ANKI_STRINGIZE(a)
+
+/// Use it to enable a method only when a type is the same as another type. The macro expands to a template
+/// header, so the method that follows it becomes a member template. Its second, defaulted template parameter only
+/// exists when std::is_same<TemplateType, EqualToType> holds. The `dependOn` parameter is and-ed into the
+/// condition so that the condition depends on a template parameter of the method itself.
+/// @param TemplateType The type to test, typically a template parameter of the enclosing class.
+/// @param EqualToType The type that TemplateType has to be equal to.
+/// @code
+/// template<typename T> class Foo {
+/// 	ANKI_ENABLE_IF_TYPE(T, int)
+/// 	void foo() {}
+/// };
+/// @endcode
+#define ANKI_ENABLE_IF_TYPE(TemplateType, EqualToType) \
+	template<bool dependOn = true, \
+		typename std::enable_if<std::is_same<TemplateType, EqualToType>::value && dependOn, int>::type = 0>
+
+/// Use it to enable a method only when a constant expression is true. The macro expands to a template header, so
+/// the method that follows it becomes a member template. Its second, defaulted template parameter only exists
+/// when the expression holds. The `dependOn` parameter is and-ed into the condition so that the condition depends
+/// on a template parameter of the method itself. The condition is turned into a type with std::conditional and
+/// tested with std::is_same before it is handed to std::enable_if.
+/// @param expression A constant expression that is convertible to bool.
+/// @code
+/// template<int N> class Foo {
+/// 	ANKI_ENABLE_IF_EXPRESSION(N == 10)
+/// 	void foo() {}
+/// };
+/// @endcode
+#define ANKI_ENABLE_IF_EXPRESSION(expression) \
+	template<bool dependOn = true, \
+		typename std::enable_if< \
+			(std::is_same<typename std::conditional<(expression) && dependOn, int, double>::type, int>::value), \
+			int>::type = 0>
 /// @}
 
 } // end namespace anki

+ 4 - 4
tests/math/Math.cpp

@@ -12,7 +12,7 @@ using namespace anki;
 template<typename Vec>
 void operatorsSame()
 {
-	const U size = Vec::SIZE;
+	const U size = Vec::COMPONENT_COUNT;
 	using T = typename Vec::Scalar;
 
 	Vec a, b;
@@ -63,7 +63,7 @@ void operatorsSame()
 template<typename Vec>
 void dot()
 {
-	const U size = Vec::SIZE;
+	const U size = Vec::COMPONENT_COUNT;
 	using T = typename Vec::Scalar;
 	T res = 0;
 	Vec vec;
@@ -84,7 +84,7 @@ template<typename Vec>
 void length()
 {
 	using T = typename Vec::Scalar;
-	U size = Vec::SIZE;
+	U size = Vec::COMPONENT_COUNT;
 	Vec vec;
 	T res = 0;
 
@@ -117,7 +117,7 @@ void length()
 template<typename Vec>
 void comparision()
 {
-	U size = Vec::SIZE;
+	U size = Vec::COMPONENT_COUNT;
 	Vec a, a1, b;
 
 	for(U i = 0; i < size; i++)