4 years ago · f3c4d60bdf
--- a/AnKi/Input/InputAndroid.cpp
+++ b/AnKi/Input/InputAndroid.cpp
@@ -45,10 +45,12 @@ Error Input::handleEvents()
 
				 	return Error::NONE;
			
 
				 }
			
 
				 
			
 
				-Error Input::initInternal(NativeWindow*)
			
 
				+Error Input::initInternal(NativeWindow* window)
			
 
				 {
			
 
				+	ANKI_ASSERT(window);
			
 
				 	g_androidApp->userData = this;
			
 
				 	g_androidApp->onAppCmd = handleAndroidEvents;
			
 
				+	m_nativeWindow = window;
			
 
				 
			
 
				 	return Error::NONE;
			
 
				 }
			
@@ -59,7 +61,9 @@ void Input::destroy()
 
				 
			
 
				 void Input::moveCursor(const Vec2& posNdc)
			
 
				 {
			
 
				-	// do nothing
			
 
				+	m_mousePosNdc = posNdc;
			
 
				+	m_mousePosWin =
			
 
				+		UVec2((posNdc * 0.5f + 0.5f) * Vec2(F32(m_nativeWindow->getWidth()), F32(m_nativeWindow->getHeight())));
			
 
				 }
			
 
				 
			
 
				 void Input::hideCursor(Bool hide)
			
--- a/AnKi/Math/Mat.h
+++ b/AnKi/Math/Mat.h
@@ -77,18 +77,20 @@ public:
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	explicit TMat(const T f)
			
 
				 	{
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			m_simd[i] = _mm_set1_ps(f);
			
 
				-#else
			
 
				+#	else
			
 
				 			m_simd[i] = {f, f, f, f};
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	explicit TMat(const T arr[])
			
 
				 	{
			
@@ -316,20 +318,22 @@ public:
 
				 		return c;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	TMat operator+(const TMat& b) const
			
 
				 	{
			
 
				 		TMat c;
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
			
 
				-#else
			
 
				+#	else
			
 
				 			c.m_simd[i] = m_simd[i] + b.m_simd[i];
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 		return c;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_SIMD)
			
 
				 	TMat& operator+=(const TMat& b)
			
@@ -341,19 +345,21 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	TMat& operator+=(const TMat& b)
			
 
				 	{
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
			
 
				-#else
			
 
				+#	else
			
 
				 			m_simd[i] += b.m_simd[i];
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_SIMD)
			
 
				 	TMat operator-(const TMat& b) const
			
@@ -366,20 +372,22 @@ public:
 
				 		return c;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	TMat operator-(const TMat& b) const
			
 
				 	{
			
 
				 		TMat c;
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
			
 
				-#else
			
 
				+#	else
			
 
				 			c.m_simd[i] = m_simd[i] - b.m_simd[i];
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 		return c;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_SIMD)
			
 
				 	TMat& operator-=(const TMat& b)
			
@@ -391,19 +399,21 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	TMat& operator-=(const TMat& b)
			
 
				 	{
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
			
 
				-#else
			
 
				+#	else
			
 
				 			m_simd[i] -= b.m_simd[i];
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
			
 
				 	TMat operator*(const TMat& b) const
			
@@ -424,6 +434,7 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_MAT4_SIMD)
			
 
				 	TMat operator*(const TMat& b) const
			
 
				 	{
			
@@ -431,7 +442,7 @@ public:
 
				 		const auto& m = *this;
			
 
				 		for(U i = 0; i < 4; i++)
			
 
				 		{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 			__m128 t1, t2;
			
 
				 
			
 
				 			t1 = _mm_set1_ps(m(i, 0));
			
@@ -444,7 +455,7 @@ public:
 
				 			t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
			
 
				 
			
 
				 			out.m_simd[i] = t2;
			
 
				-#else
			
 
				+#	else
			
 
				 			float32x4_t t1, t2;
			
 
				 
			
 
				 			t1 = vmovq_n_f32(m(i, 0));
			
@@ -457,11 +468,12 @@ public:
 
				 			t2 = b.m_simd[3] * t1 + t2;
			
 
				 
			
 
				 			out.m_simd[i] = t2;
			
 
				-#endif
			
 
				+#	endif
			
 
				 		}
			
 
				 
			
 
				 		return out;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	TMat& operator*=(const TMat& b)
			
 
				 	{
			
@@ -594,23 +606,25 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_SIMD)
			
 
				 	ColumnVec operator*(const RowVec& v) const
			
 
				 	{
			
 
				 		ColumnVec out;
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				 			_mm_store_ss(&out[i], _mm_dp_ps(m_simd[i], v.getSimd(), 0xF1));
			
 
				 		}
			
 
				-#else
			
 
				+#	else
			
 
				 		for(U i = 0; i < J; i++)
			
 
				 		{
			
 
				 			out[i] = RowVec(m_simd[i]).dot(v);
			
 
				 		}
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return out;
			
 
				 	}
			
 
				+#endif
			
 
				 	/// @}
			
 
				 
			
 
				 	/// @name Other
			
@@ -1038,20 +1052,22 @@ public:
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(J == I && HAS_SIMD)
			
 
				 	void transpose()
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
			
 
				-#else
			
 
				+#	else
			
 
				 		const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
			
 
				 		const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
			
 
				 		m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
			
 
				 		m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
			
 
				 		m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
			
 
				 		m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	void transposeRotationPart()
			
 
				 	{
			
@@ -1282,12 +1298,13 @@ public:
 
				 		return c;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(J == 3 && I == 4 && HAS_SIMD)
			
 
				 	TMat combineTransformations(const TMat& b) const
			
 
				 	{
			
 
				 		TMat c;
			
 
				 		const auto& a = *this;
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		for(U i = 0; i < 3; i++)
			
 
				 		{
			
 
				 			__m128 t1, t2;
			
@@ -1304,7 +1321,7 @@ public:
 
				 
			
 
				 			c.m_simd[i] = t2;
			
 
				 		}
			
 
				-#else
			
 
				+#	else
			
 
				 		for(U i = 0; i < 3; i++)
			
 
				 		{
			
 
				 			float32x4_t t1, t2;
			
@@ -1321,10 +1338,11 @@ public:
 
				 
			
 
				 			c.m_simd[i] = t2;
			
 
				 		}
			
 
				-#endif
			
 
				+#	endif
			
 
				 
			
 
				 		return c;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
			
 
				 	ANKI_ENABLE_METHOD(I == 4 && J == 4)
			
--- a/AnKi/Math/Vec.h
+++ b/AnKi/Math/Vec.h
@@ -69,15 +69,17 @@ public:
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	explicit TVec(const T f)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_set1_ps(f);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd = vdupq_n_f32(f);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	explicit TVec(const T arr[])
			
@@ -143,15 +145,17 @@ public:
 
				 		w() = w_;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec(const T x_, const T y_, const T z_, const T w_)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_set_ps(w_, z_, y_, x_);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd = {x_, y_, z_, w_};
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(N == 4)
			
 
				 	TVec(const TVec<T, 3>& a, const T w_)
			
@@ -2336,15 +2340,17 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec operator+(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_add_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(m_simd + b.m_simd);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec& operator+=(const TVec& b)
			
@@ -2356,16 +2362,18 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec& operator+=(const TVec& b)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_add_ps(m_simd, b.m_simd);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd += b.m_simd;
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec operator-(const TVec& b) const
			
@@ -2378,15 +2386,17 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec operator-(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_sub_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(m_simd - b.m_simd);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec& operator-=(const TVec& b)
			
@@ -2398,16 +2408,18 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec& operator-=(const TVec& b)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_sub_ps(m_simd, b.m_simd);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd -= b.m_simd;
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec operator*(const TVec& b) const
			
@@ -2420,15 +2432,17 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec operator*(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_mul_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(m_simd * b.m_simd);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec& operator*=(const TVec& b)
			
@@ -2440,16 +2454,18 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec& operator*=(const TVec& b)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_mul_ps(m_simd, b.m_simd);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd *= b.m_simd;
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec operator/(const TVec& b) const
			
@@ -2463,15 +2479,17 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec operator/(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_div_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(m_simd / b.m_simd);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec& operator/=(const TVec& b)
			
@@ -2484,16 +2502,18 @@ public:
 
				 		return *this;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec& operator/=(const TVec& b)
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_div_ps(m_simd, b.m_simd);
			
 
				-#else
			
 
				+#	else
			
 
				 		m_simd /= b.m_simd;
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return *this;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec operator-() const
			
@@ -2506,15 +2526,17 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec operator-() const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_xor_ps(m_simd, _mm_set1_ps(-0.0)));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(-m_simd);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(IS_INTEGER)
			
 
				 	TVec operator<<(const TVec& b) const
			
@@ -2856,20 +2878,22 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	T dot(const TVec& b) const
			
 
				 	{
			
 
				 		T o;
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		_mm_store_ss(&o, _mm_dp_ps(m_simd, b.m_simd, 0xF1));
			
 
				-#else
			
 
				+#	else
			
 
				 		const float32x4_t tmp = m_simd * b.m_simd;
			
 
				 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
			
 
				 		sum = vpadd_f32(sum, sum);
			
 
				 		o = sum[0];
			
 
				-#endif
			
 
				+#	endif
			
 
				 		return o;
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// 6 muls, 3 adds
			
 
				 	ANKI_ENABLE_METHOD(N == 3)
			
@@ -2887,12 +2911,13 @@ public:
 
				 		return TVec(xyz().cross(b.xyz()), T(0));
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(N == 4 && HAS_VEC4_SIMD)
			
 
				 	TVec cross(const TVec& b) const
			
 
				 	{
			
 
				 		ANKI_ASSERT(w() == T(0));
			
 
				 		ANKI_ASSERT(b.w() == T(0));
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		const auto& a = *this;
			
 
				 		constexpr unsigned int mask0 = _MM_SHUFFLE(3, 0, 2, 1);
			
 
				 		constexpr unsigned int mask1 = _MM_SHUFFLE(3, 1, 0, 2);
			
@@ -2903,7 +2928,7 @@ public:
 
				 			_mm_mul_ps(_mm_shuffle_ps(a.m_simd, a.m_simd, U8(mask1)), _mm_shuffle_ps(b.m_simd, b.m_simd, U8(mask0)));
			
 
				 
			
 
				 		return TVec(_mm_sub_ps(tmp0, tmp1));
			
 
				-#else
			
 
				+#	else
			
 
				 		TVec out;
			
 
				 		float32x4_t& c = out.m_simd;
			
 
				 		const float32x4_t& v0 = m_simd;
			
@@ -2914,8 +2939,9 @@ public:
 
				 		c = __builtin_shufflevector(c, c, 1, 2, 0, 3);
			
 
				 
			
 
				 		return out;
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(N == 3)
			
 
				 	TVec projectTo(const TVec& toThis) const
			
@@ -2995,13 +3021,14 @@ public:
 
				 		(*this) /= getLength();
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	void normalize()
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		const __m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
			
 
				 		m_simd = _mm_mul_ps(m_simd, inverseNorm);
			
 
				-#else
			
 
				+#	else
			
 
				 		// Dot (len squared)
			
 
				 		float32x4_t tmp = m_simd * m_simd;
			
 
				 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
			
@@ -3013,8 +3040,9 @@ public:
 
				 
			
 
				 		// Multiply
			
 
				 		m_simd *= mul;
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
			
 
				 	TVec getNormalized() const
			
@@ -3022,13 +3050,14 @@ public:
 
				 		return (*this) / getLength();
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec getNormalized() const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		const __m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
			
 
				 		return TVec(_mm_mul_ps(m_simd, inverse_norm));
			
 
				-#else
			
 
				+#	else
			
 
				 		// Dot (len squared)
			
 
				 		float32x4_t tmp = m_simd * m_simd;
			
 
				 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
			
@@ -3040,8 +3069,9 @@ public:
 
				 
			
 
				 		// Multiply
			
 
				 		return TVec(m_simd * mul);
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// Return lerp(this, v1, t)
			
 
				 	TVec lerp(const TVec& v1, T t) const
			
@@ -3060,16 +3090,18 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec abs() const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		const __m128 signMask = _mm_set1_ps(-0.0f);
			
 
				 		return TVec(_mm_andnot_ps(signMask, m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(vabsq_f32(m_simd));
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// Get clamped between two values.
			
 
				 	TVec clamp(const T minv, const T maxv) const
			
@@ -3095,16 +3127,18 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	/// Get the min of all components.
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec min(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_min_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(vminq_f32(m_simd, b.m_simd));
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// Get the min of all components.
			
 
				 	TVec min(const T b) const
			
@@ -3124,16 +3158,18 @@ public:
 
				 		return out;
			
 
				 	}
			
 
				 
			
 
				+#if ANKI_ENABLE_SIMD
			
 
				 	/// Get the max of all components.
			
 
				 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
			
 
				 	TVec max(const TVec& b) const
			
 
				 	{
			
 
				-#if ANKI_SIMD_SSE
			
 
				+#	if ANKI_SIMD_SSE
			
 
				 		return TVec(_mm_max_ps(m_simd, b.m_simd));
			
 
				-#else
			
 
				+#	else
			
 
				 		return TVec(vmaxq_f32(m_simd, b.m_simd));
			
 
				-#endif
			
 
				+#	endif
			
 
				 	}
			
 
				+#endif
			
 
				 
			
 
				 	/// Get the max of all components.
			
 
				 	TVec max(const T b) const
			
--- a/AnKi/Shaders/ApplyIrradianceToReflection.ankiprog
+++ b/AnKi/Shaders/ApplyIrradianceToReflection.ankiprog
@@ -23,15 +23,12 @@ void main()
 
				 {
			
 
				 	const UVec2 cubeSizeu = UVec2(imageSize(u_cubeTex));
			
 
				 	const Vec2 cubeSize = Vec2(cubeSizeu);
			
 
				-	if(gl_GlobalInvocationID.x >= cubeSizeu.x || gl_GlobalInvocationID.y >= cubeSizeu.y)
			
 
				-	{
			
 
				-		return;
			
 
				-	}
			
 
				+	const UVec2 globalInvocationID = min(gl_GlobalInvocationID.xy, cubeSizeu - 1u);
			
 
				 
			
 
				 	const U32 faceIdx = gl_LocalInvocationID.z;
			
 
				 
			
 
				 	// Compute the UVs to read the gbuffer from
			
 
				-	Vec2 sampleUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(cubeSize);
			
 
				+	Vec2 sampleUv = (Vec2(globalInvocationID) + 0.5) / Vec2(cubeSize);
			
 
				 	sampleUv.x *= (1.0 / 6.0);
			
 
				 	sampleUv.x += (1.0 / 6.0) * F32(faceIdx);
			
 
				 
			
@@ -48,7 +45,7 @@ void main()
 
				 	const Vec3 indirect = gbuffer.m_diffuse * irradiance;
			
 
				 
			
 
				 	// Read the prev color and apply indirect
			
 
				-	const IVec3 coords = IVec3(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, faceIdx);
			
 
				+	const IVec3 coords = IVec3(globalInvocationID, faceIdx);
			
 
				 	const Vec3 prevColor = imageLoad(u_cubeTex, coords).xyz;
			
 
				 	const Vec3 prevColorWithIndirectDiffuse = prevColor + gbuffer.m_diffuse * indirect;
			
 
				 
			
--- a/AnKi/Shaders/Ssao.glsl
+++ b/AnKi/Shaders/Ssao.glsl
@@ -134,18 +134,8 @@ Vec3 computeNormal(Vec2 uv, Vec3 origin, F32 depth)
 
				 void main(void)
			
 
				 {
			
 
				 #if USE_COMPUTE
			
 
				-	if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
			
 
				-	{
			
 
				-#	if DO_SOFT_BLUR
			
 
				-		// Store something anyway because alive threads might read it when SOFT_BLUR is enabled
			
 
				-		s_scratch[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec3(1.0);
			
 
				-#	endif
			
 
				-
			
 
				-		// Skip if it's out of bounds
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
			
 
				+	const UVec2 globalInvocationID = min(gl_GlobalInvocationID.xy, FB_SIZE - 1u);
			
 
				+	const Vec2 uv = (Vec2(globalInvocationID) + 0.5) / Vec2(FB_SIZE);
			
 
				 #else
			
 
				 	const Vec2 uv = in_uv;
			
 
				 #endif
			
@@ -229,7 +219,7 @@ void main(void)
 
				 
			
 
				 	// Store the result
			
 
				 #if USE_COMPUTE
			
 
				-	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(ssao));
			
 
				+	imageStore(out_img, IVec2(globalInvocationID), Vec4(ssao));
			
 
				 #else
			
 
				 	out_color = ssao;
			
 
				 #endif
			
--- a/AnKi/Shaders/SsgiReconstruct.ankiprog
+++ b/AnKi/Shaders/SsgiReconstruct.ankiprog
@@ -94,31 +94,25 @@ void main()
 
				 {
			
 
				 	const UVec2 IN_TEXTURE_SIZE = FB_SIZE / 2u;
			
 
				 
			
 
				-	// Initialize the storage for all threads, including helpers. The check should be a constexpr
			
 
				-	if((WORKGROUP_SIZE.x % IN_TEXTURE_SIZE.x) != 0u || (WORKGROUP_SIZE.y % IN_TEXTURE_SIZE.y) != 0u)
			
 
				+	Vec3 color;
			
 
				+	Vec4 depthRefs;
			
 
				+	ANKI_BRANCH if(gl_GlobalInvocationID.x < IN_TEXTURE_SIZE.x && gl_GlobalInvocationID.y < IN_TEXTURE_SIZE.y)
			
 
				 	{
			
 
				-		s_colors[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec3(0.0);
			
 
				-		s_depths[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec4(1000.0); // High value so it has low weight
			
 
				+		const Vec2 inTexelSize = 1.0 / Vec2(IN_TEXTURE_SIZE);
			
 
				+		const Vec2 fbTexelSize = 1.0 / Vec2(FB_SIZE);
			
 
				 
			
 
				-		memoryBarrierShared();
			
 
				-		barrier();
			
 
				-	}
			
 
				+		const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
			
 
				+		const Vec2 fbUv = (Vec2(gl_GlobalInvocationID.xy) * 2.0 + 1.0) / Vec2(FB_SIZE);
			
 
				 
			
 
				-	ANKI_BRANCH if(gl_GlobalInvocationID.x >= IN_TEXTURE_SIZE.x || gl_GlobalInvocationID.y >= IN_TEXTURE_SIZE.y)
			
 
				+		color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
			
 
				+		depthRefs = textureGather(sampler2D(u_depthTex, u_linearAnyClampSampler), fbUv, 0);
			
 
				+	}
			
 
				+	else
			
 
				 	{
			
 
				-		// Out of bounds
			
 
				-		return;
			
 
				+		color = Vec3(0.0);
			
 
				+		depthRefs = Vec4(1000.0);
			
 
				 	}
			
 
				 
			
 
				-	const Vec2 inTexelSize = 1.0 / Vec2(IN_TEXTURE_SIZE);
			
 
				-	const Vec2 fbTexelSize = 1.0 / Vec2(FB_SIZE);
			
 
				-
			
 
				-	const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
			
 
				-	const Vec2 fbUv = (Vec2(gl_GlobalInvocationID.xy) * 2.0 + 1.0) / Vec2(FB_SIZE);
			
 
				-
			
 
				-	const Vec3 color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
			
 
				-	const Vec4 depthRefs = textureGather(sampler2D(u_depthTex, u_linearAnyClampSampler), fbUv, 0);
			
 
				-
			
 
				 	s_colors[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = color;
			
 
				 	s_depths[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = depthRefs;