Browse Source

Android fixes

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
f3c4d60bdf

+ 6 - 2
AnKi/Input/InputAndroid.cpp

@@ -45,10 +45,12 @@ Error Input::handleEvents()
 	return Error::NONE;
 }
 
-Error Input::initInternal(NativeWindow*)
+Error Input::initInternal(NativeWindow* window)
 {
+	ANKI_ASSERT(window);
 	g_androidApp->userData = this;
 	g_androidApp->onAppCmd = handleAndroidEvents;
+	m_nativeWindow = window;
 
 	return Error::NONE;
 }
@@ -59,7 +61,9 @@ void Input::destroy()
 
 void Input::moveCursor(const Vec2& posNdc)
 {
-	// do nothing
+	m_mousePosNdc = posNdc;
+	m_mousePosWin =
+		UVec2((posNdc * 0.5f + 0.5f) * Vec2(F32(m_nativeWindow->getWidth()), F32(m_nativeWindow->getHeight())));
 }
 
 void Input::hideCursor(Bool hide)

+ 45 - 27
AnKi/Math/Mat.h

@@ -77,18 +77,20 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	explicit TMat(const T f)
 	{
 		for(U i = 0; i < J; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_set1_ps(f);
-#else
+#	else
 			m_simd[i] = {f, f, f, f};
-#endif
+#	endif
 		}
 	}
+#endif
 
 	explicit TMat(const T arr[])
 	{
@@ -316,20 +318,22 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat operator+(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < J; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
-#else
+#	else
 			c.m_simd[i] = m_simd[i] + b.m_simd[i];
-#endif
+#	endif
 		}
 		return c;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat& operator+=(const TMat& b)
@@ -341,19 +345,21 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat& operator+=(const TMat& b)
 	{
 		for(U i = 0; i < J; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
-#else
+#	else
 			m_simd[i] += b.m_simd[i];
-#endif
+#	endif
 		}
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat operator-(const TMat& b) const
@@ -366,20 +372,22 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat operator-(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < J; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
-#else
+#	else
 			c.m_simd[i] = m_simd[i] - b.m_simd[i];
-#endif
+#	endif
 		}
 		return c;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat& operator-=(const TMat& b)
@@ -391,19 +399,21 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat& operator-=(const TMat& b)
 	{
 		for(U i = 0; i < J; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
-#else
+#	else
 			m_simd[i] -= b.m_simd[i];
-#endif
+#	endif
 		}
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
 	TMat operator*(const TMat& b) const
@@ -424,6 +434,7 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_MAT4_SIMD)
 	TMat operator*(const TMat& b) const
 	{
@@ -431,7 +442,7 @@ public:
 		const auto& m = *this;
 		for(U i = 0; i < 4; i++)
 		{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 			__m128 t1, t2;
 
 			t1 = _mm_set1_ps(m(i, 0));
@@ -444,7 +455,7 @@ public:
 			t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
 
 			out.m_simd[i] = t2;
-#else
+#	else
 			float32x4_t t1, t2;
 
 			t1 = vmovq_n_f32(m(i, 0));
@@ -457,11 +468,12 @@ public:
 			t2 = b.m_simd[3] * t1 + t2;
 
 			out.m_simd[i] = t2;
-#endif
+#	endif
 		}
 
 		return out;
 	}
+#endif
 
 	TMat& operator*=(const TMat& b)
 	{
@@ -594,23 +606,25 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	ColumnVec operator*(const RowVec& v) const
 	{
 		ColumnVec out;
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		for(U i = 0; i < J; i++)
 		{
 			_mm_store_ss(&out[i], _mm_dp_ps(m_simd[i], v.getSimd(), 0xF1));
 		}
-#else
+#	else
 		for(U i = 0; i < J; i++)
 		{
 			out[i] = RowVec(m_simd[i]).dot(v);
 		}
-#endif
+#	endif
 		return out;
 	}
+#endif
 	/// @}
 
 	/// @name Other
@@ -1038,20 +1052,22 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(J == I && HAS_SIMD)
 	void transpose()
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
-#else
+#	else
 		const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
 		const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
 		m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
 		m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
 		m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
 		m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
-#endif
+#	endif
 	}
+#endif
 
 	void transposeRotationPart()
 	{
@@ -1282,12 +1298,13 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(J == 3 && I == 4 && HAS_SIMD)
 	TMat combineTransformations(const TMat& b) const
 	{
 		TMat c;
 		const auto& a = *this;
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		for(U i = 0; i < 3; i++)
 		{
 			__m128 t1, t2;
@@ -1304,7 +1321,7 @@ public:
 
 			c.m_simd[i] = t2;
 		}
-#else
+#	else
 		for(U i = 0; i < 3; i++)
 		{
 			float32x4_t t1, t2;
@@ -1321,10 +1338,11 @@ public:
 
 			c.m_simd[i] = t2;
 		}
-#endif
+#	endif
 
 		return c;
 	}
+#endif
 
 	/// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
 	ANKI_ENABLE_METHOD(I == 4 && J == 4)

+ 90 - 54
AnKi/Math/Vec.h

@@ -69,15 +69,17 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	explicit TVec(const T f)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_set1_ps(f);
-#else
+#	else
 		m_simd = vdupq_n_f32(f);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	explicit TVec(const T arr[])
@@ -143,15 +145,17 @@ public:
 		w() = w_;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec(const T x_, const T y_, const T z_, const T w_)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_set_ps(w_, z_, y_, x_);
-#else
+#	else
 		m_simd = {x_, y_, z_, w_};
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(N == 4)
 	TVec(const TVec<T, 3>& a, const T w_)
@@ -2336,15 +2340,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator+(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_add_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(m_simd + b.m_simd);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator+=(const TVec& b)
@@ -2356,16 +2362,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator+=(const TVec& b)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_add_ps(m_simd, b.m_simd);
-#else
+#	else
 		m_simd += b.m_simd;
-#endif
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator-(const TVec& b) const
@@ -2378,15 +2386,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator-(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_sub_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(m_simd - b.m_simd);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator-=(const TVec& b)
@@ -2398,16 +2408,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator-=(const TVec& b)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_sub_ps(m_simd, b.m_simd);
-#else
+#	else
 		m_simd -= b.m_simd;
-#endif
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator*(const TVec& b) const
@@ -2420,15 +2432,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator*(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_mul_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(m_simd * b.m_simd);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator*=(const TVec& b)
@@ -2440,16 +2454,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator*=(const TVec& b)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_mul_ps(m_simd, b.m_simd);
-#else
+#	else
 		m_simd *= b.m_simd;
-#endif
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator/(const TVec& b) const
@@ -2463,15 +2479,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator/(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_div_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(m_simd / b.m_simd);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator/=(const TVec& b)
@@ -2484,16 +2502,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator/=(const TVec& b)
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_div_ps(m_simd, b.m_simd);
-#else
+#	else
 		m_simd /= b.m_simd;
-#endif
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator-() const
@@ -2506,15 +2526,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator-() const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_xor_ps(m_simd, _mm_set1_ps(-0.0)));
-#else
+#	else
 		return TVec(-m_simd);
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(IS_INTEGER)
 	TVec operator<<(const TVec& b) const
@@ -2856,20 +2878,22 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	T dot(const TVec& b) const
 	{
 		T o;
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		_mm_store_ss(&o, _mm_dp_ps(m_simd, b.m_simd, 0xF1));
-#else
+#	else
 		const float32x4_t tmp = m_simd * b.m_simd;
 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
 		sum = vpadd_f32(sum, sum);
 		o = sum[0];
-#endif
+#	endif
 		return o;
 	}
+#endif
 
 	/// 6 muls, 3 adds
 	ANKI_ENABLE_METHOD(N == 3)
@@ -2887,12 +2911,13 @@ public:
 		return TVec(xyz().cross(b.xyz()), T(0));
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(N == 4 && HAS_VEC4_SIMD)
 	TVec cross(const TVec& b) const
 	{
 		ANKI_ASSERT(w() == T(0));
 		ANKI_ASSERT(b.w() == T(0));
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		const auto& a = *this;
 		constexpr unsigned int mask0 = _MM_SHUFFLE(3, 0, 2, 1);
 		constexpr unsigned int mask1 = _MM_SHUFFLE(3, 1, 0, 2);
@@ -2903,7 +2928,7 @@ public:
 			_mm_mul_ps(_mm_shuffle_ps(a.m_simd, a.m_simd, U8(mask1)), _mm_shuffle_ps(b.m_simd, b.m_simd, U8(mask0)));
 
 		return TVec(_mm_sub_ps(tmp0, tmp1));
-#else
+#	else
 		TVec out;
 		float32x4_t& c = out.m_simd;
 		const float32x4_t& v0 = m_simd;
@@ -2914,8 +2939,9 @@ public:
 		c = __builtin_shufflevector(c, c, 1, 2, 0, 3);
 
 		return out;
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(N == 3)
 	TVec projectTo(const TVec& toThis) const
@@ -2995,13 +3021,14 @@ public:
 		(*this) /= getLength();
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	void normalize()
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		const __m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
 		m_simd = _mm_mul_ps(m_simd, inverseNorm);
-#else
+#	else
 		// Dot (len squared)
 		float32x4_t tmp = m_simd * m_simd;
 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
@@ -3013,8 +3040,9 @@ public:
 
 		// Multiply
 		m_simd *= mul;
-#endif
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec getNormalized() const
@@ -3022,13 +3050,14 @@ public:
 		return (*this) / getLength();
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec getNormalized() const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		const __m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
 		return TVec(_mm_mul_ps(m_simd, inverse_norm));
-#else
+#	else
 		// Dot (len squared)
 		float32x4_t tmp = m_simd * m_simd;
 		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
@@ -3040,8 +3069,9 @@ public:
 
 		// Multiply
 		return TVec(m_simd * mul);
-#endif
+#	endif
 	}
+#endif
 
 	/// Return lerp(this, v1, t)
 	TVec lerp(const TVec& v1, T t) const
@@ -3060,16 +3090,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec abs() const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		const __m128 signMask = _mm_set1_ps(-0.0f);
 		return TVec(_mm_andnot_ps(signMask, m_simd));
-#else
+#	else
 		return TVec(vabsq_f32(m_simd));
-#endif
+#	endif
 	}
+#endif
 
 	/// Get clamped between two values.
 	TVec clamp(const T minv, const T maxv) const
@@ -3095,16 +3127,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Get the min of all components.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec min(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_min_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(vminq_f32(m_simd, b.m_simd));
-#endif
+#	endif
 	}
+#endif
 
 	/// Get the min of all components.
 	TVec min(const T b) const
@@ -3124,16 +3158,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Get the max of all components.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec max(const TVec& b) const
 	{
-#if ANKI_SIMD_SSE
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_max_ps(m_simd, b.m_simd));
-#else
+#	else
 		return TVec(vmaxq_f32(m_simd, b.m_simd));
-#endif
+#	endif
 	}
+#endif
 
 	/// Get the max of all components.
 	TVec max(const T b) const

+ 3 - 6
AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

@@ -23,15 +23,12 @@ void main()
 {
 	const UVec2 cubeSizeu = UVec2(imageSize(u_cubeTex));
 	const Vec2 cubeSize = Vec2(cubeSizeu);
-	if(gl_GlobalInvocationID.x >= cubeSizeu.x || gl_GlobalInvocationID.y >= cubeSizeu.y)
-	{
-		return;
-	}
+	const UVec2 globalInvocationID = min(gl_GlobalInvocationID.xy, cubeSizeu - 1u);
 
 	const U32 faceIdx = gl_LocalInvocationID.z;
 
 	// Compute the UVs to read the gbuffer from
-	Vec2 sampleUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(cubeSize);
+	Vec2 sampleUv = (Vec2(globalInvocationID) + 0.5) / Vec2(cubeSize);
 	sampleUv.x *= (1.0 / 6.0);
 	sampleUv.x += (1.0 / 6.0) * F32(faceIdx);
 
@@ -48,7 +45,7 @@ void main()
 	const Vec3 indirect = gbuffer.m_diffuse * irradiance;
 
 	// Read the prev color and apply indirect
-	const IVec3 coords = IVec3(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, faceIdx);
+	const IVec3 coords = IVec3(globalInvocationID, faceIdx);
 	const Vec3 prevColor = imageLoad(u_cubeTex, coords).xyz;
 	const Vec3 prevColorWithIndirectDiffuse = prevColor + gbuffer.m_diffuse * indirect;
 

+ 3 - 13
AnKi/Shaders/Ssao.glsl

@@ -134,18 +134,8 @@ Vec3 computeNormal(Vec2 uv, Vec3 origin, F32 depth)
 void main(void)
 {
 #if USE_COMPUTE
-	if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
-	{
-#	if DO_SOFT_BLUR
-		// Store something anyway because alive threads might read it when SOFT_BLUR is enabled
-		s_scratch[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec3(1.0);
-#	endif
-
-		// Skip if it's out of bounds
-		return;
-	}
-
-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
+	const UVec2 globalInvocationID = min(gl_GlobalInvocationID.xy, FB_SIZE - 1u);
+	const Vec2 uv = (Vec2(globalInvocationID) + 0.5) / Vec2(FB_SIZE);
 #else
 	const Vec2 uv = in_uv;
 #endif
@@ -229,7 +219,7 @@ void main(void)
 
 	// Store the result
 #if USE_COMPUTE
-	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(ssao));
+	imageStore(out_img, IVec2(globalInvocationID), Vec4(ssao));
 #else
 	out_color = ssao;
 #endif

+ 13 - 19
AnKi/Shaders/SsgiReconstruct.ankiprog

@@ -94,31 +94,25 @@ void main()
 {
 	const UVec2 IN_TEXTURE_SIZE = FB_SIZE / 2u;
 
-	// Initialize the storage for all threads, including helpers. The check should be a constexpr
-	if((WORKGROUP_SIZE.x % IN_TEXTURE_SIZE.x) != 0u || (WORKGROUP_SIZE.y % IN_TEXTURE_SIZE.y) != 0u)
+	Vec3 color;
+	Vec4 depthRefs;
+	ANKI_BRANCH if(gl_GlobalInvocationID.x < IN_TEXTURE_SIZE.x && gl_GlobalInvocationID.y < IN_TEXTURE_SIZE.y)
 	{
-		s_colors[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec3(0.0);
-		s_depths[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = Vec4(1000.0); // High value so it has low weight
+		const Vec2 inTexelSize = 1.0 / Vec2(IN_TEXTURE_SIZE);
+		const Vec2 fbTexelSize = 1.0 / Vec2(FB_SIZE);
 
-		memoryBarrierShared();
-		barrier();
-	}
+		const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
+		const Vec2 fbUv = (Vec2(gl_GlobalInvocationID.xy) * 2.0 + 1.0) / Vec2(FB_SIZE);
 
-	ANKI_BRANCH if(gl_GlobalInvocationID.x >= IN_TEXTURE_SIZE.x || gl_GlobalInvocationID.y >= IN_TEXTURE_SIZE.y)
+		color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
+		depthRefs = textureGather(sampler2D(u_depthTex, u_linearAnyClampSampler), fbUv, 0);
+	}
+	else
 	{
-		// Out of bounds
-		return;
+		color = Vec3(0.0);
+		depthRefs = Vec4(1000.0);
 	}
 
-	const Vec2 inTexelSize = 1.0 / Vec2(IN_TEXTURE_SIZE);
-	const Vec2 fbTexelSize = 1.0 / Vec2(FB_SIZE);
-
-	const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
-	const Vec2 fbUv = (Vec2(gl_GlobalInvocationID.xy) * 2.0 + 1.0) / Vec2(FB_SIZE);
-
-	const Vec3 color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
-	const Vec4 depthRefs = textureGather(sampler2D(u_depthTex, u_linearAnyClampSampler), fbUv, 0);
-
 	s_colors[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = color;
 	s_depths[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = depthRefs;